In [1]:
# ==============================================================================
# NOTEBOOK DE TESTES METAMÓRFICOS - NÍVEL 2: ANÁLISE DE COERÊNCIA CAUSAL
# ==============================================================================
# Objetivo: Avaliar se o sistema produz respostas logicamente consistentes a
# transformações de entrada que possuem um significado físico ou de negócio,
# aproveitando o conhecimento conceitual sobre as variáveis.
# ------------------------------------------------------------------------------

# 📦 Imports
import pandas as pd
import numpy as np
import joblib
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error
from pathlib import Path
from IPython.display import display

# ------------------------------------------------------------------------------
# 📁 ETAPA 1: SETUP DO AMBIENTE DE TESTE
# ------------------------------------------------------------------------------

# Load the reference trip logs (one recorded CSV per fuel type).
# The locations are built up front so the try block only wraps the reads.
ethanol_path = Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/data/[Etanol] Trajeto Casa-Escola-UFRN/trackLog-2023-Feb-13_06-38-49_seg.csv")
gasoline_path = Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/data/[Gasolina] Trajeto Casa-Escola-UFRN/trackLog-2022-Dec-01_06-43-57_qui.csv")
try:
    df_ethanol = pd.read_csv(ethanol_path)
    df_gasoline = pd.read_csv(gasoline_path)
except FileNotFoundError as e:
    # Report the missing file, then re-raise so the rest of the notebook
    # does not run without data.
    print(f"ERRO: Arquivos de dados não encontrados. {e}")
    raise
else:
    print("Arquivos de dados carregados com sucesso.")

# Função para preparar os dados
def preparar_dados(df):
    """Return a cleaned copy of a trip log with a derived ``Acceleration`` column.

    Column names are stripped of surrounding whitespace and the
    ``Speed (OBD)(km/h)`` spelling is renamed to ``Speed(OBD)(km/h)``.
    When the speed column is present, ``Acceleration`` is added as the
    difference between consecutive speed samples after converting km/h to
    m/s. Missing speeds count as 0 for this computation only; the speed
    column itself is left as read. The first row always gets 0.

    Args:
        df: Input DataFrame. It is not modified.

    Returns:
        A new DataFrame with normalized column names and, if the speed
        column exists, the ``Acceleration`` column. An empty log yields an
        empty ``Acceleration`` column instead of raising ``IndexError``.
    """
    df_copy = df.copy()
    df_copy.columns = df_copy.columns.str.strip()
    # Once names are stripped, only the variant with an internal space can
    # still differ from the canonical name, so it is the only rename needed.
    df_copy = df_copy.rename(columns={"Speed (OBD)(km/h)": "Speed(OBD)(km/h)"})
    if "Speed(OBD)(km/h)" in df_copy.columns:
        # km/h -> m/s
        speeds = df_copy["Speed(OBD)(km/h)"].fillna(0).to_numpy(dtype=float) * 1000 / 3600
        if speeds.size:
            # Per-sample speed difference; equals m/s^2 only if the log is
            # sampled once per second -- TODO confirm the sampling rate.
            df_copy["Acceleration"] = np.diff(speeds, prepend=speeds[0])
        else:
            # speeds[0] does not exist for an empty log.
            df_copy["Acceleration"] = np.empty(0, dtype=float)
    return df_copy

# Normalize both trip logs (ethanol first, then gasoline) and rebind the names.
df_ethanol, df_gasoline = map(preparar_dados, (df_ethanol, df_gasoline))

# Load the trained regressors: one AFR model and one MAF model per fuel.
# NOTE(review): joblib.load unpickles its input, so only point these paths at
# model files from a trusted source.
caminhos_modelos = {
    "ethanol": {
        "afr": Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/LGBMRegressor_ethanol_afr.pkl"),
        "maf": Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/XGBRegressor_ethanol_maf.pkl"),
    },
    "gasoline": {
        "afr": Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/LGBMRegressor_gasoline_afr.pkl"),
        "maf": Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/XGBRegressor_gasoline_maf.pkl"),
    },
}
try:
    # Dict order keeps the original load sequence:
    # ethanol afr, ethanol maf, gasoline afr, gasoline maf.
    modelos = {
        combustivel: {alvo: joblib.load(caminho) for alvo, caminho in por_alvo.items()}
        for combustivel, por_alvo in caminhos_modelos.items()
    }
except FileNotFoundError as e:
    # Report the missing file, then re-raise: the tests cannot run without models.
    print(f"ERRO: Arquivos de modelo não encontrados. {e}")
    raise
else:
    print("Modelos de IA carregados com sucesso.")

# ------------------------------------------------------------------------------
# 🔧 ETAPA 2: DEFINIÇÃO DAS FUNÇÕES DE SUPORTE
# ------------------------------------------------------------------------------

def compute_co2(afr, maf, fuel_type):
    """Estimate the CO2 emission rate from air-fuel ratio and mass air flow.

    The fuel rate is ``maf / afr`` (0 wherever ``afr`` is 0). It is scaled by
    the fuel's carbon mass fraction and by 44/12, the CO2-to-carbon molar
    mass ratio.

    Args:
        afr: Air-fuel ratio(s); scalar or array-like.
        maf: Mass air flow value(s); scalar or array-like that broadcasts
            against ``afr``.
        fuel_type: ``"ethanol"`` or ``"gasoline"``.

    Returns:
        numpy.ndarray of CO2 rates in the same unit as ``maf``.

    Raises:
        ValueError: If ``fuel_type`` is not a supported fuel.
    """
    carbon_fraction = {"ethanol": 0.5217, "gasoline": 0.8571}
    if fuel_type not in carbon_fraction:
        # Fail early with a clear message instead of an opaque TypeError
        # from multiplying by None later on.
        raise ValueError(
            f"Unsupported fuel_type {fuel_type!r}; expected one of {sorted(carbon_fraction)}"
        )
    afr = np.asarray(afr, dtype=float)
    maf = np.asarray(maf, dtype=float)
    # Size the output for the broadcast result so a scalar on either side works.
    out = np.zeros(np.broadcast(maf, afr).shape, dtype=float)
    # Positions where afr == 0 are skipped and keep the 0 already in `out`.
    fuel_rate = np.divide(maf, afr, out=out, where=afr != 0)
    return fuel_rate * carbon_fraction[fuel_type] * (44.0 / 12.0)

def encontrar_coluna(df, alternativas):
    """Return the first column of ``df`` whose stripped name is in ``alternativas``.

    Names are compared after stripping surrounding whitespace, but ``df``
    itself is left untouched: a lookup helper should not rename the caller's
    columns as a side effect.

    Args:
        df: DataFrame to search.
        alternativas: Candidate column names, in priority order.

    Returns:
        The matching column as a Series named after the matched candidate,
        or ``None`` when no candidate is present.
    """
    nomes_limpos = df.columns.str.strip().tolist()
    for alt in alternativas:
        if alt in nomes_limpos:
            # Positional access works even if the stored name still has
            # whitespace around it; rename so callers see the clean name.
            return df.iloc[:, nomes_limpos.index(alt)].rename(alt)
    return None

def executar_teste_plotly(df_original, df_modificado, modelo_afr, modelo_maf, fuel, nome_mr, co2_real=None):
    """Run one metamorphic relation and report how much the predicted CO2 changes.

    Both frames are passed through the AFR and MAF models, the predictions
    are turned into CO2 with ``compute_co2``, and the absolute difference
    between the two CO2 series is summarized (mean, max, RMSE, std). A
    preview table of the first rows is displayed and a Plotly line chart of
    the first 200 samples is shown.

    Args:
        df_original: Unmodified input data. It is not mutated.
        df_modificado: Transformed input data. It is not mutated.
        modelo_afr: Fitted model exposing ``predict`` for the air-fuel ratio.
        modelo_maf: Fitted model exposing ``predict`` for the mass air flow.
        fuel: Fuel key forwarded to ``compute_co2``.
        nome_mr: Name of the metamorphic relation, used in titles and output.
        co2_real: Optional reference CO2 series, shown in the table and plot.

    Returns:
        None. Results are printed, displayed and plotted.
    """
    features = ["Latitude", "Longitude", "Speed(OBD)(km/h)", "Acceleration"]

    # reindex returns new frames restricted to the model features. A missing
    # feature column becomes all zeros, as before, but the caller's frames
    # are no longer modified.
    x_orig = df_original.reindex(columns=features, fill_value=0)
    x_mod = df_modificado.reindex(columns=features, fill_value=0)

    afr_orig = modelo_afr.predict(x_orig)
    maf_orig = modelo_maf.predict(x_orig)
    co2_orig = compute_co2(afr_orig, maf_orig, fuel)

    afr_mod = modelo_afr.predict(x_mod)
    maf_mod = modelo_maf.predict(x_mod)
    co2_mod = compute_co2(afr_mod, maf_mod, fuel)

    diff = np.abs(co2_orig - co2_mod)
    media, maxima, std = diff.mean(), diff.max(), diff.std()
    # RMSE is computed with numpy: mean_squared_error(..., squared=False) is
    # deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = float(np.sqrt(np.mean(np.square(diff))))

    print(f"\n{'='*80}\n🔁 {nome_mr} ({fuel.upper()})\n{'-'*80}")
    print(f"Diferença Média: {media:.4f} | Diferença Máxima: {maxima:.4f} | RMSE: {rmse:.4f} | Desvio Padrão (Std): {std:.4f}")

    # Preview at most 10 rows. Every piece gets a fresh 0..n-1 index so the
    # column-wise concat lines rows up by position, whatever index the input
    # frames carry and however few rows they have.
    n_preview = min(10, len(x_orig), len(x_mod))
    if co2_real is not None:
        previa_real = pd.Series(np.asarray(co2_real)[:n_preview])
    else:
        previa_real = pd.Series(["N/A"] * n_preview, dtype=object)
    tabela = pd.concat([
        x_orig.iloc[:n_preview].reset_index(drop=True).add_suffix("_Orig"),
        pd.DataFrame({" ": ["→"] * n_preview}),
        x_mod.iloc[:n_preview].reset_index(drop=True).add_suffix("_Mod"),
        pd.DataFrame({" ": [" "] * n_preview}),
        pd.DataFrame({
            "CO₂_Real": previa_real,
            "CO₂_Pred_Orig": pd.Series(co2_orig[:n_preview]),
            "CO₂_Pred_Mod": pd.Series(co2_mod[:n_preview]),
        }),
    ], axis=1).fillna("N/A")
    display(tabela.style.set_caption(f"Comparação dos dados - {nome_mr} ({fuel.upper()})"))

    fig = go.Figure()
    if co2_real is not None:
        fig.add_trace(go.Scatter(y=co2_real[:200], mode='lines', name='Real', line=dict(dash='dot', color='gray')))
    fig.add_trace(go.Scatter(y=co2_orig[:200], mode='lines', name='Predição Original', line=dict(dash='dash', color='blue')))
    fig.add_trace(go.Scatter(y=co2_mod[:200], mode='lines', name='Predição com RM', line=dict(color='red')))
    fig.update_layout(
        title=dict(text=f"Comparação CO₂ (200 Amostras) - {nome_mr} - {fuel.upper()}", x=0.5),
        xaxis_title="Amostras", yaxis_title="CO₂ (g/s)", template="plotly_white"
    )
    fig.show()

# ------------------------------------------------------------------------------
# 🔁 ETAPA 3: DEFINIÇÃO DOS CASOS DE TESTE METAMÓRFICOS - NÍVEL 2
# ------------------------------------------------------------------------------

def _escalonar_velocidade_x20(df):
    """Return a new frame whose OBD speed column is multiplied by 20."""
    # NOTE(review): only the speed column changes; "Acceleration" (derived
    # from speed in preparar_dados) is not recomputed -- confirm this is the
    # intended relation.
    velocidade = df['Speed(OBD)(km/h)'] * 20
    return df.assign(**{'Speed(OBD)(km/h)': velocidade})


def _deslocar_coordenadas(df):
    """Return a new frame with Latitude and Longitude each shifted by +1.0."""
    return df.assign(
        Latitude=df['Latitude'] + 1.0,
        Longitude=df['Longitude'] + 1.0,
    )


def _inverter_aceleracao(df):
    """Return a new frame with the sign of the Acceleration column flipped."""
    return df.assign(Acceleration=df['Acceleration'] * -1)


# Level-2 metamorphic relations: test-case label -> transformation.
# Each transformation takes a DataFrame and returns a modified new frame
# (DataFrame.assign never alters its input).
testes_metamorficos_n2 = {
    # Test ID: CT_N2_006
    "CT_N2_006: [Multiplicativa] - Escalonar Velocidade (x20)": _escalonar_velocidade_x20,
    # Test ID: CT_N2_007
    "CT_N2_007: [Aditiva] - Deslocar Coordenadas (+1.0)": _deslocar_coordenadas,
    # Test ID: CT_N2_008
    "CT_N2_008: [Inversiva] - Inverter Aceleração": _inverter_aceleracao,
}

# ------------------------------------------------------------------------------
# 🧲 ETAPA 4: EXECUÇÃO DOS TESTES DE NÍVEL 2
# ------------------------------------------------------------------------------

print("=" * 80)
print("INICIANDO EXECUÇÃO DOS TESTES DE NÍVEL 2")
print("=" * 80)

# Reference CO2 series per fuel, computed from the logged AFR and MAF columns
# when both exist; used only as an extra column/trace in the reports.
colunas_afr = ["AirFuelRatio(Commanded)(:1)", "Air Fuel Ratio(Commanded)(:1)"]
colunas_maf = ["MassAirFlowRate(g/s)", "Mass Air Flow Rate(g/s)"]

afr_real_etanol = encontrar_coluna(df_ethanol, colunas_afr)
maf_real_etanol = encontrar_coluna(df_ethanol, colunas_maf)
if afr_real_etanol is None or maf_real_etanol is None:
    co2_real_etanol = None
else:
    co2_real_etanol = compute_co2(afr_real_etanol.values, maf_real_etanol.values, "ethanol")

afr_real_gasoline = encontrar_coluna(df_gasoline, colunas_afr)
maf_real_gasoline = encontrar_coluna(df_gasoline, colunas_maf)
if afr_real_gasoline is None or maf_real_gasoline is None:
    co2_real_gasoline = None
else:
    co2_real_gasoline = compute_co2(afr_real_gasoline.values, maf_real_gasoline.values, "gasoline")

valores_reais = {"ethanol": co2_real_etanol, "gasoline": co2_real_gasoline}

# Run every metamorphic relation against each fuel's data set and models.
dados_por_combustivel = (("ethanol", df_ethanol), ("gasoline", df_gasoline))
for nome_mr, transformacao in testes_metamorficos_n2.items():
    for combustivel, df_original in dados_por_combustivel:
        # Each transformation gets its own copy of the baseline frame.
        df_modificado = transformacao(df_original.copy())
        modelos_combustivel = modelos[combustivel]
        executar_teste_plotly(
            df_original=df_original,
            df_modificado=df_modificado,
            modelo_afr=modelos_combustivel["afr"],
            modelo_maf=modelos_combustivel["maf"],
            fuel=combustivel,
            nome_mr=nome_mr,
            co2_real=valores_reais[combustivel],
        )

Arquivos de dados carregados com sucesso.
Modelos de IA carregados com sucesso.
INICIANDO EXECUÇÃO DOS TESTES DE NÍVEL 2

🔁 CT_N2_006: [Multiplicativa] - Escalonar Velocidade (x20) (ETHANOL)
--------------------------------------------------------------------------------
Diferença Média: 0.4809 | Diferença Máxima: 2.7199 | RMSE: 0.6804 | Desvio Padrão (Std): 0.4814


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.820105,-35.220818,3,0.0,→,-5.820105,-35.220818,60,0.0,,0.642839,0.742631,1.006023
1,-5.820103,-35.220828,5,0.555556,→,-5.820103,-35.220828,100,0.555556,,0.653249,0.771505,1.623527
2,-5.820102,-35.220838,5,0.0,→,-5.820102,-35.220838,100,0.0,,0.653249,0.771505,1.57949
3,-5.820096,-35.220843,3,-0.555556,→,-5.820096,-35.220843,60,-0.555556,,0.679275,0.659013,0.91403
4,-5.820091,-35.22085,0,-0.833333,→,-5.820091,-35.22085,0,-0.833333,,0.610306,0.667689,0.667689
5,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,0,0.0,,0.610306,0.706858,0.706858
6,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,0,0.0,,0.490587,0.706858,0.706858
7,-5.820085,-35.220856,3,0.833333,→,-5.820085,-35.220856,60,0.833333,,1.090483,1.137803,1.000498
8,-5.820074,-35.220859,3,0.0,→,-5.820074,-35.220859,60,0.0,,1.090483,1.137803,0.924211
9,-5.820064,-35.220858,3,0.0,→,-5.820064,-35.220858,60,0.0,,0.921315,1.137803,0.924211



🔁 CT_N2_006: [Multiplicativa] - Escalonar Velocidade (x20) (GASOLINE)
--------------------------------------------------------------------------------
Diferença Média: 0.4380 | Diferença Máxima: 2.3344 | RMSE: 0.6509 | Desvio Padrão (Std): 0.4815


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.82001,-35.220825,5,0.0,→,-5.82001,-35.220825,100,0.0,,1.002671,1.456781,3.189422
1,-5.820013,-35.220822,4,-0.277778,→,-5.820013,-35.220822,80,-0.277778,,1.01336,1.016708,2.0518
2,-5.820016,-35.220819,5,0.277778,→,-5.820016,-35.220819,100,0.277778,,2.08872,1.456781,3.550872
3,-5.820014,-35.220825,7,0.555556,→,-5.820014,-35.220825,140,0.555556,,2.261889,1.99638,3.550872
4,-5.819992,-35.220829,9,0.555556,→,-5.819992,-35.220829,180,0.555556,,2.499195,2.400486,3.046464
5,-5.819974,-35.220824,12,0.833333,→,-5.819974,-35.220824,240,0.833333,,2.606089,2.400486,3.046464
6,-5.819944,-35.220823,15,0.833333,→,-5.819944,-35.220823,300,0.833333,,3.070012,2.148721,2.948598
7,-5.819908,-35.22081,18,0.833333,→,-5.819908,-35.22081,360,0.833333,,2.445747,1.47094,2.285238
8,-5.819866,-35.220794,18,0.0,→,-5.819866,-35.220794,360,0.0,,1.073221,1.588029,2.052619
9,-5.819821,-35.220775,16,-0.555556,→,-5.819821,-35.220775,320,-0.555556,,0.562265,1.085921,1.591138



🔁 CT_N2_007: [Aditiva] - Deslocar Coordenadas (+1.0) (ETHANOL)
--------------------------------------------------------------------------------
Diferença Média: 0.3068 | Diferença Máxima: 2.2510 | RMSE: 0.4360 | Desvio Padrão (Std): 0.3098


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.820105,-35.220818,3,0.0,→,-4.820105,-34.220818,3,0.0,,0.642839,0.742631,0.778424
1,-5.820103,-35.220828,5,0.555556,→,-4.820103,-34.220828,5,0.555556,,0.653249,0.771505,0.671185
2,-5.820102,-35.220838,5,0.0,→,-4.820102,-34.220838,5,0.0,,0.653249,0.771505,0.671185
3,-5.820096,-35.220843,3,-0.555556,→,-4.820096,-34.220843,3,-0.555556,,0.679275,0.659013,0.674095
4,-5.820091,-35.22085,0,-0.833333,→,-4.820091,-34.22085,0,-0.833333,,0.610306,0.667689,0.588105
5,-5.820091,-35.22085,0,0.0,→,-4.820091,-34.22085,0,0.0,,0.610306,0.706858,0.606701
6,-5.820091,-35.22085,0,0.0,→,-4.820091,-34.22085,0,0.0,,0.490587,0.706858,0.606701
7,-5.820085,-35.220856,3,0.833333,→,-4.820085,-34.220856,3,0.833333,,1.090483,1.137803,0.778424
8,-5.820074,-35.220859,3,0.0,→,-4.820074,-34.220859,3,0.0,,1.090483,1.137803,0.778424
9,-5.820064,-35.220858,3,0.0,→,-4.820064,-34.220858,3,0.0,,0.921315,1.137803,0.778424



🔁 CT_N2_007: [Aditiva] - Deslocar Coordenadas (+1.0) (GASOLINE)
--------------------------------------------------------------------------------
Diferença Média: 0.6722 | Diferença Máxima: 2.9079 | RMSE: 0.7920 | Desvio Padrão (Std): 0.4188


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.82001,-35.220825,5,0.0,→,-4.82001,-34.220825,5,0.0,,1.002671,1.456781,1.420268
1,-5.820013,-35.220822,4,-0.277778,→,-4.820013,-34.220822,4,-0.277778,,1.01336,1.016708,1.5339
2,-5.820016,-35.220819,5,0.277778,→,-4.820016,-34.220819,5,0.277778,,2.08872,1.456781,1.420268
3,-5.820014,-35.220825,7,0.555556,→,-4.820014,-34.220825,7,0.555556,,2.261889,1.99638,1.657848
4,-5.819992,-35.220829,9,0.555556,→,-4.819992,-34.220829,9,0.555556,,2.499195,2.400486,1.793865
5,-5.819974,-35.220824,12,0.833333,→,-4.819974,-34.220824,12,0.833333,,2.606089,2.400486,1.628341
6,-5.819944,-35.220823,15,0.833333,→,-4.819944,-34.220823,15,0.833333,,3.070012,2.148721,1.560293
7,-5.819908,-35.22081,18,0.833333,→,-4.819908,-34.22081,18,0.833333,,2.445747,1.47094,1.663663
8,-5.819866,-35.220794,18,0.0,→,-4.819866,-34.220794,18,0.0,,1.073221,1.588029,1.654371
9,-5.819821,-35.220775,16,-0.555556,→,-4.819821,-34.220775,16,-0.555556,,0.562265,1.085921,2.01051



🔁 CT_N2_008: [Inversiva] - Inverter Aceleração (ETHANOL)
--------------------------------------------------------------------------------
Diferença Média: 0.1935 | Diferença Máxima: 1.8051 | RMSE: 0.3569 | Desvio Padrão (Std): 0.2999


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.820105,-35.220818,3,0.0,→,-5.820105,-35.220818,3,-0.0,,0.642839,0.742631,0.742631
1,-5.820103,-35.220828,5,0.555556,→,-5.820103,-35.220828,5,-0.555556,,0.653249,0.771505,0.830598
2,-5.820102,-35.220838,5,0.0,→,-5.820102,-35.220838,5,-0.0,,0.653249,0.771505,0.771505
3,-5.820096,-35.220843,3,-0.555556,→,-5.820096,-35.220843,3,0.555556,,0.679275,0.659013,0.742631
4,-5.820091,-35.22085,0,-0.833333,→,-5.820091,-35.22085,0,0.833333,,0.610306,0.667689,0.706858
5,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,0,-0.0,,0.610306,0.706858,0.706858
6,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,0,-0.0,,0.490587,0.706858,0.706858
7,-5.820085,-35.220856,3,0.833333,→,-5.820085,-35.220856,3,-0.833333,,1.090483,1.137803,0.724289
8,-5.820074,-35.220859,3,0.0,→,-5.820074,-35.220859,3,-0.0,,1.090483,1.137803,1.137803
9,-5.820064,-35.220858,3,0.0,→,-5.820064,-35.220858,3,-0.0,,0.921315,1.137803,1.137803



🔁 CT_N2_008: [Inversiva] - Inverter Aceleração (GASOLINE)
--------------------------------------------------------------------------------
Diferença Média: 0.3578 | Diferença Máxima: 3.2358 | RMSE: 0.5916 | Desvio Padrão (Std): 0.4711


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.82001,-35.220825,5,0.0,→,-5.82001,-35.220825,5,-0.0,,1.002671,1.456781,1.456781
1,-5.820013,-35.220822,4,-0.277778,→,-5.820013,-35.220822,4,0.277778,,1.01336,1.016708,1.456781
2,-5.820016,-35.220819,5,0.277778,→,-5.820016,-35.220819,5,-0.277778,,2.08872,1.456781,1.016708
3,-5.820014,-35.220825,7,0.555556,→,-5.820014,-35.220825,7,-0.555556,,2.261889,1.99638,0.972466
4,-5.819992,-35.220829,9,0.555556,→,-5.819992,-35.220829,9,-0.555556,,2.499195,2.400486,1.621746
5,-5.819974,-35.220824,12,0.833333,→,-5.819974,-35.220824,12,-0.833333,,2.606089,2.400486,1.630442
6,-5.819944,-35.220823,15,0.833333,→,-5.819944,-35.220823,15,-0.833333,,3.070012,2.148721,1.361783
7,-5.819908,-35.22081,18,0.833333,→,-5.819908,-35.22081,18,-0.833333,,2.445747,1.47094,0.924451
8,-5.819866,-35.220794,18,0.0,→,-5.819866,-35.220794,18,-0.0,,1.073221,1.588029,1.588029
9,-5.819821,-35.220775,16,-0.555556,→,-5.819821,-35.220775,16,0.555556,,0.562265,1.085921,1.625817
