In [4]:
# ==============================================================================
# NOTEBOOK DE TESTES METAMÓRFICOS - NÍVEL 3: ANÁLISE DE GENERALIZAÇÃO
# ==============================================================================
# Objetivo: Avaliar a capacidade do modelo de fornecer saídas coerentes e
# fisicamente plausíveis sob condições dinâmicas e sequências de eventos complexos.
# ------------------------------------------------------------------------------

# 📦 Imports
import pandas as pd
import numpy as np
import joblib
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error
from pathlib import Path
from IPython.display import display

# ------------------------------------------------------------------------------
# 📁 ETAPA 1: SETUP DO AMBIENTE DE TESTE
# ------------------------------------------------------------------------------

# Load the reference drive logs (one trip per fuel type).
# The absolute paths below are specific to the original author's machine.
ethanol_path = Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/data/[Etanol] Trajeto Casa-Escola-UFRN/trackLog-2023-Feb-13_06-38-49_seg.csv")
gasoline_path = Path(r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/data/[Gasolina] Trajeto Casa-Escola-UFRN/trackLog-2022-Dec-01_06-43-57_qui.csv")

try:
    df_ethanol = pd.read_csv(ethanol_path)
    df_gasoline = pd.read_csv(gasoline_path)
except FileNotFoundError as erro:
    # Report the missing file in the notebook output, then abort the cell.
    print(f"ERRO: Arquivos de dados não encontrados. {erro}")
    raise
else:
    print("Arquivos de dados carregados com sucesso.")

# Função para preparar os dados
def preparar_dados(df):
    """Return a cleaned copy of a raw track log with a derived ``Acceleration`` column.

    Steps performed on the copy (the input frame is never modified):

    1. Surrounding whitespace is stripped from every column label.
    2. ``'Speed (OBD)(km/h)'`` is renamed to ``'Speed(OBD)(km/h)'``.
    3. If the speed column exists, missing speeds are treated as 0, the speed
       is converted from km/h to m/s, and ``Acceleration`` is set to the
       difference between consecutive samples (0 for the first row).

    Args:
        df: Raw log as a pandas DataFrame.

    Returns:
        A new DataFrame with normalised column labels and, when a speed column
        is present, an added ``Acceleration`` column.
    """
    df_copy = df.copy()
    df_copy.columns = df_copy.columns.str.strip()

    # Only the speed label needs renaming. The former 'Latitude ' / 'Longitude '
    # entries carried a trailing space and could never match once the labels
    # had been stripped above.
    df_copy = df_copy.rename(columns={'Speed (OBD)(km/h)': 'Speed(OBD)(km/h)'})

    if "Speed(OBD)(km/h)" in df_copy.columns:
        # km/h -> m/s; coercing to float keeps the result numeric even when the
        # CSV column was parsed with an object dtype.
        speeds = df_copy["Speed(OBD)(km/h)"].fillna(0).to_numpy(dtype=float) * 1000 / 3600
        # Prepending the first sample makes the first difference 0. Slicing
        # (speeds[:1]) instead of indexing (speeds[0]) also works on an empty log.
        # NOTE(review): this is a per-sample speed delta; it only equals m/s^2 if
        # the log is sampled at 1 Hz - confirm against the logger settings.
        df_copy["Acceleration"] = np.diff(speeds, prepend=speeds[:1])
    return df_copy

# Replace each raw log with its normalised copy (clean labels + Acceleration).
df_ethanol = df_ethanol.pipe(preparar_dados)
df_gasoline = df_gasoline.pipe(preparar_dados)

# Load the trained regressors: one AFR model and one MAF model per fuel.
# NOTE: joblib files are pickles - only load model files from a trusted source.
_arquivos_modelos = {
    "ethanol": {
        "afr": r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/LGBMRegressor_ethanol_afr.pkl",
        "maf": r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/XGBRegressor_ethanol_maf.pkl",
    },
    "gasoline": {
        "afr": r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/LGBMRegressor_gasoline_afr.pkl",
        "maf": r"C:/Users/Paulo Eduardo/Documents/Dissertação/ModelosML/Conect2ai/MDPI2023-pollution/models/XGBRegressor_gasoline_maf.pkl",
    },
}

try:
    # Dict ordering keeps the load sequence: ethanol afr, ethanol maf,
    # gasoline afr, gasoline maf.
    modelos = {
        nome_combustivel: {
            alvo: joblib.load(Path(caminho)) for alvo, caminho in arquivos.items()
        }
        for nome_combustivel, arquivos in _arquivos_modelos.items()
    }
except FileNotFoundError as erro:
    # Report the missing file in the notebook output, then abort the cell.
    print(f"ERRO: Arquivos de modelo não encontrados. {erro}")
    raise
else:
    print("Modelos de IA carregados com sucesso.")

# ------------------------------------------------------------------------------
# 🔧 ETAPA 2: DEFINIÇÃO DAS FUNÇÕES DE SUPORTE
# ------------------------------------------------------------------------------

def compute_co2(afr, maf, fuel_type):
    """Estimate the CO2 mass flow from air-fuel ratio and mass air flow.

    The fuel flow is ``maf / afr``; it is multiplied by the fuel's carbon mass
    fraction and by 44/12 (molar mass of CO2 over that of carbon).

    Args:
        afr: Air-fuel ratio, scalar or array-like.
        maf: Mass air flow, scalar or array-like broadcastable against ``afr``.
            The result is expressed in the same mass-flow unit as ``maf``.
        fuel_type: ``"ethanol"`` or ``"gasoline"``.

    Returns:
        numpy.ndarray with the broadcast shape of ``afr`` and ``maf``. Entries
        where ``afr`` is 0 are reported as 0 instead of inf/NaN.

    Raises:
        ValueError: If ``fuel_type`` is not one of the supported fuels.
    """
    carbon_fraction = {"ethanol": 0.5217, "gasoline": 0.8571}
    if fuel_type not in carbon_fraction:
        # Fail with a clear message instead of the opaque
        # "float * NoneType" TypeError produced by a silent .get() miss.
        raise ValueError(
            f"Unsupported fuel_type {fuel_type!r}; expected one of {sorted(carbon_fraction)}"
        )

    afr = np.asarray(afr, dtype=float)
    maf = np.asarray(maf, dtype=float)

    # Pre-filled zeros are what remains wherever afr == 0 (division skipped).
    # Sizing the buffer to the broadcast shape lets a scalar be combined with
    # an array on either side.
    fuel_rate = np.divide(
        maf,
        afr,
        out=np.zeros(np.broadcast(maf, afr).shape, dtype=float),
        where=afr != 0,
    )
    return fuel_rate * carbon_fraction[fuel_type] * (44.0 / 12.0)

def encontrar_coluna(df, alternativas):
    """Return the first column of ``df`` whose label matches one of ``alternativas``.

    Labels are compared after stripping surrounding whitespace, so
    ``" MassAirFlowRate(g/s)"`` matches ``"MassAirFlowRate(g/s)"``. Unlike the
    previous version, the lookup no longer rewrites ``df.columns`` on the
    caller's DataFrame.

    Args:
        df: DataFrame to search.
        alternativas: Candidate labels in priority order.

    Returns:
        The matching column as a Series (under the frame's own label), or
        ``None`` when no candidate is present.
    """
    # Stripped label -> actual label. setdefault keeps the first occurrence if
    # two labels collapse to the same stripped text.
    rotulos = {}
    for rotulo in df.columns:
        chave = rotulo.strip() if isinstance(rotulo, str) else rotulo
        rotulos.setdefault(chave, rotulo)

    for alt in alternativas:
        if alt in rotulos:
            return df[rotulos[alt]]
    return None

# --- FUNÇÃO PRINCIPAL CORRIGIDA ---
def executar_teste_plotly(df_original, df_modificado, modelo_afr, modelo_maf, fuel, nome_mr, co2_real=None):
    """Run one metamorphic relation and report how much the predicted CO2 changes.

    Both frames are fed to the AFR and MAF models, the predictions are turned
    into CO2 with ``compute_co2``, and the original and modified CO2 series are
    compared. The function prints summary statistics, displays a preview table
    of the first rows, and shows a Plotly line chart of the first 200 samples.

    Args:
        df_original: Source data. Not modified by this function.
        df_modificado: Data after the metamorphic transformation; must have the
            same number of rows as ``df_original``. Not modified.
        modelo_afr: Fitted regressor exposing ``predict`` for the air-fuel ratio.
        modelo_maf: Fitted regressor exposing ``predict`` for the mass air flow.
        fuel: Fuel key forwarded to ``compute_co2`` and shown in the titles.
        nome_mr: Human-readable name of the metamorphic relation.
        co2_real: Optional array of reference CO2 values, row-aligned with
            ``df_original``; shown in the table and chart when given.

    Returns:
        None. All results are emitted as notebook output.
    """
    # Column order in which the features are handed to the models.
    features_ordem_correta = ["Latitude", "Longitude", "Speed(OBD)(km/h)", "Acceleration"]

    # reindex builds new frames in the required column order and fills any
    # missing feature with 0, without adding columns to the caller's DataFrames.
    x_orig = df_original.reindex(columns=features_ordem_correta, fill_value=0)
    x_mod = df_modificado.reindex(columns=features_ordem_correta, fill_value=0)

    afr_orig = modelo_afr.predict(x_orig)
    maf_orig = modelo_maf.predict(x_orig)
    co2_orig = compute_co2(afr_orig, maf_orig, fuel)

    afr_mod = modelo_afr.predict(x_mod)
    maf_mod = modelo_maf.predict(x_mod)
    co2_mod = compute_co2(afr_mod, maf_mod, fuel)

    diff = np.abs(co2_orig - co2_mod)
    media, maxima, std = diff.mean(), diff.max(), diff.std()
    # mean_squared_error(..., squared=False) is deprecated in scikit-learn 1.4
    # and removed in 1.6; taking the square root works on every version.
    rmse = np.sqrt(mean_squared_error(co2_orig, co2_mod))

    print(f"\n{'='*80}\n🔁 {nome_mr} ({fuel.upper()})\n{'-'*80}")
    print(f"Diferença Média: {media:.4f} | Diferença Máxima: {maxima:.4f} | RMSE: {rmse:.4f} | Desvio Padrão (Std): {std:.4f}")

    # Preview at most 10 rows. Indexes are reset so the side-by-side concat
    # lines rows up by position even if the input frames carry a custom index.
    n_preview = min(10, len(x_orig))
    tabela = pd.concat([
        x_orig.iloc[:n_preview].reset_index(drop=True).add_suffix("_Orig"),
        pd.DataFrame({" ": ["→"] * n_preview}),
        x_mod.iloc[:n_preview].reset_index(drop=True).add_suffix("_Mod"),
        pd.DataFrame({" ": [" "] * n_preview}),
        pd.DataFrame({
            "CO₂_Real": co2_real[:n_preview] if co2_real is not None else "N/A",
            "CO₂_Pred_Orig": co2_orig[:n_preview],
            "CO₂_Pred_Mod": co2_mod[:n_preview]
        })
    ], axis=1).fillna("N/A")
    display(tabela.style.set_caption(f"Comparação dos dados - {nome_mr} ({fuel.upper()})"))

    # Chart: reference (if any), baseline prediction and post-transformation
    # prediction for the first 200 samples.
    fig = go.Figure()
    if co2_real is not None:
        fig.add_trace(go.Scatter(y=co2_real[:200], mode='lines', name='Real', line=dict(dash='dot', color='gray')))
    fig.add_trace(go.Scatter(y=co2_orig[:200], mode='lines', name='Predição Original', line=dict(dash='dash', color='blue')))
    fig.add_trace(go.Scatter(y=co2_mod[:200], mode='lines', name='Predição com RM', line=dict(color='red')))
    fig.update_layout(
        title=dict(text=f"Comparação CO₂ (200 Amostras) - {nome_mr} - {fuel.upper()}", x=0.5),
        xaxis_title="Amostras", yaxis_title="CO₂ (g/s)", template="plotly_white"
    )
    fig.show()

# ==============================================================================
# 🔁 ETAPA 3: DEFINIÇÃO DOS CASOS DE TESTE METAMÓRFICOS - NÍVEL 3 (VERSÃO CORRIGIDA)
# ==============================================================================

def _simular_subida_ingreme(df):
    """CT_N3_012: set every row to 40 km/h with a constant +1.5 acceleration."""
    # DataFrame.assign already returns a new frame, so the input stays untouched.
    return df.assign(**{'Speed(OBD)(km/h)': 40.0, 'Acceleration': 1.5})


def _simular_inercia_declive(df):
    """CT_N3_013: set every row to 80 km/h with a constant -0.5 acceleration."""
    return df.assign(**{'Speed(OBD)(km/h)': 80.0, 'Acceleration': -0.5})


def _simular_frenagem_brusca(df):
    """CT_N3_014: lowered speed profile combined with a constant -4.0 acceleration.

    Each row receives the previous row's original speed minus 5 km/h (the
    first row uses 80 as its predecessor), floored at 0.
    """
    # NOTE(review): the speed is derived from the original trace shifted by one
    # row, not from a cumulative deceleration starting at 80 km/h - confirm this
    # is the intended braking profile.
    velocidade_anterior = df['Speed(OBD)(km/h)'].shift(1, fill_value=80)
    velocidade_reduzida = (velocidade_anterior - 5).clip(lower=0)
    return df.assign(**{'Speed(OBD)(km/h)': velocidade_reduzida, 'Acceleration': -4.0})


# Test name -> transformation taking a DataFrame and returning a modified copy.
testes_metamorficos_n3 = {
    "CT_N3_012: [Composta] - Simular Subida Íngreme": _simular_subida_ingreme,
    "CT_N3_013: [Composta] - Simular Inércia em Declive": _simular_inercia_declive,
    "CT_N3_014: [Composta] - Simular Frenagem Brusca (Cut-off)": _simular_frenagem_brusca,
}

# ------------------------------------------------------------------------------
# 🧲 ETAPA 4: EXECUÇÃO DOS TESTES DE NÍVEL 3
# ------------------------------------------------------------------------------

# Banner: rule, title, rule - one per line.
print("=" * 80, "INICIANDO EXECUÇÃO DOS TESTES DE NÍVEL 3", "=" * 80, sep="\n")

# Candidate labels under which the logged AFR / MAF columns may appear.
_rotulos_afr = ["AirFuelRatio(Commanded)(:1)", "Air Fuel Ratio(Commanded)(:1)"]
_rotulos_maf = ["MassAirFlowRate(g/s)", "Mass Air Flow Rate(g/s)"]

# Reference CO2 from the logged values, used only for the table and chart.
# It stays None when either source column is missing from the log.
afr_real_etanol = encontrar_coluna(df_ethanol, _rotulos_afr)
maf_real_etanol = encontrar_coluna(df_ethanol, _rotulos_maf)
if afr_real_etanol is None or maf_real_etanol is None:
    co2_real_etanol = None
else:
    co2_real_etanol = compute_co2(afr_real_etanol.values, maf_real_etanol.values, "ethanol")

afr_real_gasoline = encontrar_coluna(df_gasoline, _rotulos_afr)
maf_real_gasoline = encontrar_coluna(df_gasoline, _rotulos_maf)
if afr_real_gasoline is None or maf_real_gasoline is None:
    co2_real_gasoline = None
else:
    co2_real_gasoline = compute_co2(afr_real_gasoline.values, maf_real_gasoline.values, "gasoline")

valores_reais = {"ethanol": co2_real_etanol, "gasoline": co2_real_gasoline}

# Run every metamorphic relation against both fuels, ethanol first.
for nome_mr, transformacao in testes_metamorficos_n3.items():
    for combustivel, df_original in (("ethanol", df_ethanol), ("gasoline", df_gasoline)):
        # The transformation receives its own copy of the reference data.
        df_modificado = transformacao(df_original.copy())
        modelos_combustivel = modelos[combustivel]
        executar_teste_plotly(
            df_original,
            df_modificado,
            modelos_combustivel["afr"],
            modelos_combustivel["maf"],
            combustivel,
            nome_mr,
            co2_real=valores_reais[combustivel],
        )

Arquivos de dados carregados com sucesso.
Modelos de IA carregados com sucesso.
INICIANDO EXECUÇÃO DOS TESTES DE NÍVEL 3

🔁 CT_N3_012: [Composta] - Simular Subida Íngreme (ETHANOL)
--------------------------------------------------------------------------------
Diferença Média: 0.4546 | Diferença Máxima: 1.5340 | RMSE: 0.5474 | Desvio Padrão (Std): 0.3049


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.820105,-35.220818,3,0.0,→,-5.820105,-35.220818,40.0,1.5,,0.642839,0.742631,1.183137
1,-5.820103,-35.220828,5,0.555556,→,-5.820103,-35.220828,40.0,1.5,,0.653249,0.771505,1.183137
2,-5.820102,-35.220838,5,0.0,→,-5.820102,-35.220838,40.0,1.5,,0.653249,0.771505,1.183137
3,-5.820096,-35.220843,3,-0.555556,→,-5.820096,-35.220843,40.0,1.5,,0.679275,0.659013,1.183137
4,-5.820091,-35.22085,0,-0.833333,→,-5.820091,-35.22085,40.0,1.5,,0.610306,0.667689,1.184775
5,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,40.0,1.5,,0.610306,0.706858,1.184775
6,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,40.0,1.5,,0.490587,0.706858,1.184775
7,-5.820085,-35.220856,3,0.833333,→,-5.820085,-35.220856,40.0,1.5,,1.090483,1.137803,1.184775
8,-5.820074,-35.220859,3,0.0,→,-5.820074,-35.220859,40.0,1.5,,1.090483,1.137803,1.184775
9,-5.820064,-35.220858,3,0.0,→,-5.820064,-35.220858,40.0,1.5,,0.921315,1.137803,1.184775



🔁 CT_N3_012: [Composta] - Simular Subida Íngreme (GASOLINE)
--------------------------------------------------------------------------------
Diferença Média: 0.7524 | Diferença Máxima: 3.0226 | RMSE: 0.9451 | Desvio Padrão (Std): 0.5719


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.82001,-35.220825,5,0.0,→,-5.82001,-35.220825,40.0,1.5,,1.002671,1.456781,2.95401
1,-5.820013,-35.220822,4,-0.277778,→,-5.820013,-35.220822,40.0,1.5,,1.01336,1.016708,2.95401
2,-5.820016,-35.220819,5,0.277778,→,-5.820016,-35.220819,40.0,1.5,,2.08872,1.456781,2.95401
3,-5.820014,-35.220825,7,0.555556,→,-5.820014,-35.220825,40.0,1.5,,2.261889,1.99638,2.95401
4,-5.819992,-35.220829,9,0.555556,→,-5.819992,-35.220829,40.0,1.5,,2.499195,2.400486,3.159595
5,-5.819974,-35.220824,12,0.833333,→,-5.819974,-35.220824,40.0,1.5,,2.606089,2.400486,3.159595
6,-5.819944,-35.220823,15,0.833333,→,-5.819944,-35.220823,40.0,1.5,,3.070012,2.148721,3.012688
7,-5.819908,-35.22081,18,0.833333,→,-5.819908,-35.22081,40.0,1.5,,2.445747,1.47094,2.745029
8,-5.819866,-35.220794,18,0.0,→,-5.819866,-35.220794,40.0,1.5,,1.073221,1.588029,2.66002
9,-5.819821,-35.220775,16,-0.555556,→,-5.819821,-35.220775,40.0,1.5,,0.562265,1.085921,2.704771



🔁 CT_N3_013: [Composta] - Simular Inércia em Declive (ETHANOL)
--------------------------------------------------------------------------------
Diferença Média: 0.8052 | Diferença Máxima: 2.9472 | RMSE: 0.9333 | Desvio Padrão (Std): 0.4720


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.820105,-35.220818,3,0.0,→,-5.820105,-35.220818,80.0,-0.5,,0.642839,0.742631,1.587458
1,-5.820103,-35.220828,5,0.555556,→,-5.820103,-35.220828,80.0,-0.5,,0.653249,0.771505,1.587458
2,-5.820102,-35.220838,5,0.0,→,-5.820102,-35.220838,80.0,-0.5,,0.653249,0.771505,1.587458
3,-5.820096,-35.220843,3,-0.555556,→,-5.820096,-35.220843,80.0,-0.5,,0.679275,0.659013,1.587458
4,-5.820091,-35.22085,0,-0.833333,→,-5.820091,-35.22085,80.0,-0.5,,0.610306,0.667689,1.51875
5,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,80.0,-0.5,,0.610306,0.706858,1.51875
6,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,80.0,-0.5,,0.490587,0.706858,1.51875
7,-5.820085,-35.220856,3,0.833333,→,-5.820085,-35.220856,80.0,-0.5,,1.090483,1.137803,1.51875
8,-5.820074,-35.220859,3,0.0,→,-5.820074,-35.220859,80.0,-0.5,,1.090483,1.137803,1.51875
9,-5.820064,-35.220858,3,0.0,→,-5.820064,-35.220858,80.0,-0.5,,0.921315,1.137803,1.51875



🔁 CT_N3_013: [Composta] - Simular Inércia em Declive (GASOLINE)
--------------------------------------------------------------------------------
Diferença Média: 0.6179 | Diferença Máxima: 2.6798 | RMSE: 0.7613 | Desvio Padrão (Std): 0.4448


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.82001,-35.220825,5,0.0,→,-5.82001,-35.220825,80.0,-0.5,,1.002671,1.456781,2.0518
1,-5.820013,-35.220822,4,-0.277778,→,-5.820013,-35.220822,80.0,-0.5,,1.01336,1.016708,2.0518
2,-5.820016,-35.220819,5,0.277778,→,-5.820016,-35.220819,80.0,-0.5,,2.08872,1.456781,2.0518
3,-5.820014,-35.220825,7,0.555556,→,-5.820014,-35.220825,80.0,-0.5,,2.261889,1.99638,2.0518
4,-5.819992,-35.220829,9,0.555556,→,-5.819992,-35.220829,80.0,-0.5,,2.499195,2.400486,1.762516
5,-5.819974,-35.220824,12,0.833333,→,-5.819974,-35.220824,80.0,-0.5,,2.606089,2.400486,1.762516
6,-5.819944,-35.220823,15,0.833333,→,-5.819944,-35.220823,80.0,-0.5,,3.070012,2.148721,1.69465
7,-5.819908,-35.22081,18,0.833333,→,-5.819908,-35.22081,80.0,-0.5,,2.445747,1.47094,1.607205
8,-5.819866,-35.220794,18,0.0,→,-5.819866,-35.220794,80.0,-0.5,,1.073221,1.588029,1.607205
9,-5.819821,-35.220775,16,-0.555556,→,-5.819821,-35.220775,80.0,-0.5,,0.562265,1.085921,1.591138



🔁 CT_N3_014: [Composta] - Simular Frenagem Brusca (Cut-off) (ETHANOL)
--------------------------------------------------------------------------------
Diferença Média: 0.3633 | Diferença Máxima: 2.3686 | RMSE: 0.5575 | Desvio Padrão (Std): 0.4229


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.820105,-35.220818,3,0.0,→,-5.820105,-35.220818,75,-4.0,,0.642839,0.742631,0.812752
1,-5.820103,-35.220828,5,0.555556,→,-5.820103,-35.220828,0,-4.0,,0.653249,0.771505,0.563401
2,-5.820102,-35.220838,5,0.0,→,-5.820102,-35.220838,0,-4.0,,0.653249,0.771505,0.563401
3,-5.820096,-35.220843,3,-0.555556,→,-5.820096,-35.220843,0,-4.0,,0.679275,0.659013,0.563401
4,-5.820091,-35.22085,0,-0.833333,→,-5.820091,-35.22085,0,-4.0,,0.610306,0.667689,0.546637
5,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,0,-4.0,,0.610306,0.706858,0.546637
6,-5.820091,-35.22085,0,0.0,→,-5.820091,-35.22085,0,-4.0,,0.490587,0.706858,0.546637
7,-5.820085,-35.220856,3,0.833333,→,-5.820085,-35.220856,0,-4.0,,1.090483,1.137803,0.546637
8,-5.820074,-35.220859,3,0.0,→,-5.820074,-35.220859,0,-4.0,,1.090483,1.137803,0.546637
9,-5.820064,-35.220858,3,0.0,→,-5.820064,-35.220858,0,-4.0,,0.921315,1.137803,0.546637



🔁 CT_N3_014: [Composta] - Simular Frenagem Brusca (Cut-off) (GASOLINE)
--------------------------------------------------------------------------------
Diferença Média: 0.6492 | Diferença Máxima: 2.9090 | RMSE: 0.9072 | Desvio Padrão (Std): 0.6338


Unnamed: 0,Latitude_Orig,Longitude_Orig,Speed(OBD)(km/h)_Orig,Acceleration_Orig,Unnamed: 5,Latitude_Mod,Longitude_Mod,Speed(OBD)(km/h)_Mod,Acceleration_Mod,Unnamed: 10,CO₂_Real,CO₂_Pred_Orig,CO₂_Pred_Mod
0,-5.82001,-35.220825,5,0.0,→,-5.82001,-35.220825,75,-4.0,,1.002671,1.456781,2.437737
1,-5.820013,-35.220822,4,-0.277778,→,-5.820013,-35.220822,0,-4.0,,1.01336,1.016708,0.873537
2,-5.820016,-35.220819,5,0.277778,→,-5.820016,-35.220819,0,-4.0,,2.08872,1.456781,0.873537
3,-5.820014,-35.220825,7,0.555556,→,-5.820014,-35.220825,0,-4.0,,2.261889,1.99638,0.873537
4,-5.819992,-35.220829,9,0.555556,→,-5.819992,-35.220829,2,-4.0,,2.499195,2.400486,0.870895
5,-5.819974,-35.220824,12,0.833333,→,-5.819974,-35.220824,4,-4.0,,2.606089,2.400486,0.973499
6,-5.819944,-35.220823,15,0.833333,→,-5.819944,-35.220823,7,-4.0,,3.070012,2.148721,0.842412
7,-5.819908,-35.22081,18,0.833333,→,-5.819908,-35.22081,10,-4.0,,2.445747,1.47094,0.771938
8,-5.819866,-35.220794,18,0.0,→,-5.819866,-35.220794,13,-4.0,,1.073221,1.588029,0.808814
9,-5.819821,-35.220775,16,-0.555556,→,-5.819821,-35.220775,13,-4.0,,0.562265,1.085921,0.732426
