# 📦 Import Libraries

In [None]:
import os
import joblib

In [None]:
import numpy as np
import pandas as pd

In [None]:
import matplotlib.pyplot as plt

In [None]:
from lazypredict.Supervised import LazyRegressor

In [None]:
from utils.data_acquisition import *
from utils.data_preprocessing import *

In [None]:
# Shared plotting configuration used by every figure in this notebook.

# Figure size in inches, passed to plt.subplots(figsize=...).
plot_size = (12, 6)

# Line colours for the plotted series.
graph_colors = {
    "black": "#1F1E1F",
    "blue": "#03A9CE",
}

# Colours for the title / subtitle text drawn on the figures.
text_colors = {
    "title": "#262626",
    "subtitle": "#646464",
}

# Output folder for saved figures.
fig_path = "./figures/"

# exist_ok=True makes this idempotent and avoids the check-then-create race
# of testing os.path.exists() before calling os.makedirs().
os.makedirs(fig_path, exist_ok=True)

# 💾 Data Acquisition

In [None]:
# Columns kept from each recording once the trips are merged.
SELECTED_COLUMNS = [
    "DeviceTime",
    "Latitude",
    "Longitude",
    "Speed(OBD)(km/h)",
    "AirFuelRatio(Commanded)(:1)",
    "Acceleration",
]

# Root folder of the recorded trips; folder names are kept verbatim.
DATA_PATH = "./data/"

GASOLINE_DATA_PATH_1 = f"{DATA_PATH}[Gasolina] Trajeto Casa-Escola-UFRN/"
GASOLINE_DATA_PATH_2 = f"{DATA_PATH}[Gasolina2023] Trajeto Casa-Escola-UFRN/"
ETHANOL_DATA_PATH = f"{DATA_PATH}[Etanol] Trajeto Casa-Escola-UFRN/"

# Resolve the entries of each folder with the project helper
# (get_list_paths comes from the utils star-imports).
GASOLINE_DATA_PATHS_1 = get_list_paths(GASOLINE_DATA_PATH_1)
GASOLINE_DATA_PATHS_2 = get_list_paths(GASOLINE_DATA_PATH_2)

# Both gasoline folders are treated as a single dataset.
GASOLINE_DATA_PATHS = GASOLINE_DATA_PATHS_1 + GASOLINE_DATA_PATHS_2

ETHANOL_DATA_PATHS = get_list_paths(ETHANOL_DATA_PATH)

In [None]:
# Load every recording of each fuel type through the project helper;
# gasoline is read first, then ethanol.
all_data_gasoline, all_data_ethanol = map(
    read_all_data,
    (GASOLINE_DATA_PATHS, ETHANOL_DATA_PATHS),
)

# 🧩 Data Modelling

In [None]:
# Prepare each gasoline recording, then merge them into a single frame.
#
# The prepared frames are collected explicitly: rebinding the loop variable
# alone does not update the list, so the original loop only worked if
# remove_space_in_columns mutated its argument in place. Collecting the
# returned frame is correct whether the helper mutates or returns a copy.
prepared_gasoline = []
for data in all_data_gasoline:
    data = remove_space_in_columns(data)
    # compute_acceleration is a project helper; its result is stored as a new
    # "Acceleration" column, which SELECTED_COLUMNS expects later on.
    data["Acceleration"] = compute_acceleration(data)
    prepared_gasoline.append(data)

all_data_gasoline = concatenate_dataframes(prepared_gasoline)
all_data_gasoline = filter_columns(all_data_gasoline, SELECTED_COLUMNS)

# The timestamp is not used as a model feature. Reassigning instead of
# drop(inplace=True) avoids mutating a frame that may be a view/slice of the
# concatenated data returned by filter_columns.
all_data_gasoline = all_data_gasoline.drop(columns=["DeviceTime"])

In [None]:
# Prepare each ethanol recording, then merge them into a single frame.
#
# The prepared frames are collected explicitly: rebinding the loop variable
# alone does not update the list, so the original loop only worked if
# remove_space_in_columns mutated its argument in place. Collecting the
# returned frame is correct whether the helper mutates or returns a copy.
prepared_ethanol = []
for data in all_data_ethanol:
    data = remove_space_in_columns(data)
    # compute_acceleration is a project helper; its result is stored as a new
    # "Acceleration" column, which SELECTED_COLUMNS expects later on.
    data["Acceleration"] = compute_acceleration(data)
    prepared_ethanol.append(data)

all_data_ethanol = concatenate_dataframes(prepared_ethanol)
all_data_ethanol = filter_columns(all_data_ethanol, SELECTED_COLUMNS)

# The timestamp is not used as a model feature. Reassigning instead of
# drop(inplace=True) avoids mutating a frame that may be a view/slice of the
# concatenated data returned by filter_columns.
all_data_ethanol = all_data_ethanol.drop(columns=["DeviceTime"])

In [None]:
# Discard every row that still contains a missing value, for both fuels.
all_data_gasoline, all_data_ethanol = (
    frame.dropna() for frame in (all_data_gasoline, all_data_ethanol)
)

In [None]:
# Column the models learn to predict; every other column is a feature.
AFR_COLUMN = "AirFuelRatio(Commanded)(:1)"

# Gasoline: separate features from the target, then split with the project
# helper split_data.
gasoline_features = all_data_gasoline.drop(columns=[AFR_COLUMN])
gasoline_target = all_data_gasoline[AFR_COLUMN]
(
    X_train_gasoline,
    X_test_gasoline,
    y_train_gasoline,
    y_test_gasoline,
) = split_data(gasoline_features, gasoline_target)

# Ethanol: same procedure on the ethanol frame.
ethanol_features = all_data_ethanol.drop(columns=[AFR_COLUMN])
ethanol_target = all_data_ethanol[AFR_COLUMN]
(
    X_train_ethanol,
    X_test_ethanol,
    y_train_ethanol,
    y_test_ethanol,
) = split_data(ethanol_features, ethanol_target)

# 🔎 Exploratory Data Analysis (EDA)

## Gasoline

In [None]:
# Display (rows, columns) of the cleaned gasoline dataset.
all_data_gasoline.shape

## Ethanol

In [None]:
# Display (rows, columns) of the cleaned ethanol dataset.
all_data_ethanol.shape

# 🤖 Model Training

## Gasoline

In [None]:
# LazyRegressor settings for the gasoline AFR model search; random_state is
# fixed at 42 so repeated runs use the same seed.
gasoline_search_options = {
    "verbose": 0,
    "ignore_warnings": False,
    "custom_metric": None,
    "random_state": 42,
}

lr_gasoline_afr = LazyRegressor(**gasoline_search_options)

In [None]:
# Hand LazyRegressor the underlying arrays (.values) of the gasoline split,
# in the order: train features, test features, train target, test target.
gasoline_split_arrays = [
    part.values
    for part in (
        X_train_gasoline,
        X_test_gasoline,
        y_train_gasoline,
        y_test_gasoline,
    )
]

models_gasoline_afr, predictions_gasoline_afr = lr_gasoline_afr.fit(
    *gasoline_split_arrays
)

In [None]:
# Display the first value returned by LazyRegressor.fit for gasoline
# (presumably the per-model score table — confirm against lazypredict docs).
models_gasoline_afr

In [None]:
# Plot real vs. predicted AFR for the tail of the gasoline test split and
# save the chart as <fig_path>gasoline_afr.png.
fig, ax = plt.subplots(figsize=plot_size)

ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)

ax.set_ylabel("AFR", fontsize=14)
ax.set_xlabel("Sample", fontsize=14)

plt.xticks(rotation=0, ha="left")

# NOTE(review): the model was fitted on `.values` arrays but receives a
# DataFrame here — confirm the fitted object accepts both.
y_pred = lr_gasoline_afr.models["LGBMRegressor"].predict(X_test_gasoline)

# Show at most the last 100 samples. Deriving the count from the data keeps
# the x and y lengths equal when the test split has fewer than 100 rows
# (a hard-coded range(100) would raise a shape-mismatch error in that case).
n_samples = min(100, len(y_test_gasoline))
sample_index = list(range(n_samples))

ax.plot(sample_index, y_test_gasoline[-n_samples:], label="Real", color=graph_colors["black"])
ax.plot(sample_index, y_pred[-n_samples:], label="Predict", color=graph_colors["blue"], linewidth=2)

# Title and subtitle are placed in data coordinates (x=0, y=32 / y=31).
plt.text(0, 32, "Air Fuel Ratio [AFR] - Predicted vs Real", fontsize=18, color=text_colors["title"], weight="bold")
plt.text(0, 31, f"Comparative between the last {n_samples} samples of Gasoline data", fontsize=14, color=text_colors["subtitle"])

ax.legend(loc="upper right").set_frame_on(False)

plt.tight_layout()
plt.savefig(f"{fig_path}gasoline_afr.png")
plt.show()

## Ethanol

In [None]:
# LazyRegressor settings for the ethanol AFR model search; random_state is
# fixed at 42 so repeated runs use the same seed.
ethanol_search_options = {
    "verbose": 0,
    "ignore_warnings": False,
    "custom_metric": None,
    "random_state": 42,
}

lr_ethanol_afr = LazyRegressor(**ethanol_search_options)

In [None]:
# Hand LazyRegressor the underlying arrays (.values) of the ethanol split,
# in the order: train features, test features, train target, test target.
ethanol_split_arrays = [
    part.values
    for part in (
        X_train_ethanol,
        X_test_ethanol,
        y_train_ethanol,
        y_test_ethanol,
    )
]

models_ethanol_afr, predictions_ethanol_afr = lr_ethanol_afr.fit(
    *ethanol_split_arrays
)

In [None]:
# Display the first value returned by LazyRegressor.fit for ethanol
# (presumably the per-model score table — confirm against lazypredict docs).
models_ethanol_afr

In [None]:
# Plot real vs. predicted AFR for the tail of the ethanol test split and
# save the chart as <fig_path>ethanol_afr.png.
fig, ax = plt.subplots(figsize=plot_size)

ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)

ax.set_ylabel("AFR", fontsize=14)
ax.set_xlabel("Sample", fontsize=14)

plt.xticks(rotation=0, ha="left")

# NOTE(review): the model was fitted on `.values` arrays but receives a
# DataFrame here — confirm the fitted object accepts both.
y_pred = lr_ethanol_afr.models["LGBMRegressor"].predict(X_test_ethanol)

# Show at most the last 100 samples. Deriving the count from the data keeps
# the x and y lengths equal when the test split has fewer than 100 rows
# (a hard-coded range(100) would raise a shape-mismatch error in that case).
n_samples = min(100, len(y_test_ethanol))
sample_index = list(range(n_samples))

ax.plot(sample_index, y_test_ethanol[-n_samples:], label="Real", color=graph_colors["black"])
ax.plot(sample_index, y_pred[-n_samples:], label="Predict", color=graph_colors["blue"], linewidth=2)

# Title and subtitle are placed in data coordinates (x=0, y=32 / y=31).
plt.text(0, 32, "Air Fuel Ratio [AFR] - Predicted vs Real", fontsize=18, color=text_colors["title"], weight="bold")
plt.text(0, 31, f"Comparative between the last {n_samples} samples of Ethanol data", fontsize=14, color=text_colors["subtitle"])

ax.legend(loc="upper right").set_frame_on(False)

plt.tight_layout()
plt.savefig(f"{fig_path}ethanol_afr.png")
plt.show()

# 💾 Save Model

In [None]:
# Persist the gasoline model stored under the "LGBMRegressor" key.
# The target folder is created first: unlike ./figures/, nothing else in this
# notebook creates ./models/, so joblib.dump would fail on a fresh checkout.
os.makedirs("./models/", exist_ok=True)
joblib.dump(lr_gasoline_afr.models["LGBMRegressor"], "./models/LGBMRegressor_gasoline_afr.pkl")

In [None]:
# Persist the ethanol model stored under the "LGBMRegressor" key.
# The target folder is created first: unlike ./figures/, nothing else in this
# notebook creates ./models/, so joblib.dump would fail on a fresh checkout.
os.makedirs("./models/", exist_ok=True)
joblib.dump(lr_ethanol_afr.models["LGBMRegressor"], "./models/LGBMRegressor_ethanol_afr.pkl")