# Import Libraries

In [None]:
import joblib
import pandas as pd
from sklearn.feature_selection import mutual_info_regression, mutual_info_classif
from sklearn.feature_selection import f_regression, f_classif
from sklearn.metrics import r2_score, accuracy_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import  LogisticRegression
from sklearn.metrics import (mean_absolute_error, mean_squared_error, r2_score,accuracy_score, precision_score, recall_score, f1_score)
from sklearn.neighbors import KNeighborsRegressor

# Load PKL

In [2]:
# Each pickle is loaded as one object and unpacked into
# (X_train, X_test, y_train, y_test) for its target.
cost_split = joblib.load("data/cost_train_test_split.pkl")
time_split = joblib.load("data/time_train_test_split.pkl")
long_split = joblib.load("data/long_train_test_split.pkl")

X_cost_train, X_cost_test, y_cost_train, y_cost_test = cost_split
X_time_train, X_time_test, y_time_train, y_time_test = time_split
X_long_train, X_long_test, y_long_train, y_long_test = long_split

# Cost Feature Selection

In [3]:
# Rank every candidate feature by two filter scores and keep the seven with
# the best combined rank.
cols = list(X_cost_train.columns)

# mutual_info_regression draws random numbers internally, so the seed is
# pinned (same value the models use) to keep the ranking reproducible.
mi = pd.Series(
    mutual_info_regression(X_cost_train, y_cost_train, random_state=42),
    index=cols,
)
f_scores, _ = f_regression(X_cost_train, y_cost_train)
anova = pd.Series(f_scores, index=cols)

# Rank 1 = highest score for each criterion; the smallest rank sum comes first.
rank_cost = (mi.rank(ascending=False) + anova.rank(ascending=False)).sort_values()

top_cost = list(rank_cost.index[:7])
print("COST TOP FEATURES:", top_cost)

COST TOP FEATURES: ['Energy Consumed (kWh)', 'State of Charge (End %)', 'Charger Type', 'Energy per 100 km (kWh/100 km)', 'Charging Station Location', 'Vehicle Model', 'Charging Rate (kW)']


In [4]:
# Restrict both halves of the cost split to the selected feature columns.
X_cost_train, X_cost_test = X_cost_train[top_cost], X_cost_test[top_cost]

In [5]:
# Persist the reduced cost split as one (X_train, X_test, y_train, y_test) tuple.
# NOTE(review): this writes to the same path the split was loaded from, so a
# re-run starts from the already-reduced features — confirm that is intended.
cost_split_selected = (X_cost_train, X_cost_test, y_cost_train, y_cost_test)
joblib.dump(cost_split_selected, "data/cost_train_test_split.pkl")

['data/cost_train_test_split.pkl']

# Cost Model

In [6]:
# Random-forest regressor for the cost target: 300 trees, fixed seed,
# n_jobs=-1 to use every available core. fit() returns the estimator itself.
cost_model = RandomForestRegressor(
    n_estimators=300,
    random_state=42,
    n_jobs=-1,
).fit(X_cost_train, y_cost_train)

# Save the fitted model; dump() returns the list of written file names.
joblib.dump(cost_model, "saved_models/cost_model.pkl")

['saved_models/cost_model.pkl']

In [24]:
# Evaluate the cost model on the held-out split.
cost_pred = cost_model.predict(X_cost_test)

# A regressor's .score() is R² computed from a fresh predict() call; reuse the
# predictions above instead of running all trees over the test set again.
cost_model_score = r2_score(y_cost_test, cost_pred)
print("COST MODEL SCORE:", cost_model_score)
print("R²  :", round(cost_model_score, 4))

COST MODEL SCORE: 0.9996718885214897
R²  : 0.9997


In [74]:
# Baseline for comparison: k-nearest-neighbours regressor (k=5) on the same
# cost features.
# NOTE(review): KNN is distance-based and no scaling is applied in this cell —
# confirm the features were standardised upstream.
cost_model2 = KNeighborsRegressor(n_neighbors=5)
cost_model2.fit(X_cost_train, y_cost_train)
cost_pred2 = cost_model2.predict(X_cost_test)

# .score() would repeat the neighbour search; R² from the stored predictions
# is the same number.
cost_model2_score = r2_score(y_cost_test, cost_pred2)
print("COST MODEL 2 SCORE:", cost_model2_score)
print("R²  :", round(cost_model2_score, 4))


COST MODEL 2 SCORE: 0.48504517454660845
R²  : 0.485


# Time Feature Selection

In [53]:
# Rank every candidate feature by two filter scores and keep up to seven with
# the best combined rank.
# NOTE(review): the saved output lists only five features although seven are
# requested, which suggests the loaded frame had already been reduced (the
# split pickle is overwritten later in this notebook) — confirm.
cols = list(X_time_train.columns)

# mutual_info_regression draws random numbers internally, so the seed is
# pinned (same value the models use) to keep the ranking reproducible.
mi = pd.Series(
    mutual_info_regression(X_time_train, y_time_train, random_state=42),
    index=cols,
)
f_scores, _ = f_regression(X_time_train, y_time_train)
anova = pd.Series(f_scores, index=cols)

# Rank 1 = highest score for each criterion; the smallest rank sum comes first.
rank_time = (mi.rank(ascending=False) + anova.rank(ascending=False)).sort_values()

top_time = list(rank_time.index[:7])
print("TIME TOP FEATURES:", top_time)

TIME TOP FEATURES: ['Long Session', 'State of Charge (Start %)', 'Battery Capacity (kWh)', 'State of Charge (End %)', 'Energy Consumed (kWh)']


In [54]:
# Restrict both halves of the time split to the selected feature columns.
X_time_train, X_time_test = X_time_train[top_time], X_time_test[top_time]


In [55]:
# Persist the reduced time split as one (X_train, X_test, y_train, y_test) tuple.
# NOTE(review): this writes to the same path the split was loaded from, so a
# re-run starts from the already-reduced features — confirm that is intended.
time_split_selected = (X_time_train, X_time_test, y_time_train, y_time_test)
joblib.dump(time_split_selected, "data/time_train_test_split.pkl")

['data/time_train_test_split.pkl']

# Time Model

In [66]:
# Random-forest regressor for the time target: 300 trees, fixed seed.
# n_jobs=-1 matches the cost model and builds the trees on all cores; with
# random_state fixed the fitted forest does not depend on the worker count.
time_model = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)
time_model.fit(X_time_train, y_time_train)

# Save the fitted model; dump() returns the list of written file names.
joblib.dump(time_model, "saved_models/time_model.pkl")

['saved_models/time_model.pkl']

In [67]:
# Evaluate the time model on the held-out split.
time_pred = time_model.predict(X_time_test)

# A regressor's .score() is R² computed from a fresh predict() call; reuse the
# predictions above instead of running all trees over the test set again.
time_model_score = r2_score(y_time_test, time_pred)
print("TIME MODEL SCORE:", time_model_score)
print("R²  :", round(time_model_score, 4))


TIME MODEL SCORE: 0.04177390429498373
R²  : 0.0418


In [75]:
# Baseline for comparison: k-nearest-neighbours regressor (k=5) on the same
# time features.
# NOTE(review): KNN is distance-based and no scaling is applied in this cell —
# confirm the features were standardised upstream.
time_model2 = KNeighborsRegressor(n_neighbors=5)
time_model2.fit(X_time_train, y_time_train)
time_pred2 = time_model2.predict(X_time_test)

# .score() would repeat the neighbour search; R² from the stored predictions
# is the same number.
time_model2_score = r2_score(y_time_test, time_pred2)
print("TIME MODEL 2 SCORE:", time_model2_score)
print("R²  :", round(time_model2_score, 4))


TIME MODEL 2 SCORE: -0.17986429839258178
R²  : -0.1799


# Long Session Feature Selection

In [12]:
# Rank every candidate feature by two filter scores (classification variants)
# and keep the seven with the best combined rank.
cols = list(X_long_train.columns)

# mutual_info_classif draws random numbers internally, so the seed is pinned
# (same value the models use) to keep the ranking reproducible.
mi = pd.Series(
    mutual_info_classif(X_long_train, y_long_train, random_state=42),
    index=cols,
)
f_scores, _ = f_classif(X_long_train, y_long_train)
anova = pd.Series(f_scores, index=cols)

# Rank 1 = highest score for each criterion; the smallest rank sum comes first.
rank_long = (mi.rank(ascending=False) + anova.rank(ascending=False)).sort_values()

top_long = list(rank_long.index[:7])
print("LONG TOP FEATURES:", top_long)

LONG TOP FEATURES: ['Charging Time Difference (minutes)', 'Charging Rate (kW)', 'Battery Capacity (kWh)', 'Vehicle Age (years)', 'Energy Consumed (kWh)', 'Temperature (°C)', 'Charging Station Location']


In [13]:
# Restrict both halves of the long-session split to the selected feature columns.
X_long_train, X_long_test = X_long_train[top_long], X_long_test[top_long]


In [14]:
# Persist the reduced long-session split as one (X_train, X_test, y_train, y_test) tuple.
# NOTE(review): this writes to the same path the split was loaded from, so a
# re-run starts from the already-reduced features — confirm that is intended.
long_split_selected = (X_long_train, X_long_test, y_long_train, y_long_test)
joblib.dump(long_split_selected, "data/long_train_test_split.pkl")

['data/long_train_test_split.pkl']

# Long Model

In [15]:
# Random-forest classifier for the long-session label: 300 trees, fixed seed.
# n_jobs=-1 matches the cost model and builds the trees on all cores; with
# random_state fixed the fitted forest does not depend on the worker count.
long_model = RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1)
long_model.fit(X_long_train, y_long_train)

# Save the fitted model; dump() returns the list of written file names.
joblib.dump(long_model, "saved_models/long_session_model.pkl")

['saved_models/long_session_model.pkl']

In [65]:
# Evaluate the long-session classifier on the held-out split.
long_pred = long_model.predict(X_long_test)

# A classifier's .score() is accuracy computed from a fresh predict() call;
# reuse the predictions above instead of predicting a second time.
long_model_score = accuracy_score(y_long_test, long_pred)
print("LONG MODEL SCORE:", long_model_score)

LONG MODEL SCORE: 0.8492489270386266


In [72]:
# Baseline for comparison: logistic regression on the same long-session
# features (max_iter raised to 1000 iterations).
long_model1 = LogisticRegression(max_iter=1000)
long_model1.fit(X_long_train, y_long_train)
long_pred1 = long_model1.predict(X_long_test)

# A classifier's .score() is accuracy computed from a fresh predict() call;
# reuse the predictions above instead of predicting a second time.
long_model1_score = accuracy_score(y_long_test, long_pred1)
print("LONG MODEL 1 SCORE:", long_model1_score)


LONG MODEL 1 SCORE: 0.7451716738197425
