In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load your dataset
df = pd.read_excel("C:/Users/RAJIB/Desktop/UniCharge/ml-engine/models/cpu_prediction/datasets/updated_ev_patterns1.csv.xlsx")


In [4]:
df.head()

Unnamed: 0,Energy Consumed (kWh),Charging Rate (kW),Charging Cost (USD),Time of Day,Day of Week,Vehicle Age (years),Charger Type,User Type,total_duration,Cost per Unit
0,60.712346,39.463025,13.087717,1,5,2,0,1,0.65,0.22
1,12.339275,24.925336,21.128448,2,1,3,1,0,2.02,1.71
2,19.128876,53.560852,35.66727,2,4,2,2,1,2.8,1.86
3,79.457824,293.993949,13.036239,1,2,1,1,2,3.7,0.16
4,19.629104,34.743514,10.161471,2,2,1,1,2,1.77,0.52


In [5]:
df.columns

Index(['Energy Consumed (kWh)', 'Charging Rate (kW)', 'Charging Cost (USD)',
       'Time of Day', 'Day of Week', 'Vehicle Age (years)', 'Charger Type',
       'User Type', 'total_duration', 'Cost per Unit'],
      dtype='object')

In [16]:
target = "Cost per Unit"
features = [col for col in df.columns if col != target]

X = df[features]
y = df[target]

# Identify categorical and numerical columns
categorical_cols = ["Time of Day", "Day of Week", "Charger Type", "User Type"]
numerical_cols = [col for col in features if col not in categorical_cols]

# Preprocessing
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_cols),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
])

# Model pipeline
model = Pipeline([
    ("preprocessing", preprocessor),
    ("regressor", GradientBoostingRegressor(n_estimators=190, learning_rate=0.1,max_leaf_nodes=8, random_state=42))
])

# Train-test split
X_train, X_test, y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.3f}")
print(f"R² Score: {r2:.3f}")

RMSE: 0.072
R² Score: 0.945


In [2]:
import pickle

# Save trained model
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)