In [24]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
data = pd.read_csv("preprocessed_electric_vehicle_trip_data.csv")
data.head()

Unnamed: 0,vehID,step,acceleration(m/s²),actualBatteryCapacity(Wh),SoC(%),speed(m/s),speedFactor,totalEnergyConsumed(Wh),totalEnergyRegenerated(Wh),lon,lat,alt,slope(º),completedDistance(km),mWh,remainingRange(km),time,energyConsumptionRate(Wh/km)
0,EV0,101,0.75,0.292495,1.0,0.040074,0.202206,3.6e-05,0.0,0.00905,4e-05,0.924076,0.578218,3.9e-05,0.065912,0.043997,101,0.937392
1,EV0,102,0.75,0.292448,0.999685,0.080148,0.202206,0.000143,0.0,0.008993,0.000159,0.925038,0.578218,7.7e-05,0.05453,0.034358,102,1.849586
2,EV0,103,0.75,0.292373,0.99937,0.120221,0.202206,0.000312,0.0,0.008906,0.000337,0.926481,0.578218,0.000174,0.049856,0.030394,103,1.789669
3,EV0,104,0.75,0.292271,0.99874,0.160295,0.202206,0.000543,0.0,0.008791,0.000575,0.928406,0.578218,0.00029,0.04724,0.02817,104,1.868065
4,EV1,104,0.85,0.292478,0.999685,0.061446,0.355191,7.5e-05,0.0,0.009035,7.1e-05,0.924332,0.578218,3.9e-05,0.047814,0.028677,104,1.945341


In [25]:
print(data.columns)  # See all column names

Index(['vehID', 'step', 'acceleration(m/s²)', 'actualBatteryCapacity(Wh)',
       'SoC(%)', 'speed(m/s)', 'speedFactor', 'totalEnergyConsumed(Wh)',
       'totalEnergyRegenerated(Wh)', 'lon', 'lat', 'alt', 'slope(º)',
       'completedDistance(km)', 'mWh', 'remainingRange(km)', 'time',
       'energyConsumptionRate(Wh/km)'],
      dtype='object')


In [26]:
# Define features and target
X = data.drop(columns=["SoC(%)","vehID"])  # Assuming 'SoC' is the target variable
y = data["SoC(%)"]

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Random Forest Model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Train Neural Network Model
mlp_model = MLPRegressor(hidden_layer_sizes=(128, 64, 32), activation='relu', max_iter=1000, random_state=42)
mlp_model.fit(X_train_scaled, y_train)

In [27]:




# Evaluate Models
def evaluate_model(model, X_test, y_test, name):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"{name} RMSE: {rmse:.4f}")
    print(f"{name} R² Score: {r2:.4f}\n")

evaluate_model(rf_model, X_test_scaled, y_test, "Random Forest")
evaluate_model(mlp_model, X_test_scaled, y_test, "Neural Network")

# Save models and scaler
joblib.dump(rf_model, "random_forest_model.pkl")
joblib.dump(mlp_model, "neural_network_model.pkl")
joblib.dump(scaler, "scaler_nn_random.pkl")

print("Models and scaler saved successfully!")


Random Forest RMSE: 0.0005
Random Forest R² Score: 1.0000

Neural Network RMSE: 0.0093
Neural Network R² Score: 0.9984

Models and scaler saved successfully!
