<a href="https://colab.research.google.com/github/rizalpernata1/EV_Charging_Time_Prediction_Using_XGBoost/blob/main/EV_Charging_Time_Prediction_using_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
df = pd.read_csv('station_data_dataverse.csv')

## EDA

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3395 entries, 0 to 3394
Data columns (total 24 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sessionId       3395 non-null   int64  
 1   kwhTotal        3395 non-null   float64
 2   dollars         3395 non-null   float64
 3   created         3395 non-null   object 
 4   ended           3395 non-null   object 
 5   startTime       3395 non-null   int64  
 6   endTime         3395 non-null   int64  
 7   chargeTimeHrs   3395 non-null   float64
 8   weekday         3395 non-null   object 
 9   platform        3395 non-null   object 
 10  distance        2330 non-null   float64
 11  userId          3395 non-null   int64  
 12  stationId       3395 non-null   int64  
 13  locationId      3395 non-null   int64  
 14  managerVehicle  3395 non-null   int64  
 15  facilityType    3395 non-null   int64  
 16  Mon             3395 non-null   int64  
 17  Tues            3395 non-null   i

In [None]:
df.describe()

Unnamed: 0,sessionId,kwhTotal,dollars,startTime,endTime,chargeTimeHrs,distance,userId,stationId,locationId,managerVehicle,facilityType,Mon,Tues,Wed,Thurs,Fri,Sat,Sun,reportedZip
count,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,2330.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0,3395.0
mean,5487001.0,5.809629,0.118268,13.743446,16.455965,2.841488,18.652378,57423950.0,576789.678056,629934.460677,0.595582,2.428571,0.181443,0.18704,0.210015,0.216495,0.179676,0.018262,0.007069,0.703976
std,2590657.0,2.892727,0.492562,3.20437,3.406732,1.507472,11.420571,26747720.0,257486.310402,255620.993849,0.490851,0.811204,0.385442,0.390001,0.407379,0.411916,0.383974,0.133918,0.083793,0.456569
min,1004821.0,0.0,0.0,0.0,0.0,0.0125,0.856911,10427670.0,129465.0,125372.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3234666.0,4.35,0.0,11.0,14.0,2.110278,5.135871,33295480.0,369001.0,481066.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,5451498.0,6.23,0.0,13.0,16.0,2.808889,21.023826,49241810.0,549414.0,503205.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,7746644.0,6.83,0.0,17.0,20.0,3.544167,27.285053,81880520.0,864630.0,878393.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,9998981.0,23.68,7.5,23.0,23.0,55.238056,43.059292,98345810.0,995505.0,978130.0,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
df.isna().sum()

sessionId            0
kwhTotal             0
dollars              0
created              0
ended                0
startTime            0
endTime              0
chargeTimeHrs        0
weekday              0
platform             0
distance          1065
userId               0
stationId            0
locationId           0
managerVehicle       0
facilityType         0
Mon                  0
Tues                 0
Wed                  0
Thurs                0
Fri                  0
Sat                  0
Sun                  0
reportedZip          0
dtype: int64

## Preprocessing

In [None]:
df = df.dropna()

## Choosing the features to be used

In [None]:
selected_features = ['kwhTotal', 'Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'chargeTimeHrs']
df = df[selected_features]

## Separating features and target

In [None]:
X = df.drop('chargeTimeHrs', axis=1)
y = df['chargeTimeHrs']

## Splitting the data into training and test sets

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Train XGBoost Model

In [None]:
model = XGBRegressor(random_state=42)
model.fit(X_train, y_train)

## Predicting charging time on the test data

In [None]:
y_pred = model.predict(X_test)

## Display the predicted and actual values

In [None]:
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results)

         Actual  Predicted
1289   4.417222   2.571585
2341   2.526111   3.544685
2593   0.506111   1.119157
1386   4.377778   3.262690
2363   2.379722   3.424935
...         ...        ...
2030   3.203056   3.221154
915    2.997222   3.064754
1414   3.064167   2.317014
2553  11.586944   3.645858
1983   2.577778   2.735735

[466 rows x 2 columns]


## Calculating the mean absolute error

In [None]:
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error:", mae)

Mean Absolute Error: 0.8318828035757794
