# **Forecasting the EV charging demand for 2019 and 2020**

### **Importing Required Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split

from scipy.spatial import distance_matrix

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

### **Reading the Datasets**

In [None]:
# Load the historical demand table and the 2018 charging-infrastructure table.
demand, infra = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/Demand_History.csv",
        "/content/exisiting_EV_infrastructure_2018.csv",
    )
)

## **Exploratory Data Analysis (EDA) and Data Visualization**

In [None]:
# Overlay the demand-point coordinates and the infrastructure coordinates
# on one scatter plot (two series, default colour cycle).
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(demand['x_coordinate'], demand['y_coordinate'])
ax.scatter(infra['x_coordinate'], infra['y_coordinate'])
plt.show()

In [None]:
# Preview the first five rows of the demand table.
demand.head(n=5)

In [None]:
# Transposed view of the demand table: original columns become rows.
demand2 = demand.transpose()
demand2.head(n=10)

In [None]:
# Keep only the value columns by removing the identifier and coordinates.
demand3 = demand.drop(['demand_point_index', 'x_coordinate', 'y_coordinate'], axis=1)
demand3.head(n=10)

In [None]:
# Plot one line per demand point for the first 29 rows of demand3.
# Slicing the frame up front replaces the manual counter + `break`
# (the counter started at 1 and stopped at 30, i.e. 29 rows were plotted).
plt.figure(figsize=(10, 6))
for index, row in demand3.iloc[:29].iterrows():
    plt.plot(row, label=index)
plt.legend()
plt.show()

In [None]:
# Demand points shaded by their 2018 value ("Reds" colormap), with the
# infrastructure points drawn on top in black.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(demand['x_coordinate'], demand['y_coordinate'], c=demand["2018"], cmap="Reds")
ax.scatter(infra['x_coordinate'], infra['y_coordinate'], color='black')
plt.title("2018")

## **Finding the Change in Demand at charging points over the years**

In [None]:
# Absolute and percentage change between the 2017 and 2018 columns.
# NOTE(review): diff% divides by the 2017 value, so a zero there yields
# inf/NaN - confirm against the data.
demand['diff'] = demand['2018'].sub(demand['2017'])
demand['diff%'] = demand['diff'].mul(100).div(demand['2017'])
demand['diff'].describe()

In [None]:
# Box plot of the absolute 2017 -> 2018 change.
plt.figure(figsize=(10, 6))
demand['diff'].plot(kind='box')
plt.show()

In [None]:
# Display the demand table, which now also carries the 'diff' and 'diff%' columns.
demand

In [None]:
# Row(s) whose absolute change equals the column minimum.
demand.loc[demand['diff'].eq(demand['diff'].min())]

In [None]:
# Demand points shaded by absolute change, infrastructure points in black.
fig2, ax2 = plt.subplots(figsize=(10, 6))
ax2.scatter(demand['x_coordinate'], demand['y_coordinate'], c=demand["diff"], cmap="Reds")
ax2.scatter(infra['x_coordinate'], infra['y_coordinate'], color='black')
plt.title("Difference")

In [None]:
# Per-column non-null counts for rows whose change is above 55 or below -30.
demand.loc[demand['diff'].gt(55) | demand['diff'].lt(-30)].count()

In [None]:
# Rows whose absolute change exceeds 55.
demand.loc[demand['diff'].gt(55)]

In [None]:
# Rows whose absolute change is below -30.
demand.loc[demand['diff'].lt(-30)]

In [None]:
# Rows with an absolute change strictly between 5 and 10.
demand.loc[demand['diff'].gt(5) & demand['diff'].lt(10)]

In [None]:
# Rows with an absolute change strictly between 20 and 30.
demand.loc[demand['diff'].gt(20) & demand['diff'].lt(30)]

In [None]:
# Rows with an absolute change strictly between 30 and 40.
demand.loc[demand['diff'].gt(30) & demand['diff'].lt(40)]

In [None]:
# Rows with an absolute change strictly between -20 and -10.
demand.loc[demand['diff'].gt(-20) & demand['diff'].lt(-10)]

In [None]:
# Summary statistics of the 2018 column.
demand.loc[:, '2018'].describe()

In [None]:
# Box plot of the percentage change.
plt.figure(figsize=(7, 10))
demand['diff%'].plot(kind='box')
plt.show()

In [None]:
# Row(s) whose percentage change equals the column minimum.
demand.loc[demand['diff%'].eq(demand['diff%'].min())]

In [None]:
# Summary statistics of the percentage change.
demand.loc[:, 'diff%'].describe()

In [None]:
# Demand points shaded by percentage change, infrastructure points in black.
fig3, ax3 = plt.subplots(figsize=(10, 6))
ax3.scatter(demand['x_coordinate'], demand['y_coordinate'], c=demand["diff%"], cmap="Reds")
ax3.scatter(infra['x_coordinate'], infra['y_coordinate'], color='black')
plt.title("Difference%")

In [None]:
# Rows whose percentage change is NaN. `isnull()` already yields a boolean
# mask, so no `== True` comparison is needed. This expression is not the
# last statement of the cell, so its value is not displayed.
demand[demand['diff%'].isnull()]
# Treat an undefined percentage change as "no change".
demand['diff%'] = demand['diff%'].fillna(0)

In [None]:
# Single-column frame holding only the percentage change; it is the
# feature table passed to the distance matrix and the clustering below.
diff_df = demand['diff%'].to_frame()
diff_df.describe()

In [None]:
# Pairwise distance between the diff% values of all demand points,
# labelled on both axes by the demand-table index.
diff_values = diff_df.to_numpy()
d_mat = pd.DataFrame(
    distance_matrix(diff_values, diff_values),
    index=diff_df.index,
    columns=demand.index,
)
d_mat

## **Dividing the dataset into different groups based on change in demand over years, to apply different clustering hyperparameters to each**

In [None]:
from sklearn.cluster import DBSCAN

# Cluster the demand points on their percentage change alone.
clusters = DBSCAN(eps=5, min_samples=100)
clusters.fit(diff_df)  # fit() returns the estimator itself
# Cluster label assigned to each row of diff_df
clusters.labels_

In [None]:
# Spatial scatter of the demand points coloured by DBSCAN cluster label.
plt.figure(figsize=(10, 8))
p = sns.scatterplot(
    data=diff_df,
    x=demand['x_coordinate'],
    y=demand['y_coordinate'],
    hue=clusters.labels_,
    palette="Reds",
    legend="full",
)
# Push the legend outside the axes so it does not cover the points.
sns.move_legend(p, loc="upper right", title='Clusters', bbox_to_anchor=(1.17, 1.2))

plt.show()

In [None]:
# Group 1: demand points whose percentage change is above 23.
# The forecasting loop for this group uses smoothing_level=0.8 and
# smoothing_trend=0.5.
is_group1 = demand['diff%'].gt(23)
group1 = demand.loc[is_group1]
group1  # not displayed: not the last statement of the cell
group1.to_csv('group1.csv')

In [None]:
# Group 2: demand points whose percentage change lies in the middle band.
# Both bounds are inclusive: the neighbouring groups use strict `> 23`
# and `< 9`, so with strict bounds here a point at exactly 9 or 23 would
# belong to no group and would never receive a forecast.
# The forecasting loop for this group uses smoothing_level=0.8 and
# smoothing_trend=0.2.
group2 = demand[(demand['diff%'] >= 9) & (demand['diff%'] <= 23)]
group2  # not displayed: not the last statement of the cell
group2.to_csv(r'group2.csv')

In [None]:
# Group 3: demand points whose percentage change is strictly between
# -26 and 9. The forecasting loop for this group uses smoothing_level=0.5
# and smoothing_trend=0.2.
# NOTE(review): rows with diff% <= -26 fall outside this group and are not
# covered by the other two filters either - confirm this is intended.
is_group3 = demand['diff%'].gt(-26) & demand['diff%'].lt(9)
group3 = demand.loc[is_group3]
group3  # not displayed: not the last statement of the cell
group3.to_csv('group3.csv')

## **Building the distance matrix to find the spatial distance between every supply point and every demand point**

In [None]:
# Preview the first ten rows of the infrastructure table.
infra.head(n=10)

In [None]:
# Manhattan (L1) distance between every supply point (rows) and every
# demand point (columns). The matrix size is taken from the data instead
# of the hard-coded 100 x 4096, and the nested Python loops are replaced
# by a single broadcast expression.
infra_xy = infra[['x_coordinate', 'y_coordinate']].to_numpy(dtype=float)
demand_xy = demand[['x_coordinate', 'y_coordinate']].to_numpy(dtype=float)

# (m, 1, 2) - (1, n, 2) -> (m, n, 2); summing |dx| and |dy| gives (m, n).
dist_mat = np.abs(infra_xy[:, None, :] - demand_xy[None, :, :]).sum(axis=2)
m, n = dist_mat.shape

dist_mat

In [None]:
# Wrap the supply-by-demand distance array in a DataFrame (default labels).
dist_df = pd.DataFrame(data=dist_mat)
dist_df

In [None]:
# Persist the distance matrix without the row index.
dist_df.to_csv(path_or_buf='distance_matrix.csv', index=False)

## **Data Analysis**

In [None]:
# Reload the raw tables; this discards the 'diff' / 'diff%' columns that
# were added to `demand` during the exploration above.
demand, infra = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/Demand_History.csv",
        "/content/exisiting_EV_infrastructure_2018.csv",
    )
)

# Reload the three groups written earlier by the grouping cells.
group1, group2, group3 = (
    pd.read_csv(csv_path)
    for csv_path in ("group1.csv", "group2.csv", "group3.csv")
)

In [None]:
# Demand table without the coordinate columns.
demand2 = demand.drop(['x_coordinate', 'y_coordinate'], axis=1)
demand2.head(n=5)

In [None]:
# Preview only: set_index returns a new frame and the result is not
# assigned, so demand2 keeps 'demand_point_index' as a regular column
# (a later cell drops it as a row after the transpose).
demand2.set_index('demand_point_index')

In [None]:
# Flip the table so that each demand point becomes a column.
demand2 = demand2.transpose()
demand2.head(n=5)

In [None]:
# Index by demand point, drop the helper columns (including the
# 'Unnamed: 0' column picked up when reading the CSV), then transpose
# so each demand point is a column.
group1 = (
    group1.set_index('demand_point_index')
    .drop(columns=['Unnamed: 0', 'diff', 'diff%', 'x_coordinate', 'y_coordinate'])
    .transpose()
)
group1

In [None]:
# Index by demand point, drop the helper columns (including the
# 'Unnamed: 0' column picked up when reading the CSV), then transpose
# so each demand point is a column.
group2 = (
    group2.set_index('demand_point_index')
    .drop(columns=['Unnamed: 0', 'diff', 'diff%', 'x_coordinate', 'y_coordinate'])
    .transpose()
)
group2

In [None]:
# Index by demand point, drop the helper columns (including the
# 'Unnamed: 0' column picked up when reading the CSV), then transpose
# so each demand point is a column.
group3 = (
    group3.set_index('demand_point_index')
    .drop(columns=['Unnamed: 0', 'diff', 'diff%', 'x_coordinate', 'y_coordinate'])
    .transpose()
)
group3

In [None]:
# Parse the row labels into timestamps.
group1_dates = pd.to_datetime(group1.index)
group1.index = group1_dates
group1.head(n=5)

In [None]:
# Parse the row labels into timestamps.
group2_dates = pd.to_datetime(group2.index)
group2.index = group2_dates
group2.head(n=5)

In [None]:
# Parse the row labels into timestamps.
group3_dates = pd.to_datetime(group3.index)
group3.index = group3_dates
group3.head(n=5)

In [None]:
# After the transpose 'demand_point_index' is a row label; remove that row.
demand2 = demand2.drop(index='demand_point_index')
demand2

In [None]:

# Line plot of the column labelled 40.
demand2.loc[:, 40].plot()

In [None]:
# Parse the row labels into timestamps.
demand2_dates = pd.to_datetime(demand2.index)
demand2.index = demand2_dates
demand2.head(n=5)

In [None]:
# Rebuild the index with an explicit frequency, using whatever frequency
# pandas infers from the existing timestamps.
detected_freq = demand2.index.inferred_freq
demand2.index = pd.DatetimeIndex(demand2.index.values, freq=detected_freq)

demand2.head(n=5)

## **Forecasting the Power Demand**

### *Holt's Exponential Smoothing*

We tried different combinations of smoothing level (alpha) and smoothing trend (beta) for each group in the data to settle on the hyperparameters.
Create 4 different models, based on either the change or the % change, with different alpha and beta values.

Example:

2018 = 40

2017 = 100

smooth level = 0.8 trend = 0.2

0.95 * 40 + 0.05*(-60) = 35

In [None]:
# First eight rows of demand2 - the slice the Holt models are fitted on.
demand2.iloc[:8]

In [None]:
# Holt's linear-trend model on the first eight observations of column
# 1802, with fixed (non-optimised) alpha = 0.8 and beta = 0.2.
holt_1802 = Holt(demand2[1802].iloc[:8], initialization_method=None)
fit1 = holt_1802.fit(smoothing_level=0.8, smoothing_trend=0.2, optimized=False)

In [None]:
# Fit Holt's linear-trend model (alpha = 0.8, beta = 0.2, not optimised)
# on the first eight observations of column 1802 and forecast two steps.
holt_1802 = Holt(demand2[1802].iloc[:8], initialization_method=None)
fit1 = holt_1802.fit(smoothing_level=0.8, smoothing_trend=0.2, optimized=False)
fcast1 = fit1.forecast(2).rename("HoltsLin")

# Black: the full observed series; blue: in-sample fit and the forecast.
plt.figure(figsize=(12, 8))
plt.plot(demand2[1802], marker="o", color="black")
plt.plot(fit1.fittedvalues, color="blue")
line1 = plt.plot(fcast1, marker="o", color="blue")[0]
plt.legend([line1], [fcast1.name])

In [None]:
# Display the two-step forecast Series produced for column 1802.
fcast1

In [None]:
# Display the column labels of demand2.
demand2.columns

In [None]:
# Display the full demand2 table.
demand2

In [None]:
# Zero-filled buffer: 2 rows (one per forecast step) by 4096 columns.
f = np.zeros(shape=(2, 4096), dtype='float64')
f.shape

In [None]:
# Forecast table with default integer labels on both axes; the date
# labels for the rows are attached in a later cell.
forecast = pd.DataFrame(data=f)
forecast.head(n=5)

In [None]:
# Display the column labels of group1 (the loop below iterates over them).
group1.columns

In [None]:
# fcast1 was fitted on column 1802 of demand2, so store it under that same
# column label (it was previously written to column 0, i.e. attributed to
# the wrong demand point).
# The values are copied by position: forecast's rows are the forecast steps
# in order, whereas fcast1 is labelled by date, and integer-position lookups
# on a non-integer-indexed Series are deprecated in pandas.
forecast[1802] = fcast1.to_numpy()

forecast.head()



In [None]:
# Holt's linear-trend forecast (alpha = 0.8, beta = 0.5, not optimised) for
# every demand point in group1, fitted on the first eight observations and
# projected two steps ahead.
g1c = group1.columns
for i in g1c:
    fit1 = Holt(group1[i].iloc[:8], initialization_method=None).fit(
        smoothing_level=0.8, smoothing_trend=0.5, optimized=False
    )
    fcast1 = fit1.forecast(2).rename("HoltsLin")
    # Copy both steps at once, by position. This avoids shadowing the
    # builtin `id` and the deprecated integer-position lookup on the
    # date-indexed forecast Series.
    forecast[i] = fcast1.to_numpy()


forecast.head()



In [None]:
# Holt's linear-trend forecast (alpha = 0.8, beta = 0.2, not optimised) for
# every demand point in group2, fitted on the first eight observations and
# projected two steps ahead.
g2c = group2.columns
for i in g2c:
    fit2 = Holt(group2[i].iloc[:8], initialization_method=None).fit(
        smoothing_level=0.8, smoothing_trend=0.2, optimized=False
    )
    fcast2 = fit2.forecast(2).rename("HoltsLin")
    # Copy both steps at once, by position. This avoids shadowing the
    # builtin `id` and the deprecated integer-position lookup on the
    # date-indexed forecast Series.
    forecast[i] = fcast2.to_numpy()


forecast.head()

In [None]:
# Holt's linear-trend forecast (alpha = 0.5, beta = 0.2, not optimised) for
# every demand point in group3, fitted on the first eight observations and
# projected two steps ahead.
g3c = group3.columns
for i in g3c:
    fit3 = Holt(group3[i].iloc[:8], initialization_method=None).fit(
        smoothing_level=0.5, smoothing_trend=0.2, optimized=False
    )
    fcast3 = fit3.forecast(2).rename("HoltsLin")
    # Copy both steps at once, by position. This avoids shadowing the
    # builtin `id` and the deprecated integer-position lookup on the
    # date-indexed forecast Series.
    forecast[i] = fcast3.to_numpy()


forecast.head()



In [None]:
# Label the two forecast rows '2018' and '2019' and convert them to
# timestamps, mirroring how the demand2 index was built.
# NOTE(review): presumably the first eight training rows end at 2017, so
# the two forecast steps are 2018 and 2019 - confirm against the CSV.
forecast.index = pd.to_datetime(pd.Index(['2018', '2019']))
detected_freq = forecast.index.inferred_freq
forecast.index = pd.DatetimeIndex(forecast.index.values, freq=detected_freq)
forecast.head(n=5)

In [None]:
# Rows of demand2 from position 8 onward - the part not used for fitting.
demand2.iloc[8:]

In [None]:
# First forecast row (the first forecast step).
forecast.iloc[:1]

In [None]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error

In [None]:
# MAPE of the first forecast step against the held-out rows of demand2.
mean_absolute_percentage_error(demand2.iloc[8:], forecast.iloc[:1])

In [None]:
# RMSE of the first forecast step against the held-out rows of demand2.
np.sqrt(mean_squared_error(demand2.iloc[8:], forecast.iloc[:1]))

In [None]:
# Write the forecast table, including its date index, to disk.
forecast.to_csv(path_or_buf='forecastfr2.csv', index=True)