In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
#Loading dataset
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# only runs where the CSV exists at exactly this location.
data = pd.read_csv("C:\\UnderGrad\\data\\ChargePoint_Data_CY20Q4.csv")
# Preview the first rows of the loaded frame.
data.head()

## Data Preprocessing

### Grid Setup 

Helper functions that define the grid of the raster map and assign each record to a grid cell based on its longitude and latitude.

In [None]:
def findcell(row, list_x, list_y):
    '''
        Find the grid cell that contains one row's coordinates.

        Args: row, mapping/Series with 'Longitude' and 'Latitude' entries
              list_x, ascending sequence of longitude cell boundaries
              list_y, ascending sequence of latitude cell boundaries
        Returns: (x_gridcell, y_gridcell), where each value is the index of the
                 first boundary that is >= the coordinate, minus one. A coordinate
                 at or below the first boundary therefore yields -1.
        Raises: ValueError if a coordinate is greater than every boundary or is
                NaN (previously this surfaced as an UnboundLocalError).
    '''
    def _cell_index(value, edges, label):
        # Scan the boundaries in order; the first one that is >= value closes the cell.
        for i, edge in enumerate(edges):
            if value <= edge:
                return i - 1
        # No boundary matched: the value is above the grid or is NaN.
        raise ValueError(f'{label} value {value!r} is outside the grid boundaries')

    x_gridcell = _cell_index(row['Longitude'], list_x, 'Longitude')
    y_gridcell = _cell_index(row['Latitude'], list_y, 'Latitude')

    return x_gridcell, y_gridcell

def getcell(data, nx=5, ny=5):
    '''
        Assign grid cell indices to each row.

        The bounding box of the 'Longitude'/'Latitude' columns, padded by 0.00005
        on every side, is split into nx by ny equally sized cells. Two integer
        columns, 'xcell' and 'ycell', are written to `data` in place.

        Args: data, dataframe with 'Longitude' and 'Latitude' columns
              nx, int - number of cells along the longitude axis (default 5)
              ny, int - number of cells along the latitude axis (default 5)
        Returns: data, the same dataframe object with 'xcell'/'ycell' added.
                 Rows with a missing coordinate get -1 in both columns.
        Raises: ValueError if `data` has no rows.
    '''
    if len(data) == 0:
        raise ValueError('Cannot build a grid from an empty dataframe')

    lon = data['Longitude'].to_numpy(dtype=float)
    lat = data['Latitude'].to_numpy(dtype=float)

    #grid boundaries (the padding keeps the extreme points strictly inside the grid)
    x = np.linspace(np.nanmin(lon)-0.00005, np.nanmax(lon)+0.00005, nx+1)
    y = np.linspace(np.nanmin(lat)-0.00005, np.nanmax(lat)+0.00005, ny+1)

    # side='left' returns the index of the first boundary that is >= the value,
    # so subtracting one gives the index of the cell that contains the value.
    xcell = np.searchsorted(x, lon, side='left') - 1
    ycell = np.searchsorted(y, lat, side='left') - 1

    # Rows that cannot be located keep the -1 sentinel.
    missing = np.isnan(lon) | np.isnan(lat)
    xcell[missing] = -1
    ycell[missing] = -1

    data['xcell'] = xcell
    data['ycell'] = ycell

    return data



In [None]:
# Add the 'xcell'/'ycell' grid indices to every row of the dataset.
data = getcell(data)

In [None]:
# Shorten the energy column name so it can be used as a plain 'Energy' key later on.
data.rename(columns={'Energy (kWh)':'Energy'}, inplace=True)

In [None]:
#Parse the start timestamps and truncate them to midnight (daily resolution)
data['Start Date'] = pd.to_datetime(data['Start Date']).dt.floor('D')

#Index the frame by day so rows can be selected by date
data.set_index('Start Date', inplace=True)

In [None]:
#Keep only the records from 2012-01-01 onwards.
#Sort chronologically first: slicing by a date label that may not be an exact index
#entry needs a monotonic index, and the plotting cell reads data.index[-1] as the
#most recent day. A stable sort keeps the original order of rows within each day.
data = data.sort_index(kind='stable').loc['2012-01-01':]
data

### Raster Map setup

In [None]:
from datetime import datetime, timedelta
def build_rastermap(data, nx=5, ny=5):
    '''
        Function that builds the 3D array of raster maps.

        Every row adds its 'Energy' to the cell
        [days since the earliest index value, xcell, ycell]. Days or cells with
        no rows stay at 0. Rows whose cell indices fall outside the grid
        (for example the -1 sentinel) are ignored, and missing 'Energy' values
        contribute 0.

        Args: data, dataframe indexed by a DatetimeIndex with 'xcell', 'ycell'
                    and 'Energy' columns
              nx, int - grid size along the xcell axis (default 5)
              ny, int - grid size along the ycell axis (default 5)
        Returns: raster_map, 3D float array of shape (number of days, nx, ny)
        Raises: ValueError if `data` has no rows.
    '''
    if len(data) == 0:
        raise ValueError('Cannot build a raster map from an empty dataframe')

    timestart = data.index.min()
    #whole-day offset of every row from the first day
    day_offsets = np.asarray((data.index - timestart).days)
    num_days = int(day_offsets.max()) + 1

    xcell = data['xcell'].to_numpy()
    ycell = data['ycell'].to_numpy()
    energy = data['Energy'].fillna(0).to_numpy(dtype=float)

    #only rows that fall inside the grid contribute
    inside = (xcell >= 0) & (xcell < nx) & (ycell >= 0) & (ycell < ny)

    #3d raster map
    raster_map = np.zeros([num_days, nx, ny])
    # np.add.at accumulates repeated (day, x, y) indices; a plain fancy-index
    # assignment would keep only the last row of each cell.
    np.add.at(
        raster_map,
        (day_offsets[inside], xcell[inside].astype(np.intp), ycell[inside].astype(np.intp)),
        energy[inside],
    )
    return raster_map

In [None]:
# Build the (days, x-cell, y-cell) array of summed energy from the preprocessed dataset.
raster_map = build_rastermap(data)

In [None]:
# Display the dimensions of the raster map as a sanity check.
raster_map.shape

### Train-test split for Conv models 

Splitting the dataset into training and testing subsets using the raster map.

In [None]:
def traintest(data, model_name, forecast_h, raster_map, seed=None):
    '''
        Function that builds the train-test sets.

        The raster map is scaled cell by cell by (its maximum over time + 0.01),
        exact zeros are replaced by Gaussian noise (mean 0, std 0.01), and the
        series is cut into sliding windows of 30 lagged steps followed by
        `forecast_h` target steps. The last 30 windows (horizon 1 or 7) or the
        last 120 windows (horizon 30) form the test set.

        Args: data, dataframe (not used by this function; kept for compatibility)
              model_name, string - 'convlstm' selects the time-first layout with
                          a trailing channel axis; any other value selects the
                          channels-last layout
              forecast_h, int - forecasting horizon, one of 1, 7 or 30
              raster_map, 3D array of shape (time, nx, ny)
              seed, optional int - seeds the noise for reproducible output;
                    None (default) draws from numpy's global random state
        Returns: X_train, y_train, X_test, y_test, maxval
                 'convlstm': X_* is (n, 30, nx, ny, 1), y_* is (n, forecast_h, nx, ny)
                 otherwise:  X_* is (n, nx, ny, 30),    y_* is (n, nx, ny, forecast_h)
                 y_test is multiplied back by maxval (original units); y_train
                 and both X arrays stay normalized. maxval has shape (nx, ny).
        Raises: ValueError if forecast_h is not 1, 7 or 30, or if raster_map has
                too few time steps for the requested split.
    '''
    if forecast_h not in [1,7,30]:
        raise ValueError('Forecasting horizon must be 1, 7 or 30')

    numlags=30 #lagged timesteps
    window = numlags + forecast_h

    # NOTE(review): this yields shape[0] - window windows, so the window that
    # ends on the very last time step is never generated - confirm this is intended.
    rng = raster_map.shape[0] - window
    #number of held-out windows
    ntest = 30 if forecast_h in [1,7] else 120
    itrain = rng - ntest
    if itrain < 0:
        raise ValueError(
            f'raster_map has {raster_map.shape[0]} time steps; at least '
            f'{window + ntest} are needed for a {forecast_h}-day horizon'
        )

    # NOTE(review): maxval is taken over the whole series, test period included.
    maxval = np.max(raster_map, axis=0)+0.01
    #normalizing raster map
    norm = raster_map/maxval
    norm[np.isnan(norm)]=0 #nan vals at 0
    #exact zeros are replaced by small Gaussian noise (mean 0, std 0.01)
    # NOTE(review): the noise also ends up in the de-normalized y_test targets.
    zero_mask = norm == 0
    zero_means = np.zeros_like(norm[zero_mask])
    if seed is None:
        norm[zero_mask] = np.random.normal(zero_means, 0.01)
    else:
        norm[zero_mask] = np.random.default_rng(seed).normal(zero_means, 0.01)

    #sliding windows: matrixlags[i] covers time steps i .. i+window-1
    matrixlags = np.zeros((rng, window) + norm.shape[1:])
    for i in range(rng):
        matrixlags[i] = norm[i:i+window,:,:]

    #Train-Test split: first `numlags` steps are inputs, the rest are targets
    X_train = matrixlags[:itrain,:numlags].copy()
    y_train = matrixlags[:itrain,numlags:].copy()
    X_test = matrixlags[itrain:,:numlags].copy()
    y_test = matrixlags[itrain:,numlags:].copy()

    if model_name=='convlstm':
        #adjusting for CNN-LSTM: add a trailing channel axis to the inputs
        X_trainlstm = X_train[:,:,:,:,np.newaxis]
        X_testlstm = X_test[:,:,:,:,np.newaxis]
        y_testlstm = y_test*maxval
        return X_trainlstm,y_train,X_testlstm,y_testlstm, maxval

    #channels-last layout: move the time axis to the end
    maxvalexp = maxval[np.newaxis,:, :, np.newaxis]
    X_train = np.moveaxis(X_train,1,-1)
    y_train = np.moveaxis(y_train,1,-1)
    X_test = np.moveaxis(X_test,1,-1)
    y_test = np.moveaxis(y_test,1,-1)
    y_test_denorm = y_test*maxvalexp
    return X_train,y_train,X_test,y_test_denorm, maxval

In [None]:
# Forecasting horizon in days; traintest only accepts 1, 7 or 30.
forecast_h=1
# Any model_name other than 'convlstm' (here 'conv') selects the channels-last layout.
X_train, y_train, X_test, y_test, maxval = traintest(data,'conv',forecast_h, raster_map)
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_test shape: {y_test.shape}')
# Per-sample input shape (batch axis dropped), passed to the model builder.
input_shape = X_train.shape[1:]
print(f'Input shape: {input_shape}')

## Forecasting models

### 2D Convolutional Model

In [None]:
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, LSTM, BatchNormalization, Dense, Reshape, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError

def buildconv(input_shape, forecast_h):
    '''
        Assemble and compile the 2D convolutional forecaster.
        Args: input_shape, tuple - shape of one input sample (batch axis excluded)
              forecast_h, int - number of units of the final Dense layer
        Returns: mdl, compiled Sequential model
    '''
    #layer stack: 3x3 same-padded convolution -> batch norm -> dense head on the last axis
    stack = [
        Conv2D(filters=16, kernel_size=3, activation='relu', padding='same', input_shape=input_shape),
        BatchNormalization(),
        Dense(forecast_h, activation='relu'),
    ]
    mdl = Sequential(stack)

    #mean absolute error, minimised with Adam at a learning rate of 1e-4
    mdl.compile(loss=MeanAbsoluteError(), optimizer=Adam(learning_rate=1e-4))
    return mdl

In [None]:
# Build the CNN for the current input shape and horizon, then print its layer summary.
mdlcnn = buildconv(input_shape,forecast_h)
mdlcnn.summary()

In [None]:
# Train the CNN for 100 epochs on the training split.
# NOTE(review): X_train/y_train are overwritten later by the ConvLSTM split cell,
# so this cell must run before that one.
mdlcnn.fit(X_train, y_train, epochs=100)

In [None]:
# Predict on the test inputs.
predcnn = mdlcnn.predict(X_test)
# Multiply by maxval (expanded with a leading and a trailing axis) to undo the
# per-cell normalization applied in traintest.
predcnn = predcnn * maxval[np.newaxis,:, :, np.newaxis]
# Sum over axes 1 and 2 (the two grid axes of the channels-last layout),
# leaving one total per test sample and horizon step.
totalpredcnn = np.sum(predcnn, axis=(1,2))
totalactualcnn = np.sum(y_test, axis=(1,2))

In [None]:
# Root-mean-square error between the grid-wide predicted and actual totals.
rmse = np.sqrt(np.mean((totalpredcnn-totalactualcnn)**2))
print(f'RMSE for CNN in {forecast_h} forecasting days: {rmse}')

### Hybrid Conv-LSTM model

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
def buildconvlstm(input_shape,forecast_h):
    '''
        Assemble and compile the hybrid Conv + LSTM forecaster.
        Args: input_shape, tuple - shape of one input sample (batch axis excluded);
                           entries 1 and 2 are used as the two trailing output dimensions
              forecast_h, int - size of the first output dimension
        Returns: mdl, compiled Sequential model
    '''
    rows = input_shape[1]
    cols = input_shape[2]

    #the convolution and flatten are applied to every time step, then an LSTM reads the sequence
    stack = [
        keras.Input(shape=input_shape),
        layers.TimeDistributed(layers.Conv2D(16,3,activation='relu',padding='same')),
        layers.TimeDistributed(layers.Flatten()),
        layers.BatchNormalization(),
        layers.LSTM(50, dropout=0.1),
        layers.BatchNormalization(),
        #one output per cell and horizon step, reshaped to (forecast_h, rows, cols)
        layers.Dense(rows*cols*forecast_h),
        layers.Reshape((forecast_h,rows,cols)),
    ]
    mdl = Sequential(stack)

    #mean absolute error, minimised with Adam at a learning rate of 1e-4
    mdl.compile(loss=MeanAbsoluteError(), optimizer=Adam(learning_rate=1e-4))
    return mdl

In [None]:
# Rebuild the split in the 'convlstm' layout.
# NOTE(review): this overwrites X_train, y_train, X_test, y_test, maxval and
# input_shape from the CNN split; re-running the CNN cells after this one would
# feed them the ConvLSTM-shaped arrays.
X_train, y_train, X_test, y_test, maxval = traintest(data,'convlstm',forecast_h, raster_map)
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_test shape: {y_test.shape}')
# Per-sample input shape (batch axis dropped), passed to the model builder.
input_shape = X_train.shape[1:]
print(f'Input shape: {input_shape}')

In [None]:
# Build the Conv-LSTM model for the current input shape and horizon, then print its layer summary.
mdl_lstm = buildconvlstm(input_shape, forecast_h)
mdl_lstm.summary()

In [None]:
# Train the Conv-LSTM model for 100 epochs on the training split.
mdl_lstm.fit(X_train, y_train, epochs=100)

In [None]:
# Predict on the test inputs.
predlstm = mdl_lstm.predict(X_test)
# maxval broadcasts against the two trailing axes of the prediction, undoing the
# per-cell normalization applied in traintest.
predlstm = predlstm * maxval
# Sum over axes 2 and 3 (the two grid axes of the 'convlstm' layout),
# leaving one total per test sample and horizon step.
totalpredlstm = np.sum(predlstm, axis=(2,3))
totalactuallstm = np.sum(y_test, axis=(2,3))

In [None]:
# Root-mean-square error between the grid-wide predicted and actual totals.
rmse = np.sqrt(np.mean((totalpredlstm-totalactuallstm)**2))
print(f'RMSE for CNN-LSTM in {forecast_h} forecasting days: {rmse}')

In [None]:
import matplotlib.pyplot as plt

#transfering tgcn forecasted values from other notebook.
tgcn7 = [12227.31948756, 11694.1825426 , 11645.55619525, 11443.05265468,
       11569.57864392, 12087.08339964, 11896.93519662]
tgcn30 = [51318.08576366, 51346.11326565, 51507.70399133, 51457.43757854,
       51700.83425308, 51820.0790417 , 51609.09431133, 51364.15813247,
       51743.12194754, 51627.75168776, 51745.20995092, 51599.82558309,
       51317.60523569, 51657.77594607, 51531.21234152, 51449.28571977,
       51585.33255327, 51588.77571669, 51546.62707234, 51482.91585872,
       51417.81598935, 51340.13877193, 51392.86726985, 51194.4655082 ,
       51144.42805127, 51141.6029261 , 51076.31028627, 51164.41770911,
       51379.3938884 , 51303.54574994]


def plot_forecasts(forecastdata, legend_kwargs=None):
    '''
        Plot the actual energy against the CNN, ConvLSTM and TGCN forecasts.
        Args: forecastdata, dataframe with 'Date', 'Actual Energy', 'CNN Forecast',
                            'ConvLSTM Forecast' and 'TGCN Forecast' columns
              legend_kwargs, optional dict of keyword arguments for the legend
        Returns: None (the figure is shown)
    '''
    #(column, line style, colour, legend label) for every curve
    curves = [
        ('Actual Energy', '-', 'k', 'Actual Energy'),
        ('CNN Forecast', '--', 'b', 'CNN Forecasted Energy'),
        ('ConvLSTM Forecast', '--', 'r', 'ConvLSTM Forecasted Energy'),
        ('TGCN Forecast', '--', 'green', 'TGCN Forecasted Energy'),
    ]

    fig, ax = plt.subplots(figsize=(10,6))
    for column, linestyle, color, label in curves:
        ax.plot(forecastdata['Date'], forecastdata[column], marker='o', linestyle=linestyle, color=color, label=label)

    ax.set_xlabel('Date')
    ax.set_ylabel('Energy Demand')
    ax.grid(True)
    ax.legend(**(legend_kwargs or {}))
    ax.set_title('CNN-ConvLSTM-TGCN Forecasted vs Actual Energy')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()


if forecast_h == 1:
    print('No plot is made for 1-day forecasting')
else:
    #7-day plot for the weekly horizon, 30-day plot otherwise
    horizon = 7 if forecast_h == 7 else 30
    tgcn = tgcn7 if horizon == 7 else tgcn30
    legend_kwargs = None if horizon == 7 else {'loc': 'lower center', 'fontsize': 'small'}

    #One calendar day per horizon step, ending on the most recent day in the data.
    #Built with date_range so the axis always has `horizon` entries, even if a day
    #has no records or the index is not sorted.
    lastdate = pd.to_datetime(data.index).max()
    forecast_dates = pd.date_range(end=lastdate, periods=horizon, freq='D')

    #Totals per horizon step, summed over all test samples.
    #y_test must be in the 'convlstm' layout (sample, step, x, y) for these axes;
    #totalpredcnn and totalpredlstm must come from runs with the same forecast_h.
    s = np.sum(y_test, axis=(0,2,3))
    cnn_total = np.sum(totalpredcnn, axis=0)
    lstm_total = np.sum(totalpredlstm, axis=0)

    forecastdata = pd.DataFrame({
        'Date': forecast_dates,
        'Actual Energy': s,
        'CNN Forecast': cnn_total,
        'ConvLSTM Forecast': lstm_total,
        'TGCN Forecast': tgcn
    })

    #PLOT
    plot_forecasts(forecastdata, legend_kwargs)