In [None]:
#Library for reading csv and related operations
import pandas as pd

#for numberical operations
import numpy as np
import random
import math


#Pre processing and evaluation
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import mean_squared_error

#LSTM Model
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed

#Optimizer
from tensorflow.keras.optimizers import Adam
import datetime

In [None]:
#Settings seeds
np.random.seed(42)
random.seed(12345)
tf.random.set_seed(1234)


We have weekly date from 2017 to 2022 Jan.

 Data shape is 247 rows and 39 columns
3. We would like to forecast n_future=8. Which means 8 weeks ahead in future
4. Train on all data from 0:238 and test on 238:245 (just test on last 8 data rows)

We have columns week, then D1 to Total
(Coherent Hierarchical Time Series Forecasting)

First model: for column D1 TO D31 (Bottom level in the hierarchy )

Second Model: column R1 TO R6 (middle level in the hierarchy)

Third Model: column "Total" (Top level in the hierarchy)

In [None]:
#Reading the data 
df=pd.read_csv('DRaggDate.csv')
df['week'] = pd.to_datetime(df['week']).dt.date
df

Unnamed: 0,week,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9,R1,R2,R3,R4,R5,R6,total
0,2017-03-04,0.160,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.160,0.160
1,2017-03-11,0.275,0.330,,,,,,,,,,0.530,,,,,,,,,,,0.655,,,,0.120,0.240,,0.260,,1.485,,0.530,0.120,,0.275,2.410
2,2017-03-18,1.330,0.130,,,,0.915,,,0.725,,,0.535,,,,0.080,,,,,,,1.725,,,,0.280,1.425,,,,4.005,,0.535,0.360,0.915,1.330,7.145
3,2017-03-25,1.245,0.210,,0.960,,3.185,0.320,,,,,0.415,,,,0.020,,,,,,,1.575,,,,1.100,0.435,,,,2.220,,0.735,1.120,4.145,1.245,9.465
4,2017-04-01,0.840,0.090,,0.640,,2.645,,,,,,0.535,,,,,,,,,,,0.480,,,,1.230,0.050,,,,0.620,,0.535,1.230,3.285,0.840,6.510
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,2021-12-18,47.210,11.715,3.260,33.585,23.605,33.195,122.690,30.750,11.325,5.125,29.485,23.080,5.235,23.085,31.225,43.960,29.145,47.105,4.220,2.770,20.915,18.390,27.850,3.36,4.540,8.560,116.430,54.240,105.635,33.870,183.205,165.375,116.660,504.800,233.720,71.000,47.210,1141.500
243,2021-12-25,35.845,10.025,2.170,28.210,14.550,23.620,84.720,20.715,8.825,3.980,23.680,16.480,3.545,11.340,16.695,29.280,20.060,32.410,3.735,1.355,15.510,10.675,19.715,2.72,3.255,6.600,78.875,35.505,71.445,23.595,121.000,113.570,76.485,337.395,161.275,55.565,35.845,783.430
244,2022-01-15,40.470,8.810,1.895,27.450,20.455,20.465,98.270,27.355,9.200,4.295,26.240,18.325,4.255,15.715,25.970,38.325,19.555,37.965,3.555,2.215,16.990,17.745,18.280,1.11,3.330,7.435,90.055,45.180,75.600,25.765,134.370,129.905,91.870,380.245,192.685,51.470,40.470,889.465
245,2022-01-22,45.205,11.350,2.230,31.760,24.495,32.235,111.490,30.960,10.640,3.845,28.755,18.380,5.030,19.555,29.855,43.740,22.270,46.785,4.225,3.140,22.085,18.985,25.210,1.44,3.795,9.460,107.420,52.565,96.100,33.565,153.320,160.965,105.090,445.630,224.780,68.220,45.205,1052.755


In [None]:
#Function to calculate the inverse different of 2 values
def diff_inverse(obj_prev, value):
    return value + obj_prev

#To Calculate Mean absolute error
def MAE(x,y):
    result=0
    #looping from 0 to length(x)
    for i in range(len(x)):
    #taking the absolute of difference
        result += abs((x[i]-y[i])/x[i])
    #dividing by length
    result /= len(x)
    #multiplying it by 100
    result *= 100
    return result

#To Calculate Mean Squared Error
def RMSE(x,y):
    result=0
    for i in range(len(x)):
        result += ((x[i]-y[i])/x[i])**2
    #dividing by length
    result /= len(x)
    return result

#To Calculate Root Mean Squared Error
def dRMSE(y_true, y_pred):
    #difference wrt rows (0 axis)
    dy_true=np.diff(y_true, axis=0)
    dy_pred=np.diff(y_pred, axis=0)
    result=0
    for i in range(len(dy_true)):
        result += ((dy_true[i]-dy_pred[i])/dy_true[i])**2
    result /= len(dy_true)
    # Taking square root
    result = np.sqrt(result)
    return result

In [None]:
#Converting the series to samples for the LSTM supervised learning method
#Function to split the series into past and future observations
#Here:
# n_past ==> no of past observations
# n_future ==> no of future observations 
def split_series(series, n_past, n_future):
    X, y = list(), list()
    for window_start in range(len(series)):
        past_end = window_start + n_past
        future_end = past_end + n_future
        if future_end > len(series):
            break
        # slicing the past and future parts of the window
        past, future = series[window_start:past_end, :], series[past_end:future_end, :]
        #Adding past and future values to the list
        X.append(past)
        y.append(future)
    return np.array(X), np.array(y)

In [None]:
#Filling the missing values
df = df.bfill(axis ='rows')

**Level - 3 (Bottom)**


Total visitors according to city visit, with a total of 56 visits

In [None]:
#Choosing first 31 columns
first_daily_df = df.iloc[:,1:32]
first_daily_df[:-9] # chunk that will be used for training

Unnamed: 0,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9
0,0.160,0.330,0.240,0.960,10.570,0.915,0.320,0.190,0.725,0.460,13.360,0.530,1.270,0.130,0.020,0.080,0.110,16.510,2.500,0.260,0.11,0.020,0.655,28.345,0.715,0.050,0.120,0.240,2.230,0.260,0.060
1,0.275,0.330,0.240,0.960,10.570,0.915,0.320,0.190,0.725,0.460,13.360,0.530,1.270,0.130,0.020,0.080,0.110,16.510,2.500,0.260,0.11,0.020,0.655,28.345,0.715,0.050,0.120,0.240,2.230,0.260,0.060
2,1.330,0.130,0.240,0.960,10.570,0.915,0.320,0.190,0.725,0.460,13.360,0.535,1.270,0.130,0.020,0.080,0.110,16.510,2.500,0.260,0.11,0.020,1.725,28.345,0.715,0.050,0.280,1.425,2.230,0.125,0.060
3,1.245,0.210,0.240,0.960,10.570,3.185,0.320,0.190,0.040,0.460,13.360,0.415,1.270,0.130,0.020,0.020,0.110,16.510,2.500,0.260,0.11,0.020,1.575,28.345,0.715,0.050,1.100,0.435,2.230,0.125,0.060
4,0.840,0.090,0.240,0.640,10.570,2.645,0.160,0.190,0.040,0.460,13.360,0.535,1.270,0.130,0.020,0.720,0.110,16.510,2.500,0.260,0.11,0.020,0.480,28.345,0.715,0.050,1.230,0.050,2.230,0.125,0.060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,48.255,9.645,2.855,33.355,22.155,34.780,108.530,33.765,9.310,6.010,28.095,23.370,4.205,20.570,29.100,40.065,26.335,41.965,5.775,2.060,15.80,18.545,25.530,1.000,3.870,8.900,118.415,53.955,96.550,31.415,186.190
234,51.340,11.570,3.075,34.630,23.945,34.045,111.265,34.360,9.800,1.750,27.010,23.165,4.040,17.555,29.485,38.680,27.265,43.045,5.385,2.815,16.36,16.355,26.835,1.785,4.670,9.380,117.350,55.160,95.920,34.055,201.070
235,48.685,10.325,3.295,32.765,22.315,32.995,112.940,33.365,7.475,1.820,28.370,21.460,3.750,17.960,28.775,35.800,25.970,44.260,5.190,3.245,16.72,17.315,27.055,1.430,3.955,8.740,111.570,54.010,92.425,28.110,183.220
236,47.365,12.695,3.000,35.100,22.820,34.570,116.585,32.800,7.630,1.975,30.755,22.550,4.970,18.645,32.540,39.775,29.405,43.360,5.050,3.400,19.98,17.645,27.315,1.000,4.370,9.300,118.850,57.270,102.140,29.580,189.345


In [None]:
# Checking the shape of the data
first_daily_df.shape

(247, 31)

In [None]:
# First discrete difference of element. 
# Calculates the difference of a Dataframe element compared with another element in the Dataframe (default is element in previous row).
difference = first_daily_df.diff()
difference[:-9]

Unnamed: 0,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9
0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0.115,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
2,1.055,-0.200,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.005,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.070,0.000,0.000,0.000,0.160,1.185,0.000,-0.135,0.000
3,-0.085,0.080,0.000,0.000,0.000,2.270,0.000,0.000,-0.685,0.000,0.000,-0.120,0.000,0.000,0.000,-0.060,0.000,0.000,0.000,0.000,0.000,0.000,-0.150,0.000,0.000,0.000,0.820,-0.990,0.000,0.000,0.000
4,-0.405,-0.120,0.000,-0.320,0.000,-0.540,-0.160,0.000,0.000,0.000,0.000,0.120,0.000,0.000,0.000,0.700,0.000,0.000,0.000,0.000,0.000,0.000,-1.095,0.000,0.000,0.000,0.130,-0.385,0.000,0.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,-0.215,0.430,0.605,-0.335,-1.965,3.205,2.685,2.430,-4.295,-0.005,0.890,-0.045,0.685,-1.155,1.415,0.245,0.720,0.460,-0.125,0.015,-0.535,-3.485,-0.460,-0.300,-1.110,0.685,5.295,1.160,-0.370,0.485,9.400
234,3.085,1.925,0.220,1.275,1.790,-0.735,2.735,0.595,0.490,-4.260,-1.085,-0.205,-0.165,-3.015,0.385,-1.385,0.930,1.080,-0.390,0.755,0.560,-2.190,1.305,0.785,0.800,0.480,-1.065,1.205,-0.630,2.640,14.880
235,-2.655,-1.245,0.220,-1.865,-1.630,-1.050,1.675,-0.995,-2.325,0.070,1.360,-1.705,-0.290,0.405,-0.710,-2.880,-1.295,1.215,-0.195,0.430,0.360,0.960,0.220,-0.355,-0.715,-0.640,-5.780,-1.150,-3.495,-5.945,-17.850
236,-1.320,2.370,-0.295,2.335,0.505,1.575,3.645,-0.565,0.155,0.155,2.385,1.090,1.220,0.685,3.765,3.975,3.435,-0.900,-0.140,0.155,3.260,0.330,0.260,-0.430,0.415,0.560,7.280,3.260,9.715,1.470,6.125


In [None]:
#Features
n_past = 1
n_future = 8 # Predicting the next
no_of_features = 31

#Splitting the data into testing and training such that

# remaining 9 are set as testing data which we will compare with our forecasting predictions
train_dataF,test_dataF = difference[0:-9], difference[-9:]  
train_dataF.shape,test_dataF.shape

((238, 31), (9, 31))

In [None]:
train = train_dataF
scalers={}

#Scaling the values for faster training of the models
for i in train_dataF.columns:
    #Using MinMaxScaler (it converts data in range 0-1)
    scaler = MinMaxScaler(feature_range=(0,1))
    s_s = scaler.fit_transform(train[i].values.reshape(-1,1))
    s_s=np.reshape(s_s,len(s_s))
    scalers['scaler_'+ i] = scaler
    train[i]=s_s

test = test_dataF
for i in train_dataF.columns:
    scaler = scalers['scaler_'+i]
    s_s = scaler.transform(test[i].values.reshape(-1,1))
    s_s=np.reshape(s_s,len(s_s))
    scalers['scaler_'+i] = scaler
    test[i]=s_s


#Storing the training split series into the variables
X_train, y_train = split_series(train.values,n_past, 2)
print(X_train.shape)
print(y_train.shape)
#Reshaping according to LSTM model input
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1],no_of_features))
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], no_of_features))

#Finding the None (Nan) Values
where_are_NaNs = np.isnan(X_train)

#Setting None to 0
X_train[where_are_NaNs] = 0

#Splitting the test values as well wrt past and future
X_test, y_test = split_series(test.values,n_past, 2)

X_test = X_test.reshape((X_test.shape[0], X_test.shape[1],no_of_features))
y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], no_of_features))

(236, 1, 31)
(236, 2, 31)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy

In [None]:
#raw data 
newdataX,newdatay=split_series(first_daily_df.values,n_past, 2)

In [None]:
newdatay = newdatay[-9:, :]

In [None]:
print(X_train.shape)
print(y_train.shape)

print(X_test.shape)
print(y_test.shape)

# we use this as actual value (Testing set)
newdatay.shape

(236, 1, 31)
(236, 2, 31)
(7, 1, 31)
(7, 2, 31)


(9, 2, 31)

In [None]:
#LSTM Model Architecture
model = Sequential()
model.add(LSTM(10, input_shape=(X_train.shape[1],X_train.shape[2]),return_sequences=True))
model.add(LSTM(7))
model.add(RepeatVector(2))
model.add(LSTM(7, return_sequences=True))
model.add(LSTM(10, return_sequences=True))
model.add(TimeDistributed(Dense(no_of_features)))

#comiling the model with adam optimer and loss mean squared error
model.compile(optimizer='adam', loss='mse')

#printing summary of model
model.summary()

#Fitting the model
fit_history1=model.fit(X_train,y_train,epochs=300,validation_split=0.33,batch_size=30,verbose=2,shuffle=False)

#Saving the model
model.save("model(8w1).h5") # change here with number of step

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 10)             1680      
                                                                 
 lstm_1 (LSTM)               (None, 7)                 504       
                                                                 
 repeat_vector (RepeatVector  (None, 2, 7)             0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 2, 7)              420       
                                                                 
 lstm_3 (LSTM)               (None, 2, 10)             720       
                                                                 
 time_distributed (TimeDistr  (None, 2, 31)            341       
 ibuted)                                                

In [None]:
#Predictions on testing set - Next forecast
model_pred=model.predict(X_test)

#Inverse Scaling of the predicted values to get the real value from 0-1 range
for index,i in enumerate(train_dataF.columns):
    scaler = scalers['scaler_'+i]  
    model_pred[:,:,index]=scaler.inverse_transform(model_pred[:,:,index])

    y_train[:,:,index]=scaler.inverse_transform(y_train[:,:,index])
    y_test[:,:,index]=scaler.inverse_transform(y_test[:,:,index])

In [None]:
#Taking inverse of difference as well
model_pred = [diff_inverse( model_pred[i],  newdatay[i] ) for i in range(len(model_pred))]
model_pred=np.array(model_pred)

In [None]:
#Plots for losses 
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

#History for model 1 (first 56 columns)
history = fit_history1
#plot the training and validation accuracy and loss at each epoch
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(loss) + 1)

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(y=loss, name="Training Loss"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(y=val_loss, name="Validation Loss"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Training and Validation Loss"
)

# Set x-axis title
fig.update_xaxes(title_text="Epochs")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Training Loss</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Validation Loss</b>", secondary_y=True)

fig.show()

In [None]:
#Checking the metrics - RMSE and MAE

print('MAE_0',(MAE(newdatay[0,:], model_pred[0,:])).mean()) # for week 238
print('RMSE_0', (RMSE(newdatay[0,:], model_pred[0,:])).mean())
print('dRMSE_0', (RMSE(newdatay[0,:], model_pred[0,:])).mean())


print('MAE_1', (MAE(newdatay[1,:], model_pred[1,:])).mean()) # for  week 239

print('MAE_2', (MAE(newdatay[2,:], model_pred[2,:])).mean()) #  for  week 240

print('MAE_3', (MAE(newdatay[3,:], model_pred[3,:])).mean()) # for  week 241

print('MAE_4', (MAE(newdatay[4,:], model_pred[4,:])).mean())

print('MAE_5', (MAE(newdatay[5,:], model_pred[5,:])).mean())

print()
print('Full MAE', (MAE(newdatay[:-2], model_pred)).mean())
print('Full RMSE', (RMSE(newdatay[:-2], model_pred)).mean())
#print('dRMSE', (RMSE(newdatay, model_pred)).mean())

MAE_0 0.6527964128993387
RMSE_0 0.000274130063425723
dRMSE_0 0.000274130063425723
MAE_1 0.5397397184283824
MAE_2 0.6617871189145474
MAE_3 0.6530840348444901
MAE_4 0.46428136505909295
MAE_5 29.77926448638553

Full MAE 4.786010633939956
Full RMSE 0.12454503554309168


In [None]:
# Forecasting Visualizations

In [None]:
#Lets see the first 238 weeks data - years which we've trained our model for.
first_daily_df[:-9]

Unnamed: 0,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9
0,0.160,0.330,0.240,0.960,10.570,0.915,0.320,0.190,0.725,0.460,13.360,0.530,1.270,0.130,0.020,0.080,0.110,16.510,2.500,0.260,0.11,0.020,0.655,28.345,0.715,0.050,0.120,0.240,2.230,0.260,0.060
1,0.275,0.330,0.240,0.960,10.570,0.915,0.320,0.190,0.725,0.460,13.360,0.530,1.270,0.130,0.020,0.080,0.110,16.510,2.500,0.260,0.11,0.020,0.655,28.345,0.715,0.050,0.120,0.240,2.230,0.260,0.060
2,1.330,0.130,0.240,0.960,10.570,0.915,0.320,0.190,0.725,0.460,13.360,0.535,1.270,0.130,0.020,0.080,0.110,16.510,2.500,0.260,0.11,0.020,1.725,28.345,0.715,0.050,0.280,1.425,2.230,0.125,0.060
3,1.245,0.210,0.240,0.960,10.570,3.185,0.320,0.190,0.040,0.460,13.360,0.415,1.270,0.130,0.020,0.020,0.110,16.510,2.500,0.260,0.11,0.020,1.575,28.345,0.715,0.050,1.100,0.435,2.230,0.125,0.060
4,0.840,0.090,0.240,0.640,10.570,2.645,0.160,0.190,0.040,0.460,13.360,0.535,1.270,0.130,0.020,0.720,0.110,16.510,2.500,0.260,0.11,0.020,0.480,28.345,0.715,0.050,1.230,0.050,2.230,0.125,0.060
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,48.255,9.645,2.855,33.355,22.155,34.780,108.530,33.765,9.310,6.010,28.095,23.370,4.205,20.570,29.100,40.065,26.335,41.965,5.775,2.060,15.80,18.545,25.530,1.000,3.870,8.900,118.415,53.955,96.550,31.415,186.190
234,51.340,11.570,3.075,34.630,23.945,34.045,111.265,34.360,9.800,1.750,27.010,23.165,4.040,17.555,29.485,38.680,27.265,43.045,5.385,2.815,16.36,16.355,26.835,1.785,4.670,9.380,117.350,55.160,95.920,34.055,201.070
235,48.685,10.325,3.295,32.765,22.315,32.995,112.940,33.365,7.475,1.820,28.370,21.460,3.750,17.960,28.775,35.800,25.970,44.260,5.190,3.245,16.72,17.315,27.055,1.430,3.955,8.740,111.570,54.010,92.425,28.110,183.220
236,47.365,12.695,3.000,35.100,22.820,34.570,116.585,32.800,7.630,1.975,30.755,22.550,4.970,18.645,32.540,39.775,29.405,43.360,5.050,3.400,19.98,17.645,27.315,1.000,4.370,9.300,118.850,57.270,102.140,29.580,189.345


In [None]:
#Actual values for last 8 weeks - testing set
actual_df = pd.DataFrame(newdatay[:,1,:], index = list(range(238,247)))
for i in range(len(test.columns)):
    actual_df = actual_df.rename(columns={i: test.columns[i]}) 

actual_df["week"] = df.iloc[238:,0]
actual_df["week"] = pd.to_datetime(actual_df['week']).dt.date
cols = actual_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
actual_df= actual_df[cols]
actual_df

Unnamed: 0,week,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9
238,2021-11-20,50.54,9.49,3.2,30.77,25.305,36.54,123.0,33.485,10.74,3.275,29.465,22.33,6.1,22.05,32.715,42.765,27.19,46.905,4.175,2.73,20.365,9.43,29.04,1.8,4.015,9.325,115.355,54.385,104.85,30.865,206.4
239,2021-11-27,50.08,11.005,3.15,31.18,24.29,35.035,120.37,31.545,12.63,4.055,28.49,22.835,4.88,23.12,33.595,40.005,26.9,45.74,5.16,1.82,20.265,19.315,28.535,1.445,4.0,8.23,114.235,53.725,110.345,32.225,205.155
240,2021-12-04,46.3,10.41,3.015,31.58,25.48,32.985,121.32,34.03,12.39,3.96,29.365,20.455,4.93,21.735,28.82,41.53,28.39,45.99,4.95,2.315,21.66,16.45,29.285,1.11,4.345,8.265,117.385,57.63,106.645,31.895,165.695
241,2021-12-11,45.505,11.925,2.885,33.24,24.905,33.4,122.005,33.6,12.79,3.71,29.645,20.675,5.325,22.715,32.74,43.895,31.375,47.365,4.775,2.445,21.575,18.78,29.15,1.275,4.36,8.325,120.64,55.88,106.81,34.99,181.315
242,2021-12-18,47.21,11.715,3.26,33.585,23.605,33.195,122.69,30.75,11.325,5.125,29.485,23.08,5.235,23.085,31.225,43.96,29.145,47.105,4.22,2.77,20.915,18.39,27.85,3.36,4.54,8.56,116.43,54.24,105.635,33.87,183.205
243,2021-12-25,35.845,10.025,2.17,28.21,14.55,23.62,84.72,20.715,8.825,3.98,23.68,16.48,3.545,11.34,16.695,29.28,20.06,32.41,3.735,1.355,15.51,10.675,19.715,2.72,3.255,6.6,78.875,35.505,71.445,23.595,121.0
244,2022-01-15,40.47,8.81,1.895,27.45,20.455,20.465,98.27,27.355,9.2,4.295,26.24,18.325,4.255,15.715,25.97,38.325,19.555,37.965,3.555,2.215,16.99,17.745,18.28,1.11,3.33,7.435,90.055,45.18,75.6,25.765,134.37
245,2022-01-22,45.205,11.35,2.23,31.76,24.495,32.235,111.49,30.96,10.64,3.845,28.755,18.38,5.03,19.555,29.855,43.74,22.27,46.785,4.225,3.14,22.085,18.985,25.21,1.44,3.795,9.46,107.42,52.565,96.1,33.565,153.32
246,2022-01-29,45.75,10.405,2.46,32.49,24.325,32.645,117.965,31.165,11.725,2.755,30.75,21.355,5.43,22.6,31.44,47.065,25.535,48.65,3.935,3.31,23.16,17.58,28.2,2.18,3.91,9.11,113.2,53.02,104.755,34.495,173.575


In [None]:
#Predicted values for last 8 weeks
forecast1_df = pd.DataFrame(model_pred[1:,0,:], index = list(range(238,244))) 
forecast2_df = pd.DataFrame(model_pred[-5:-2,1,:], index = list(range(244,247))) 

for i in range(len(test.columns)):
    forecast1_df = forecast1_df.rename(columns={i: test.columns[i]})
    forecast2_df = forecast2_df.rename(columns={i: test.columns[i]}) 

merged = pd.concat([forecast1_df, forecast2_df], axis=0) 

merged["week"] = df.iloc[238:,0]
merged["week"] = pd.to_datetime(merged['week']).dt.date
cols = merged.columns.tolist()
cols = cols[-1:] + cols[:-1]
merged= merged[cols]
merged

Unnamed: 0,week,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9
238,2021-11-20,50.637178,9.529693,3.194612,30.77337,25.310356,36.585637,123.068465,33.462638,10.988887,3.052339,29.45824,22.355491,6.100611,22.041092,32.761255,42.869788,27.383557,46.914722,4.174681,2.732458,20.406241,9.551982,29.062749,1.811275,4.002383,9.328013,115.474257,54.431485,104.991885,30.942134,206.514926
239,2021-11-27,50.16326,11.037604,3.140675,31.168682,24.277822,35.047409,120.383473,31.501043,12.865225,3.783005,28.462324,22.837883,4.878252,23.098486,33.621172,40.086015,27.073684,45.726811,5.15372,1.81371,20.29144,19.418642,28.552373,1.335072,3.985923,8.228728,114.335095,53.738674,110.430587,32.272522,205.223956
240,2021-12-04,46.406051,10.454326,3.012196,31.592916,25.496998,33.052494,121.424626,34.021682,12.647582,3.769734,29.371751,20.495375,4.932181,21.734447,28.879656,41.650046,28.596473,46.014496,4.95355,2.323266,21.711036,16.583747,29.31117,1.200593,4.333335,8.270869,117.516729,57.698116,106.823648,31.991438,165.8394
241,2021-12-11,45.596803,11.962002,2.87812,33.237846,24.903796,33.43313,122.052677,33.569412,13.0336,3.468644,29.63024,20.691965,5.324736,22.701366,32.778761,43.990702,31.561068,47.365991,4.772413,2.444181,21.610699,18.895024,29.170689,1.240551,4.346847,8.326415,120.752077,55.9142,106.930526,35.055859,181.412358
242,2021-12-18,45.054498,10.768291,2.71094,30.730642,20.803148,27.955298,114.882674,27.529588,9.422271,-1.898886,26.541344,19.94636,4.821225,21.144255,28.540929,40.015169,25.300814,43.114624,3.254666,1.437009,18.6073,14.902222,26.778492,-16.22283,4.102323,7.963753,112.023867,48.783366,97.715884,29.873147,175.183856
243,2021-12-25,35.96222,10.075112,2.170406,28.234669,14.581233,23.714441,84.869457,20.724183,9.093536,3.829993,23.70371,16.538765,3.549092,11.349694,16.771092,29.419079,20.282215,32.452819,3.743357,1.370398,15.573043,10.823315,19.745454,2.90829,3.244471,6.609372,79.021854,35.599626,71.669362,23.71554,121.181282
244,2022-01-15,46.415952,10.454914,3.011509,31.584482,25.482816,33.035859,121.360943,34.021587,12.655901,3.738318,29.377735,20.457783,4.930867,21.717919,28.823298,41.653333,28.594479,45.993329,4.953043,2.317619,21.706116,16.598382,29.33023,1.017146,4.334015,8.259799,117.555416,57.664209,106.717787,31.959651,165.902538
245,2022-01-22,45.620939,11.971648,2.882646,33.236798,24.905728,33.449945,122.054904,33.595003,13.060201,3.50311,29.664584,20.68285,5.325124,22.697467,32.748514,44.015356,31.568315,47.365704,4.778024,2.44765,21.621207,18.919591,29.192357,1.171828,4.346245,8.321156,120.787662,55.910389,106.895941,35.063398,181.508601
246,2022-01-29,47.325945,11.76056,3.256932,33.586666,23.607056,33.245551,122.734276,30.742864,11.592506,4.908818,29.500306,23.084698,5.235596,23.06776,31.230271,44.082276,29.345394,47.107397,4.223041,2.772634,20.961175,18.535186,27.894164,3.263564,4.527994,8.555313,116.592001,54.272872,105.712738,33.93791,183.407399


# Forecasting for next 8 steps in future

In [None]:
ffs1 = actual_df.iloc[-1:,0]+datetime.timedelta(days=7)
ffs2 = ffs1+datetime.timedelta(days=7)
ffs3 = ffs2+datetime.timedelta(days=7)
ffs4 = ffs3+datetime.timedelta(days=7)
ffs5 = ffs4+datetime.timedelta(days=7)
ffs6 = ffs5+datetime.timedelta(days=7)
ffs7 = ffs6+datetime.timedelta(days=7)
ffs8 = ffs7+datetime.timedelta(days=7)


future_weeks = pd.concat([ffs1, ffs2, ffs3, ffs4, ffs5, ffs6, ffs7, ffs8], axis=0)
future_weeks.index = range(247,255)
future_weeks

247    2022-02-05
248    2022-02-12
249    2022-02-19
250    2022-02-26
251    2022-03-05
252    2022-03-12
253    2022-03-19
254    2022-03-26
Name: week, dtype: object

In [None]:
#Predicted forecasting for testing set - 8 weeeks ahead
forecast1_df = pd.DataFrame(model_pred[0:1,0,:], index = list(range(247,248))) 
forecast2_df = pd.DataFrame(model_pred[:,1,:], index = list(range(248,255))) 

for i in range(len(test.columns)):
    forecast1_df = forecast1_df.rename(columns={i: test.columns[i]})
    forecast2_df = forecast2_df.rename(columns={i: test.columns[i]}) 

merged = pd.concat([forecast1_df, forecast2_df], axis=0) 
merged["week"] = future_weeks
merged["week"] = pd.to_datetime(merged['week']).dt.date
cols = merged.columns.tolist()
cols = cols[-1:] + cols[:-1]
merged= merged[cols]
merged

Unnamed: 0,week,D1,D10,D11,D12,D13,D14,D15,D16,D17,D18,D19,D2,D20,D21,D22,D23,D24,D25,D26,D27,D28,D29,D3,D30,D31,D4,D5,D6,D7,D8,D9
247,2022-02-05,48.473967,10.063262,2.876618,31.505789,24.719509,33.172666,116.861554,33.993489,9.105539,2.542439,30.36861,22.997094,5.016847,19.762643,32.326751,44.526534,28.093706,42.076132,5.412672,3.397001,18.348927,9.896066,26.075358,1.553109,4.108132,8.470254,117.594033,55.098399,103.02032,28.44703,200.657565
248,2022-02-12,50.655878,9.536477,3.197549,30.767556,25.30606,36.590161,123.049235,33.47968,11.009748,3.066801,29.483882,22.337473,6.100212,22.032594,32.723165,42.88558,27.384486,46.905944,4.178028,2.732696,20.411301,9.570385,29.082589,1.698266,4.001512,9.321065,115.504866,54.415916,104.934781,30.937577,206.594756
249,2022-02-19,50.196131,11.050931,3.147174,31.179761,24.291359,35.085014,120.415916,31.538687,12.898686,3.841996,28.50695,22.840693,4.880383,23.102519,33.601128,40.126748,27.097812,45.74188,5.163037,1.822531,20.311009,19.458212,28.578533,1.345605,3.987331,8.225544,114.391685,53.756628,110.425578,32.294918,205.354398
250,2022-02-26,46.415952,10.454914,3.011509,31.584482,25.482816,33.035859,121.360943,34.021587,12.655901,3.738318,29.377735,20.457783,4.930867,21.717919,28.823298,41.653333,28.594479,45.993329,4.953043,2.317619,21.706116,16.598382,29.33023,1.017146,4.334015,8.259799,117.555416,57.664209,106.717787,31.959651,165.902538
251,2022-03-05,45.620939,11.971648,2.882646,33.236798,24.905728,33.449945,122.054904,33.595003,13.060201,3.50311,29.664584,20.68285,5.325124,22.697467,32.748514,44.015356,31.568315,47.365704,4.778024,2.44765,21.621207,18.919591,29.192357,1.171828,4.346245,8.321156,120.787662,55.910389,106.895941,35.063398,181.508601
252,2022-03-12,47.325945,11.76056,3.256932,33.586666,23.607056,33.245551,122.734276,30.742864,11.592506,4.908818,29.500306,23.084698,5.235596,23.06776,31.230271,44.082276,29.345394,47.107397,4.223041,2.772634,20.961175,18.535186,27.894164,3.263564,4.527994,8.555313,116.592001,54.272872,105.712738,33.93791,183.407399
253,2022-03-19,34.753865,9.473647,1.833878,26.842917,13.09777,20.672176,80.186026,18.847651,7.838024,-0.426936,21.922061,14.585339,3.333501,10.252557,14.954053,27.272753,18.339932,30.166761,3.209181,0.608694,14.260161,9.085358,19.196708,-8.080885,3.062699,6.227731,77.065482,32.500092,66.614849,21.188325,117.22195
254,2022-03-26,40.585861,8.857529,1.893206,27.442853,20.454523,20.514333,98.324265,27.351623,9.472278,4.09542,26.263012,18.335262,4.254731,15.69714,25.981036,38.443696,19.742328,37.964168,3.557982,2.217622,17.036143,17.880019,18.320913,1.000526,3.314827,7.431816,90.190929,45.208206,75.69233,25.842718,134.556497


In [None]:
#Visualizations

In [None]:
# Setting x-axis for visualization of plots
x = list(range(0, 31))
x1 = list(range(max(x)+1, max(x)+32))
x2 = list(range(max(x1)+1, max(x1)+32))
x3 = list(range(max(x2)+1, max(x2)+32))
x4 = list(range(max(x3)+1, max(x3)+32))
x5 = list(range(max(x4)+1, max(x4)+32))
x6 = list(range(max(x5)+1, max(x5)+32))
x7 = list(range(max(x6)+1, max(x6)+32))
x8 = list(range(max(x7)+1, max(x7)+32))
x9 = list(range(max(x8)+1, max(x8)+32))

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

prev_val1 = first_daily_df.values[-2] # Week No 245
prev_val2 = first_daily_df.values[-1] # Week No 246

forc_val1 = model_pred[:,0,:][0] # Predicted # Week No 247
forc_val2 = model_pred[:,1,:][1] # Predicted # Week No 248
forc_val3 = model_pred[:,1,:][2] # Predicted # Week No 249
forc_val4 = model_pred[:,1,:][3] # Predicted # Week No 250

# Add traces
fig.add_trace(
    go.Scatter(x = x, y=prev_val1, name="Test - Week 245",line=dict(color="#0000ff")),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= x1, y=prev_val2, name="Test - Week 246",line=dict(color="#0000ff")),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x= x2, y=forc_val1, name="Forecast - Week 247",line=dict(color="darkgreen")),
    secondary_y=True,
)


fig.add_trace(
    go.Scatter(x= x3, y=forc_val2, name="Forecast - Week 248",line=dict(color="darkgreen")),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x= x4, y=forc_val3, name="Forecast - Week 249",line=dict(color="darkgreen")),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x= x5, y=forc_val4, name="Forecast - Week 250",line=dict(color="darkgreen")),
    secondary_y=True,
)
#can be forecasted upto 36 quarters!

# Add figure title
fig.update_layout(
    title_text="Visualization of previous 2 weeks vs next 4 weeks - forecasting"
)

# Set x-axis title
fig.update_xaxes(title_text="examples")

# Set y-axes titles
fig.update_yaxes(title_text="<b>actual features</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>predictions features</b>", secondary_y=True)

fig.show()

**Level 2 (middle):** 

Second Model: column R1 TO R6 (middle level in the hierarchy)

# Taking the next columns from R1-R6

In [None]:
first_daily_df=df.iloc[:,32:38]
difference=first_daily_df.diff()
difference[:-9]

Unnamed: 0,R1,R2,R3,R4,R5,R6
0,,,,,,
1,0.000,0.000,0.000,0.000,0.000,0.115
2,2.520,0.000,0.005,0.240,0.000,1.055
3,-1.785,0.000,0.200,0.760,3.230,-0.085
4,-1.600,0.000,-0.200,0.110,-0.860,-0.405
...,...,...,...,...,...,...
233,-4.630,6.235,10.945,1.300,2.745,-0.215
234,10.110,-1.030,14.655,-4.365,0.150,3.085
235,-11.645,-3.995,-19.580,-6.695,-3.110,-2.655
236,8.175,7.845,19.385,17.645,3.770,-1.320


In [None]:
#Features
n_past = 1
n_future = 8
no_of_features = 6 # now we have taken 6 features

train_dataF,test_dataF = difference[0:-9], difference[-9:]  

#Scaling the values for faster training of the models.
train = train_dataF
scalers={}

for i in train_dataF.columns:
    #MixMaxScler converts into [0 1]
    scaler = MinMaxScaler(feature_range=(0,1))
    s_s = scaler.fit_transform(train[i].values.reshape(-1,1))
    s_s=np.reshape(s_s,len(s_s))
    scalers['scaler_'+ i] = scaler
    train[i]=s_s

test = test_dataF
for i in train_dataF.columns:
    scaler = scalers['scaler_'+i]
    s_s = scaler.transform(test[i].values.reshape(-1,1))
    s_s=np.reshape(s_s,len(s_s))
    scalers['scaler_'+i] = scaler
    test[i]=s_s



X_train, y_train = split_series(train.values,n_past, 2)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1],no_of_features))
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], no_of_features))
where_are_NaNs = np.isnan(X_train)
X_train[where_are_NaNs] = 0

X_test, y_test = split_series(test.values,n_past, 2)
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1],no_of_features))
y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], no_of_features))
#print(y_train,X_train)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [None]:
#Shape of our training and testing data without zero padding
print(X_train.shape)
print(X_test.shape)
#Here we see that we now have 28 columns in our training/testing input data.

(236, 1, 6)
(7, 1, 6)


In [None]:
#Padding zeros to overcome the dimension errors

#Beacuse we need to need have 31 columns to re-use the [model no 1] 

# So we pad 25 columns of zeros (z_train) to our data using concatenate function
z_train=np.zeros((238-2,n_past,25))
X_train = np.concatenate([X_train, z_train], -1)
print(X_train.shape)
print(y_train.shape)
z_test=np.zeros((9-2,n_past,25))
X_test = np.concatenate([X_test, z_test], -1)
print(X_test.shape)
print(y_test.shape)

#Splitting the data for testing wrt past and future
newdataX,newdatay=split_series(first_daily_df.values,n_past, 2)
newdatay = newdatay[-9:, :]
newdataX.shape

(236, 1, 31)
(236, 2, 6)
(7, 1, 31)
(7, 2, 6)


(245, 1, 6)

In [None]:
#Loading the previous saved model as we are performing the Hierarchical Time Series Forecasting
from keras.models import Model
from keras.models import load_model

#This was trained with first 31 columns, we load it and re-train it
model2=load_model('model(8w1).h5')

# we set the 5 layers of model trainable false to re-train them with next columns for Hierarchical forecasting

#trainable to False moves all the layer's weights from trainable to non-trainable. 
#This is called "freezing" the layer: the state of a frozen layer won't be updated 
#during training
#So it means we will use those pre-trained weights (transfer learning)
for layer in model2.layers[:5]:
    layer.trainable = False 

# Get the input
new_input = model2.input
# Find the layer to connect
hidden_layer = model2.layers[-1].output # we will connect the new layer after this
# Connect a new layer on it
new_output = Dense(no_of_features) (hidden_layer)
# Build a new model
new_model2 = Model(new_input, new_output)

#Compiling the model again
new_model2.compile(optimizer='adam', loss='mse')
new_model2.summary()

#Training the model again
fit_history2 = new_model2.fit(X_train, y_train, epochs=250, batch_size=50, 
                     validation_split=0.33, verbose=2, shuffle=False) 

#Saving the model now trainined with 31+6 columns
new_model2.save('model(8w2).h5')

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_input (InputLayer)     [(None, 1, 31)]           0         
                                                                 
 lstm (LSTM)                 (None, 1, 10)             1680      
                                                                 
 lstm_1 (LSTM)               (None, 7)                 504       
                                                                 
 repeat_vector (RepeatVector  (None, 2, 7)             0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 2, 7)              420       
                                                                 
 lstm_3 (LSTM)               (None, 2, 10)             720       
                                                             

In [None]:
history = fit_history2
#plot the training and validation accuracy and loss at each epoch
loss = history.history['loss']
val_loss = history.history['val_loss']


# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(y=loss, name="Training Loss"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(y=val_loss, name="Validation Loss"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Training and Validation Loss"
)

# Set x-axis title
fig.update_xaxes(title_text="Epochs")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Training Loss</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Validation Loss</b>", secondary_y=True)

fig.show()

In [None]:
#Predictions
model_pred=new_model2.predict(X_test)
print(model_pred.shape)


#Inverse Scaling of the predicted values

for index,i in enumerate(train_dataF.columns):
    scaler = scalers['scaler_'+i]  
    model_pred[:,:,index]=scaler.inverse_transform(model_pred[:,:,index])

model_pred = [diff_inverse( model_pred[i],  newdatay[i] ) for i in range(len(model_pred))]
model_pred=np.array(model_pred)
print(model_pred.shape)

(7, 2, 6)
(7, 2, 6)


In [None]:
#Error Estimation

#Checking the metrics - RMSE and MAE

print('MAE_0',(MAE(newdatay[0,:], model_pred[0,:])).mean())
print('RMSE_0', (RMSE(newdatay[0,:], model_pred[0,:])).mean())

#print('RMSE', (dRMSE(newdatay, model_pred))[0,:].mean())

print('MAE_1', (MAE(newdatay[1,:], model_pred[1,:])).mean())
print('RMSE_1', (RMSE(newdatay[1,:], model_pred[1,:])).mean())

print('MAE_2', (MAE(newdatay[2,:], model_pred[2,:])).mean())

print('MAE_3', (MAE(newdatay[3,:], model_pred[3,:])).mean())

print('MAE_4', (MAE(newdatay[4,:], model_pred[4,:])).mean())

print('MAE_5', (MAE(newdatay[5,:], model_pred[5,:])).mean())

print()
print('Full MAE', (MAE(newdatay[:-2], model_pred)).mean())
print('Full RMSE', (RMSE(newdatay[:-2], model_pred)).mean())

MAE_0 0.6086954475939561
RMSE_0 5.564964546492629e-05
MAE_1 0.6962588240190478
RMSE_1 7.119092391947046e-05
MAE_2 0.8479946270450478
MAE_3 0.6547764221894198
MAE_4 0.6956229943285942
MAE_5 2.5990900543705413

Full MAE 0.9660576213984869
Full RMSE 0.00026195655383002517


In [None]:
#Lets see the first 238 weeks data - years which we've trained our model for.
first_daily_df[:-9]


Unnamed: 0,R1,R2,R3,R4,R5,R6
0,1.485,0.050,0.530,0.120,0.915,0.160
1,1.485,0.050,0.530,0.120,0.915,0.275
2,4.005,0.050,0.535,0.360,0.915,1.330
3,2.220,0.050,0.735,1.120,4.145,1.245
4,0.620,0.050,0.535,1.230,3.285,0.840
...,...,...,...,...,...,...
233,154.070,112.170,478.165,224.790,73.910,48.255
234,164.180,111.140,492.820,220.425,74.060,51.340
235,152.535,107.145,473.240,213.730,70.950,48.685
236,160.710,114.990,492.625,231.375,74.720,47.365


In [None]:
#Actual values for last 8 weeks - testing set
actual_df = pd.DataFrame(newdatay[:,1,:], index = list(range(238,247)))
for i in range(len(test.columns)):
    actual_df = actual_df.rename(columns={i: test.columns[i]}) 

actual_df["week"] = df.iloc[238:,0]
actual_df["week"] = pd.to_datetime(actual_df['week']).dt.date
cols = actual_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
actual_df= actual_df[cols]
actual_df


Unnamed: 0,week,R1,R2,R3,R4,R5,R6
238,2021-11-20,162.555,117.09,526.255,221.395,71.485,50.54
239,2021-11-27,164.23,113.8,527.565,226.31,71.375,50.08
240,2021-12-04,169.405,112.52,481.84,230.735,69.515,46.3
241,2021-12-11,172.085,119.235,500.885,238.895,71.415,45.505
242,2021-12-18,165.375,116.66,504.8,233.72,71.0,47.21
243,2021-12-25,113.57,76.485,337.395,161.275,55.565,35.845
244,2022-01-15,129.905,91.87,380.245,192.685,51.47,40.47
245,2022-01-22,160.965,105.09,445.63,224.78,68.22,45.205
246,2022-01-29,165.48,110.075,488.9,235.665,69.07,45.75


In [None]:
#Predicted values for last 8 weeks
forecast1_df = pd.DataFrame(model_pred[1:,0,:], index = list(range(238,244))) 
forecast2_df = pd.DataFrame(model_pred[-5:-2,1,:], index = list(range(244,247))) 

for i in range(len(test.columns)):
    forecast1_df = forecast1_df.rename(columns={i: test.columns[i]})
    forecast2_df = forecast2_df.rename(columns={i: test.columns[i]}) 

merged = pd.concat([forecast1_df, forecast2_df], axis=0) 

merged["week"] = df.iloc[238:,0]
merged["week"] = pd.to_datetime(merged['week']).dt.date
cols = merged.columns.tolist()
cols = cols[-1:] + cols[:-1]
merged= merged[cols]
merged

Unnamed: 0,week,R1,R2,R3,R4,R5,R6
238,2021-11-20,162.211076,114.860626,524.135003,220.917153,70.886358,50.403014
239,2021-11-27,163.596814,110.88123,524.611482,225.414808,70.541678,49.836965
240,2021-12-04,169.159641,110.525716,480.006152,230.400214,68.996918,46.199702
241,2021-12-11,171.741751,117.001902,498.762474,238.417882,70.815923,45.3678
242,2021-12-18,161.334844,104.686016,491.399515,228.215331,67.288626,45.692822
243,2021-12-25,113.679185,75.309749,336.569962,161.463398,55.333493,35.876305
244,2022-01-15,170.492669,114.143307,484.293279,232.128812,70.022687,46.619034
245,2022-01-22,173.177358,120.76037,503.268805,240.392863,71.927297,45.831799
246,2022-01-29,166.47048,118.211678,507.202596,235.195132,71.512959,47.535381


In [None]:
ffs1 = actual_df.iloc[-1:,0]+datetime.timedelta(days=7)
ffs2 = ffs1+datetime.timedelta(days=7)
ffs3 = ffs2+datetime.timedelta(days=7)
ffs4 = ffs3+datetime.timedelta(days=7)
ffs5 = ffs4+datetime.timedelta(days=7)
ffs6 = ffs5+datetime.timedelta(days=7)
ffs7 = ffs6+datetime.timedelta(days=7)
ffs8 = ffs7+datetime.timedelta(days=7)


future_weeks = pd.concat([ffs1, ffs2, ffs3, ffs4, ffs5, ffs6, ffs7, ffs8], axis=0)
future_weeks.index = range(247,255)
future_weeks


247    2022-02-05
248    2022-02-12
249    2022-02-19
250    2022-02-26
251    2022-03-05
252    2022-03-12
253    2022-03-19
254    2022-03-26
Name: week, dtype: object

In [None]:
#Predicted forecasting for testing set - 8 weeeks ahead
forecast1_df = pd.DataFrame(model_pred[0:1,0,:], index = list(range(247,248))) 
forecast2_df = pd.DataFrame(model_pred[:,1,:], index = list(range(248,255))) 

for i in range(len(test.columns)):
    forecast1_df = forecast1_df.rename(columns={i: test.columns[i]})
    forecast2_df = forecast2_df.rename(columns={i: test.columns[i]}) 

merged = pd.concat([forecast1_df, forecast2_df], axis=0) 
merged["week"] = future_weeks
merged["week"] = pd.to_datetime(merged['week']).dt.date
cols = merged.columns.tolist()
cols = cols[-1:] + cols[:-1]
merged= merged[cols]
merged

Unnamed: 0,week,R1,R2,R3,R4,R5,R6
247,2022-02-05,156.242918,112.93991,503.314415,224.213172,69.563584,48.299805
248,2022-02-12,163.643912,118.572845,528.603025,222.911786,71.994509,50.866496
249,2022-02-19,165.325016,115.356917,529.972544,227.78514,71.888289,50.405618
250,2022-02-26,170.492669,114.143307,484.293279,232.128812,70.022687,46.619034
251,2022-03-05,173.177358,120.76037,503.268805,240.392863,71.927297,45.831799
252,2022-03-12,166.47048,118.211678,507.202596,235.195132,71.512959,47.535381
253,2022-03-19,113.192398,74.947829,336.324555,160.410486,54.904759,35.612928
254,2022-03-26,130.967993,93.204513,382.465595,194.245014,51.961865,40.794326


In [None]:
# Setting x-axis for visualization of plots
x = list(range(0, 6))
x1 = list(range(max(x)+1, max(x)+7))
x2 = list(range(max(x1)+1, max(x1)+7))
x3 = list(range(max(x2)+1, max(x2)+7))
x4 = list(range(max(x3)+1, max(x3)+7))
x5 = list(range(max(x4)+1, max(x4)+7))
x6 = list(range(max(x5)+1, max(x5)+7))
x7 = list(range(max(x6)+1, max(x6)+7))
x8 = list(range(max(x7)+1, max(x7)+7))
x9 = list(range(max(x8)+1, max(x8)+7))

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

prev_val1 = first_daily_df.values[-2] # Week No 245
prev_val2 = first_daily_df.values[-1] # Week No 246

forc_val1 = model_pred[:,0,:][0] # Predicted # Week No 247
forc_val2 = model_pred[:,1,:][1] # Predicted # Week No 248
forc_val3 = model_pred[:,1,:][2] # Predicted # Week No 249
forc_val4 = model_pred[:,1,:][3] # Predicted # Week No 250

# Add traces
fig.add_trace(
    go.Scatter(x = x, y=prev_val1, name="Test - Week 245",line=dict(color="#0000ff")),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= x1, y=prev_val2, name="Test - Week 246",line=dict(color="#0000ff")),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x= x2, y=forc_val1, name="Forecast - Week 247",line=dict(color="darkgreen")),
    secondary_y=True,
)


fig.add_trace(
    go.Scatter(x= x3, y=forc_val2, name="Forecast - Week 248",line=dict(color="darkgreen")),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x= x4, y=forc_val3, name="Forecast - Week 249",line=dict(color="darkgreen")),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(x= x5, y=forc_val4, name="Forecast - Week 250",line=dict(color="darkgreen")),
    secondary_y=True,
)
#can be forecasted upto 36 quarters!

# Add figure title
fig.update_layout(
    title_text="Visualization of previous 2 weeks vs next 4 weeks - forecasting"
)

# Set x-axis title
fig.update_xaxes(title_text="examples")

# Set y-axes titles
fig.update_yaxes(title_text="<b>actual features</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>predictions features</b>", secondary_y=True)

fig.show()

Third Model: column "Total" (Top level in the hierarchy)

#Final Data Split (taking last feature)

In [None]:
#last column
first_daily_df=df.iloc[:,38:]
difference=first_daily_df.diff()
first_daily_df[:-9]

Unnamed: 0,total
0,0.160
1,2.410
2,7.145
3,9.465
4,6.510
...,...
233,1091.360
234,1114.075
235,1066.285
236,1121.815


In [None]:
n_past = 1
n_future = 8 # we will now predict for 8 future values
no_of_features = 1

#Split into the train and test data
train_dataF,test_dataF = difference[0:-9], difference[-9:] 

In [None]:
train_dataF.columns

Index(['total'], dtype='object')

In [None]:
#Scaling the values for faster training of the models
train = train_dataF
scalers={}
for i in train_dataF.columns:
    scaler = MinMaxScaler(feature_range=(0,1))
    s_s = scaler.fit_transform(train[i].values.reshape(-1,1))
    s_s=np.reshape(s_s,len(s_s))
    scalers['scaler_'+ i] = scaler
    train[i]=s_s
test = test_dataF

for i in train_dataF.columns:
    scaler = scalers['scaler_'+i]
    s_s = scaler.transform(test[i].values.reshape(-1,1))
    s_s=np.reshape(s_s,len(s_s))
    scalers['scaler_'+i] = scaler
    test[i]=s_s




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
X_train, y_train = split_series(train.values,n_past, 2)

In [None]:
X_train, y_train = split_series(train.values,n_past, 2)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1],no_of_features))
y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], no_of_features))

where_are_NaNs = np.isnan(X_train)
X_train[where_are_NaNs] = 0
X_test, y_test = split_series(test.values,n_past, 2)
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1],no_of_features))
y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], no_of_features))
#print(y_train,X_train)

In [None]:
print(X_train.shape)
#padding 30 zeros to match 31
z_train=np.zeros((238-2,n_past,30))
X_train = np.concatenate([X_train, z_train], -1)
print(X_train.shape)
print(y_train.shape)

(236, 1, 1)
(236, 1, 31)
(236, 2, 1)


In [None]:
z_test=np.zeros((9-2,n_past,30))
X_test = np.concatenate([X_test, z_test], -1)
print(X_test.shape)
print(y_test.shape)
print(X_train.shape)
newdataX,newdatay=split_series(first_daily_df.values,n_past, 2)
newdatay = newdatay[-9:, :]

(7, 1, 31)
(7, 2, 1)
(236, 1, 31)


In [None]:
model3=load_model('model(8w2).h5') 
for layer in model3.layers[:7]:
    layer.trainable = False

# Get input
new_input = model3.input
# Find the layer to connect
hidden_layer = model3.layers[-2].output
# Connect a new layer on it
new_output = Dense(no_of_features) (hidden_layer)
# Build a new model
new_model3 = Model(new_input, new_output)
new_model3.compile(optimizer='adam', loss='mse')
new_model3.summary()

fit_history3 = new_model3.fit(X_train, y_train, epochs=300, batch_size=60, 
                     validation_split=0.33, verbose=2, shuffle=False)   #700

#Predictions on test set
model_pred=new_model3.predict(X_test)
print(model_pred.shape)


"""Inverse Scaling of the predicted values"""
for index,i in enumerate(train_dataF.columns):
    scaler = scalers['scaler_'+i]  
    model_pred[:,:,index]=scaler.inverse_transform(model_pred[:,:,index])


model_pred = [diff_inverse( model_pred[i],  newdatay[i] ) for i in range(len(model_pred))]
model_pred=np.array(model_pred)
#print(model_pred)
print(model_pred.shape)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_input (InputLayer)     [(None, 1, 31)]           0         
                                                                 
 lstm (LSTM)                 (None, 1, 10)             1680      
                                                                 
 lstm_1 (LSTM)               (None, 7)                 504       
                                                                 
 repeat_vector (RepeatVector  (None, 2, 7)             0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 2, 7)              420       
                                                                 
 lstm_3 (LSTM)               (None, 2, 10)             720       
                                                           

In [None]:
history = fit_history3
#plot the training and validation accuracy and loss at each epoch
loss = history.history['loss']
val_loss = history.history['val_loss']


# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(y=loss, name="Training Loss"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(y=val_loss, name="Validation Loss"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Training and Validation Loss"
)

# Set x-axis title
fig.update_xaxes(title_text="Epochs")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Training Loss</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Validation Loss</b>", secondary_y=True)

fig.show()

In [None]:
model_pred.shape

(7, 2, 1)

In [None]:
newdatay[:-2].shape

(7, 2, 1)

In [None]:
def difference(dataset, interval=1):
	diff = list()
	for i in range(interval, len(dataset)):
		value = dataset[i] - dataset[i - interval]
		diff.append(value)
	return pd.Series(diff)
 

dy_true = difference(newdatay[:-2])
dy_pred = difference(model_pred)

#Error Estimation
print('MAE', (MAE(newdatay[:-2], model_pred))[0,:].mean())
print('RMSE', (RMSE(newdatay[:-2], model_pred))[0,:].mean())
print('dRMSE', (dRMSE(dy_true, dy_pred))[0,:])
print('MAE', (MAE(newdatay[:-2], model_pred))[1,:].mean())
print('RMSE', (RMSE(newdatay[:-2], model_pred))[1,:].mean())
print('dRMSE', (dRMSE(dy_true, dy_pred))[1,:])

print()
print('MAE', (MAE(newdatay[:-2], model_pred)).mean())
print('RMSE', (RMSE(newdatay[:-2], model_pred)).mean())
print('dRMSE', (dRMSE(dy_true, dy_pred)).mean())


MAE 1.4176770811267811
RMSE 0.00021491511259441216
dRMSE [0.11631994]
MAE 1.3676842777769527
RMSE 0.00019185940646392245
dRMSE [0.02658267]

MAE 1.392680679451867
RMSE 0.0002033872595291673
dRMSE 0.07145130237003106


In [None]:
#Lets see the first 238 weeks data - years which we've trained our model for.
first_daily_df[:-9]

Unnamed: 0,total
0,0.160
1,2.410
2,7.145
3,9.465
4,6.510
...,...
233,1091.360
234,1114.075
235,1066.285
236,1121.815


In [None]:
#Actual values for last 8 weeks - testing set
actual_df = pd.DataFrame(newdatay[:,1,:], index = list(range(238,247)))
for i in range(len(test.columns)):
    actual_df = actual_df.rename(columns={i: test.columns[i]}) 

actual_df["week"] = df.iloc[238:,0]
actual_df["week"] = pd.to_datetime(actual_df['week']).dt.date
cols = actual_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
actual_df= actual_df[cols]
actual_df

Unnamed: 0,week,total
238,2021-11-20,1150.32
239,2021-11-27,1154.96
240,2021-12-04,1111.825
241,2021-12-11,1149.715
242,2021-12-18,1141.5
243,2021-12-25,783.43
244,2022-01-15,889.465
245,2022-01-22,1052.755
246,2022-01-29,1118.48


In [None]:
#Predicted values for last 8 weeks
forecast1_df = pd.DataFrame(model_pred[1:,0,:], index = list(range(238,244))) 
forecast2_df = pd.DataFrame(model_pred[-5:-2,1,:], index = list(range(244,247))) 

for i in range(len(test.columns)):
    forecast1_df = forecast1_df.rename(columns={i: test.columns[i]})
    forecast2_df = forecast2_df.rename(columns={i: test.columns[i]}) 

merged = pd.concat([forecast1_df, forecast2_df], axis=0) 

merged["week"] = df.iloc[238:,0]
merged["week"] = pd.to_datetime(merged['week']).dt.date
cols = merged.columns.tolist()
cols = cols[-1:] + cols[:-1]
merged= merged[cols]
merged

Unnamed: 0,week,total
238,2021-11-20,1136.226263
239,2021-11-27,1139.370443
240,2021-12-04,1098.743758
241,2021-12-11,1135.223434
242,2021-12-18,1115.123682
243,2021-12-25,772.347967
244,2022-01-15,1126.252382
245,2022-01-22,1165.347606
246,2022-01-29,1156.462649


In [None]:
ffs1 = actual_df.iloc[-1:,0]+datetime.timedelta(days=7)
ffs2 = ffs1+datetime.timedelta(days=7)
ffs3 = ffs2+datetime.timedelta(days=7)
ffs4 = ffs3+datetime.timedelta(days=7)
ffs5 = ffs4+datetime.timedelta(days=7)
ffs6 = ffs5+datetime.timedelta(days=7)
ffs7 = ffs6+datetime.timedelta(days=7)
ffs8 = ffs7+datetime.timedelta(days=7)


future_weeks = pd.concat([ffs1, ffs2, ffs3, ffs4, ffs5, ffs6, ffs7, ffs8], axis=0)
future_weeks.index = range(247,255)
future_weeks


247    2022-02-05
248    2022-02-12
249    2022-02-19
250    2022-02-26
251    2022-03-05
252    2022-03-12
253    2022-03-19
254    2022-03-26
Name: week, dtype: object

In [None]:
#Predicted forecasting for testing set - 8 weeeks ahead
forecast1_df = pd.DataFrame(model_pred[0:1,0,:], index = list(range(247,248))) 
forecast2_df = pd.DataFrame(model_pred[:,1,:], index = list(range(248,255))) 

for i in range(len(test.columns)):
    forecast1_df = forecast1_df.rename(columns={i: test.columns[i]})
    forecast2_df = forecast2_df.rename(columns={i: test.columns[i]}) 

merged = pd.concat([forecast1_df, forecast2_df], axis=0) 
merged["week"] = future_weeks
merged["week"] = pd.to_datetime(merged['week']).dt.date
cols = merged.columns.tolist()
cols = cols[-1:] + cols[:-1]
merged= merged[cols]
merged

Unnamed: 0,week,total
247,2022-02-05,1105.599357
248,2022-02-12,1165.861431
249,2022-02-19,1170.113584
250,2022-02-26,1126.252382
251,2022-03-05,1165.347606
252,2022-03-12,1156.462649
253,2022-03-19,791.90624
254,2022-03-26,906.014744


In [None]:
first_daily_df.values.flatten()[:-9].shape

(238,)

In [None]:
# Setting x-axis for visualization of plots
x = list(range(237, 237+len(model_pred.flatten())))

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

prev_values = first_daily_df.values.flatten() # previous values - used of training

forc_values = model_pred.flatten()

# Add traces
fig.add_trace(
    go.Scatter(y=prev_values, name="Prev Trained Values",line=dict(color="#0000ff")),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x= x, y=forc_values, name="Forecast",line=dict(color="darkgreen")),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Visualization of forecasting"
)

# Set x-axis title
fig.update_xaxes(title_text="examples")

# Set y-axes titles
fig.update_yaxes(title_text="<b>actual features</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>predictions features</b>", secondary_y=True)

fig.show()