In [1]:
import numpy as np
import pandas as pd
import re
import jdatetime
import datetime
import calendar

In [2]:
sale_df = pd.read_excel('P2-SalesData.xlsx')
sale_df.head()

Unnamed: 0,Date,Sales
0,14010115,1395.898445
1,14010114,1654.535199
2,14010113,767.028903
3,14010112,1323.005591
4,14010111,1294.524908


In [3]:
sale_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1111 entries, 0 to 1110
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    1111 non-null   int64  
 1   Sales   1111 non-null   float64
dtypes: float64(1), int64(1)
memory usage: 17.5 KB


In [4]:
def to_gregorian(datetime):
    datetime = int(datetime)
    return jdatetime.date(datetime//10000, datetime//100%100, datetime%100).togregorian()

In [5]:
sale_df['Date'] = sale_df['Date'].apply(to_gregorian)

In [6]:
sale_df['Date'] = pd.to_datetime(sale_df['Date'])
sale_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1111 entries, 0 to 1110
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    1111 non-null   datetime64[ns]
 1   Sales   1111 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 17.5 KB


In [7]:
def week_of_month(tgtdate):
    days_this_month = calendar.mdays[tgtdate.month]
    for i in range(1, days_this_month):
        d = datetime.datetime(tgtdate.year, tgtdate.month, i)
        if d.day - d.weekday() > 0:
            startdate = d
            break
    # now we canuse the modulo 7 appraoch
    return (tgtdate - startdate).days //7 + 1

In [8]:
# Create Cols
sale_df['Month Day'] = sale_df['Date'].dt.day
sale_df['Week Day'] = sale_df['Date'].dt.weekday
sale_df['Month'] = sale_df['Date'].dt.month
sale_df['Month Week'] = sale_df['Date'].apply(week_of_month)
sale_df['Year Week'] = sale_df['Date'].dt.weekofyear
sale_df['Year Day'] = sale_df['Date'].dt.dayofyear

  sale_df['Year Week'] = sale_df['Date'].dt.weekofyear


In [9]:
sale_df

Unnamed: 0,Date,Sales,Month Day,Week Day,Month,Month Week,Year Week,Year Day
0,2022-04-04,1395.898445,4,0,4,1,14,94
1,2022-04-03,1654.535199,3,6,4,0,13,93
2,2022-04-02,767.028903,2,5,4,0,13,92
3,2022-04-01,1323.005591,1,4,4,0,13,91
4,2022-03-31,1294.524908,31,3,3,4,13,90
...,...,...,...,...,...,...,...,...
1106,2019-03-25,1314.944083,25,0,3,4,13,84
1107,2019-03-24,1278.500591,24,6,3,3,12,83
1108,2019-03-23,1316.051583,23,5,3,3,12,82
1109,2019-03-22,1281.464259,22,4,3,3,12,81


In [79]:
import keras
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
from keras.optimizers import Adam 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.layers import LSTM
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from keras import backend as K


In [11]:
train_set = sale_df.drop(axis=1, labels='Date')
train_set

Unnamed: 0,Sales,Month Day,Week Day,Month,Month Week,Year Week,Year Day
0,1395.898445,4,0,4,1,14,94
1,1654.535199,3,6,4,0,13,93
2,767.028903,2,5,4,0,13,92
3,1323.005591,1,4,4,0,13,91
4,1294.524908,31,3,3,4,13,90
...,...,...,...,...,...,...,...
1106,1314.944083,25,0,3,4,13,84
1107,1278.500591,24,6,3,3,12,83
1108,1316.051583,23,5,3,3,12,82
1109,1281.464259,22,4,3,3,12,81


In [48]:
from sklearn.preprocessing import MinMaxScaler
scaler_x = MinMaxScaler(feature_range=(-1, 1))
scaler_y = MinMaxScaler(feature_range=(-1, 1))

In [50]:
scaled_train_set_x = scaler_x.fit_transform(train_set.drop(axis=1, labels='Sales'))
scaled_train_set_y = scaler_y.fit_transform(train_set[['Sales']])

In [52]:
scaled_train_set_x.shape

(1111, 6)

In [53]:
scaled_train_set_y.shape

(1111, 1)

In [54]:
train_x = []
train_y = []

n_past = 50

for i in range(n_past, len(scaled_train_set_x)):
    train_x.append(scaled_train_set_x[i - n_past:i])
    train_y.append(scaled_train_set_y[i][0])

train_x , train_y = np.array(train_x) , np.array(train_y)

TRAIN: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212] TEST: [213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
 231 232 233 234 235 236 237 238 239 240 241 242 

In [82]:
def root_mean_squared_error(y_true, y_pred):
        return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) 

def training_model(train_x, train_y, test_x, test_y):
    model = Sequential()
    model.add(LSTM(64, input_shape=(train_x.shape[1], train_x.shape[2]), activation='tanh', return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(32, activation='tanh', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(1))


    model.compile(loss = root_mean_squared_error, optimizer='adam')
    model.summary()

    model.fit(train_x, train_y, epochs=100, batch_size=32)

    prediction = model.predict(X_test)

    return np.sqrt(mean_squared_error(y_test, prediction))

In [83]:
tscv = TimeSeriesSplit(n_splits = 4)
rmse = []
for train_index, test_index in tscv.split(train_x):
    X_train, X_test = train_x[train_index], train_x[test_index]
    y_train, y_test = train_y[train_index], train_y[test_index]

    error = training_model(X_train, y_train, X_test, y_test)

    rmse.append(error)

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_26 (LSTM)              (None, 50, 64)            18176     
                                                                 
 dropout_22 (Dropout)        (None, 50, 64)            0         
                                                                 
 lstm_27 (LSTM)              (None, 32)                12416     
                                                                 
 dropout_23 (Dropout)        (None, 32)                0         
                                                                 
 dense_12 (Dense)            (None, 1)                 33        
                                                                 
Total params: 30,625
Trainable params: 30,625
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
E

Forecasting

In [84]:
rmse

[0.15736036700909625,
 0.23331419397596223,
 0.31800076628586854,
 0.18481381409309072]

In [43]:
# Create dataset
prediction_df = pd.DataFrame({
    'Date': pd.date_range(end='2022-05-04', periods=30)})
prediction_df

Unnamed: 0,Date
0,2022-04-05
1,2022-04-06
2,2022-04-07
3,2022-04-08
4,2022-04-09
5,2022-04-10
6,2022-04-11
7,2022-04-12
8,2022-04-13
9,2022-04-14


In [44]:
# Create Cols
prediction_df['Month Day'] = prediction_df['Date'].dt.day
prediction_df['Week Day'] = prediction_df['Date'].dt.weekday
prediction_df['Month'] = prediction_df['Date'].dt.month
prediction_df['Month Week'] = prediction_df['Date'].apply(week_of_month)
prediction_df['Year Week'] = prediction_df['Date'].dt.weekofyear
prediction_df['Year Day'] = prediction_df['Date'].dt.dayofyear

  prediction_df['Year Week'] = prediction_df['Date'].dt.weekofyear


In [45]:
prediction_df

Unnamed: 0,Date,Month Day,Week Day,Month,Month Week,Year Week,Year Day
0,2022-04-05,5,1,4,1,14,95
1,2022-04-06,6,2,4,1,14,96
2,2022-04-07,7,3,4,1,14,97
3,2022-04-08,8,4,4,1,14,98
4,2022-04-09,9,5,4,1,14,99
5,2022-04-10,10,6,4,1,14,100
6,2022-04-11,11,0,4,2,15,101
7,2022-04-12,12,1,4,2,15,102
8,2022-04-13,13,2,4,2,15,103
9,2022-04-14,14,3,4,2,15,104
