In [1]:
import gc
import time
from datetime import date, timedelta

import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from torch import nn,optim
%matplotlib inline

In [2]:
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever spelling this install provides.
_bright_style = next(
    (name for name in ('seaborn-v0_8-bright', 'seaborn-bright') if name in plt.style.available),
    'default',
)
plt.style.use(_bright_style)

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise keep everything on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Import the Data

In [None]:
# Load six columns of final.csv, selected by position, and parse 'date' as datetimes
df = pd.read_csv(
    'final.csv',
    usecols=[0,1,3,4,5,12],
    parse_dates=['date'],
)
df.head()

In [None]:
# (rows, columns) of the frame loaded above
df.shape

In [6]:
# Keep rows dated strictly after 2017-01-01, order them by date, store and family,
# and renumber the index from zero
df = (
    df[df['date'] > '2017-01-01']
    .sort_values(by=['date','store_nbr','family'])
    .reset_index(drop=True)
)
print(df.shape)
df.head()

(23806568, 6)


Unnamed: 0,date,store_nbr,unit_sales,onpromotion,family,Oil Price
0,2017-01-02,1,3.0,False,AUTOMOTIVE,52.7075
1,2017-01-02,1,1.0,False,AUTOMOTIVE,52.7075
2,2017-01-02,1,1.0,False,AUTOMOTIVE,52.7075
3,2017-01-02,1,1.0,False,BEVERAGES,52.7075
4,2017-01-02,1,7.0,False,BEVERAGES,52.7075


## Data Transformation and Visualization

In [6]:
def create_dummies(data,col_list):
    """Replace each column named in ``col_list`` with indicator (dummy) columns.

    A column with exactly two counted levels becomes a single indicator that
    keeps the original column name (the first level is dropped). Any other
    column expands to one ``<col_name>_<level>`` column per level.

    ``data`` itself is not modified; every step builds a new frame, and the
    final one is returned. New columns are attached with an index-aligned join.
    """
    for col_name in col_list:
        # Exactly two levels -> one indicator column is enough
        is_binary = len(data[col_name].value_counts()) == 2
        encoded = pd.get_dummies(data[col_name], drop_first=is_binary)

        if is_binary:
            encoded.columns = [col_name]
        else:
            encoded.columns = [col_name + '_' + str(level) for level in encoded.columns]

        data = data.drop(columns=col_name).join(encoded)

    return data

In [7]:
# Build lagged copies (t-1 ... t-cnt) of the requested columns
def transformer(col,cnt,df):
    """Return ``df`` extended with ``cnt`` lag columns for every name in ``col``.

    The rows labelled ``0 .. cnt`` are removed, so those labels must exist in
    the index (assumes a default 0..n-1 index, which the visible caller
    provides by resetting it). For each remaining row and each ``k`` in
    ``1 .. cnt`` the column ``'<name> (t-k)'`` holds the value of ``name``
    found ``k`` rows earlier in ``df``. The returned frame has a fresh
    0-based index.

    NOTE(review): ``cnt + 1`` leading rows are dropped although the row at
    position ``cnt`` already has ``cnt`` predecessors; confirm this is intended.
    """
    n_rows = len(df)
    lagged = df.drop(index=list(range(cnt + 1)))

    for name in col:
        values = df[name].array
        for lag in range(1, cnt + 1):
            # Window shifted back by `lag` rows, same length as `lagged`
            lagged[f'{name} (t-{lag})'] = values[cnt + 1 - lag:n_rows - lag]

    return lagged.reset_index(drop = True)

In [11]:
# Encode the promotion flag and tag every row with its "<store>_<family>" series id
df = create_dummies(df,['onpromotion'])
df['store_family_id'] = df['store_nbr'].astype(str) + '_' + df['family'].astype(str)

# Discard every series that has fewer than 15 rows
rows_per_series = df['store_family_id'].value_counts()
sparse_series = list(rows_per_series[rows_per_series < 15].keys())
row_indices = df.index[df['store_family_id'].isin(sparse_series)]
df = df.drop(row_indices).reset_index(drop=True)
df.head()

Unnamed: 0,date,store_nbr,unit_sales,family,Oil Price,onpromotion,store_family_id
0,2017-01-02,1,3.0,AUTOMOTIVE,52.7075,0,1_AUTOMOTIVE
1,2017-01-02,1,1.0,AUTOMOTIVE,52.7075,0,1_AUTOMOTIVE
2,2017-01-02,1,1.0,AUTOMOTIVE,52.7075,0,1_AUTOMOTIVE
3,2017-01-02,1,1.0,BEVERAGES,52.7075,0,1_BEVERAGES
4,2017-01-02,1,7.0,BEVERAGES,52.7075,0,1_BEVERAGES


In [12]:
#cols = list(set(df.columns) - set(['date','unit_sales']))
#df['input_sequence'] = df[cols].apply(lambda x: np.array(x), axis=1)
#df.drop(cols,axis=1,inplace=True)
#print(df.shape)
#df.head()

In [13]:
#df = pd.DataFrame(df.groupby('date').agg(lambda x: list(x)))
#print(df.shape)
#df.head()

In [9]:
def create_lags(num_cols=1, data=None):
    """Add lag features to every store/family series and stack the results.

    Parameters
    ----------
    num_cols : int
        Number of lags (t-1 ... t-num_cols) created per series; passed to
        ``transformer`` as its ``cnt`` argument.
    data : DataFrame, optional
        Frame to process. Defaults to the notebook-level ``df``. It must
        contain a ``store_family_id`` column.

    Returns
    -------
    DataFrame
        All series concatenated with a fresh index and without the
        ``store_family_id`` column.

    Every column except date, onpromotion, store_nbr, store_family_id and
    family is lagged, in the frame's own column order so the output layout is
    the same on every run. Series are processed in order of first appearance.
    """
    source = df if data is None else data
    not_lagged = {'date','onpromotion','store_nbr','store_family_id','family'}

    # Group on the id itself instead of splitting it on '_' to recover store and
    # family; that split breaks if a family name contains an underscore.
    series_groups = source.groupby('store_family_id', sort=False)
    total = series_groups.ngroups

    # Collect the pieces and concatenate once: growing a frame inside the loop is
    # quadratic, and DataFrame.append no longer exists in pandas 2.x.
    lagged_parts = []
    for done, (store_fam, series) in enumerate(series_groups, start=1):
        series = series.reset_index(drop=True)
        lag_cols = [name for name in series.columns if name not in not_lagged]
        lagged_parts.append(transformer(lag_cols, num_cols, series))
        print(f'{store_fam} Complete. {done}/{total}')

    if not lagged_parts:
        # Nothing to process: return an empty frame with the expected columns
        return pd.DataFrame(columns=source.columns).drop('store_family_id',axis=1)

    return pd.concat(lagged_parts, ignore_index=True).drop('store_family_id',axis=1)

In [15]:
# Build 12 lags per store/family series; df is replaced by the lagged frame
df = create_lags(12)
print(df.shape)
df.head()

1_AUTOMOTIVE Complete. 1/1694
1_BEVERAGES Complete. 2/1694
1_BREAD/BAKERY Complete. 3/1694
1_CLEANING Complete. 4/1694
1_DAIRY Complete. 5/1694
1_DELI Complete. 6/1694
1_EGGS Complete. 7/1694
1_FROZEN FOODS Complete. 8/1694
1_GROCERY I Complete. 9/1694
1_GROCERY II Complete. 10/1694
1_HOME AND KITCHEN I Complete. 11/1694
1_HOME AND KITCHEN II Complete. 12/1694
1_HOME CARE Complete. 13/1694
1_LADIESWEAR Complete. 14/1694
1_LAWN AND GARDEN Complete. 15/1694
1_LINGERIE Complete. 16/1694
1_LIQUOR,WINE,BEER Complete. 17/1694
1_MAGAZINES Complete. 18/1694
1_MEATS Complete. 19/1694
1_PERSONAL CARE Complete. 20/1694
1_PET SUPPLIES Complete. 21/1694
1_PLAYERS AND ELECTRONICS Complete. 22/1694
1_POULTRY Complete. 23/1694
1_PREPARED FOODS Complete. 24/1694
1_PRODUCE Complete. 25/1694
1_SEAFOOD Complete. 26/1694
2_AUTOMOTIVE Complete. 27/1694
2_BABY CARE Complete. 28/1694
2_BEAUTY Complete. 29/1694
2_BEVERAGES Complete. 30/1694
2_BREAD/BAKERY Complete. 31/1694
2_CELEBRATION Complete. 32/1694
2_CLE

9_HOME AND KITCHEN I Complete. 256/1694
9_HOME AND KITCHEN II Complete. 257/1694
9_HOME CARE Complete. 258/1694
9_LAWN AND GARDEN Complete. 259/1694
9_LINGERIE Complete. 260/1694
9_LIQUOR,WINE,BEER Complete. 261/1694
9_MAGAZINES Complete. 262/1694
9_MEATS Complete. 263/1694
9_PERSONAL CARE Complete. 264/1694
9_PET SUPPLIES Complete. 265/1694
9_PLAYERS AND ELECTRONICS Complete. 266/1694
9_POULTRY Complete. 267/1694
9_PREPARED FOODS Complete. 268/1694
9_PRODUCE Complete. 269/1694
9_SCHOOL AND OFFICE SUPPLIES Complete. 270/1694
9_SEAFOOD Complete. 271/1694
10_AUTOMOTIVE Complete. 272/1694
10_BEAUTY Complete. 273/1694
10_BEVERAGES Complete. 274/1694
10_BREAD/BAKERY Complete. 275/1694
10_CELEBRATION Complete. 276/1694
10_CLEANING Complete. 277/1694
10_DAIRY Complete. 278/1694
10_DELI Complete. 279/1694
10_EGGS Complete. 280/1694
10_FROZEN FOODS Complete. 281/1694
10_GROCERY I Complete. 282/1694
10_GROCERY II Complete. 283/1694
10_HOME AND KITCHEN I Complete. 284/1694
10_HOME AND KITCHEN II 

18_HOME AND KITCHEN I Complete. 502/1694
18_HOME AND KITCHEN II Complete. 503/1694
18_HOME CARE Complete. 504/1694
18_LADIESWEAR Complete. 505/1694
18_LAWN AND GARDEN Complete. 506/1694
18_LINGERIE Complete. 507/1694
18_LIQUOR,WINE,BEER Complete. 508/1694
18_MAGAZINES Complete. 509/1694
18_MEATS Complete. 510/1694
18_PERSONAL CARE Complete. 511/1694
18_PET SUPPLIES Complete. 512/1694
18_PLAYERS AND ELECTRONICS Complete. 513/1694
18_POULTRY Complete. 514/1694
18_PREPARED FOODS Complete. 515/1694
18_PRODUCE Complete. 516/1694
18_SCHOOL AND OFFICE SUPPLIES Complete. 517/1694
18_SEAFOOD Complete. 518/1694
19_AUTOMOTIVE Complete. 519/1694
19_BEAUTY Complete. 520/1694
19_BEVERAGES Complete. 521/1694
19_BREAD/BAKERY Complete. 522/1694
19_CELEBRATION Complete. 523/1694
19_CLEANING Complete. 524/1694
19_DAIRY Complete. 525/1694
19_DELI Complete. 526/1694
19_EGGS Complete. 527/1694
19_FROZEN FOODS Complete. 528/1694
19_GROCERY I Complete. 529/1694
19_GROCERY II Complete. 530/1694
19_HARDWARE Com

26_MEATS Complete. 745/1694
26_PERSONAL CARE Complete. 746/1694
26_PET SUPPLIES Complete. 747/1694
26_PLAYERS AND ELECTRONICS Complete. 748/1694
26_POULTRY Complete. 749/1694
26_PREPARED FOODS Complete. 750/1694
26_PRODUCE Complete. 751/1694
26_SCHOOL AND OFFICE SUPPLIES Complete. 752/1694
26_SEAFOOD Complete. 753/1694
27_AUTOMOTIVE Complete. 754/1694
27_BEAUTY Complete. 755/1694
27_BEVERAGES Complete. 756/1694
27_BOOKS Complete. 757/1694
27_BREAD/BAKERY Complete. 758/1694
27_CELEBRATION Complete. 759/1694
27_CLEANING Complete. 760/1694
27_DAIRY Complete. 761/1694
27_DELI Complete. 762/1694
27_EGGS Complete. 763/1694
27_FROZEN FOODS Complete. 764/1694
27_GROCERY I Complete. 765/1694
27_GROCERY II Complete. 766/1694
27_HARDWARE Complete. 767/1694
27_HOME AND KITCHEN I Complete. 768/1694
27_HOME AND KITCHEN II Complete. 769/1694
27_HOME CARE Complete. 770/1694
27_LADIESWEAR Complete. 771/1694
27_LAWN AND GARDEN Complete. 772/1694
27_LINGERIE Complete. 773/1694
27_LIQUOR,WINE,BEER Complet

35_HARDWARE Complete. 990/1694
35_HOME AND KITCHEN I Complete. 991/1694
35_HOME AND KITCHEN II Complete. 992/1694
35_HOME APPLIANCES Complete. 993/1694
35_HOME CARE Complete. 994/1694
35_LINGERIE Complete. 995/1694
35_LIQUOR,WINE,BEER Complete. 996/1694
35_MEATS Complete. 997/1694
35_PERSONAL CARE Complete. 998/1694
35_PET SUPPLIES Complete. 999/1694
35_POULTRY Complete. 1000/1694
35_PREPARED FOODS Complete. 1001/1694
35_PRODUCE Complete. 1002/1694
35_SEAFOOD Complete. 1003/1694
36_AUTOMOTIVE Complete. 1004/1694
36_BEAUTY Complete. 1005/1694
36_BEVERAGES Complete. 1006/1694
36_BREAD/BAKERY Complete. 1007/1694
36_CELEBRATION Complete. 1008/1694
36_CLEANING Complete. 1009/1694
36_DAIRY Complete. 1010/1694
36_DELI Complete. 1011/1694
36_EGGS Complete. 1012/1694
36_FROZEN FOODS Complete. 1013/1694
36_GROCERY I Complete. 1014/1694
36_GROCERY II Complete. 1015/1694
36_HARDWARE Complete. 1016/1694
36_HOME AND KITCHEN I Complete. 1017/1694
36_HOME AND KITCHEN II Complete. 1018/1694
36_HOME APP

43_MEATS Complete. 1227/1694
43_PERSONAL CARE Complete. 1228/1694
43_PET SUPPLIES Complete. 1229/1694
43_POULTRY Complete. 1230/1694
43_PREPARED FOODS Complete. 1231/1694
43_PRODUCE Complete. 1232/1694
44_AUTOMOTIVE Complete. 1233/1694
44_BEAUTY Complete. 1234/1694
44_BEVERAGES Complete. 1235/1694
44_BOOKS Complete. 1236/1694
44_BREAD/BAKERY Complete. 1237/1694
44_CELEBRATION Complete. 1238/1694
44_CLEANING Complete. 1239/1694
44_DAIRY Complete. 1240/1694
44_DELI Complete. 1241/1694
44_EGGS Complete. 1242/1694
44_FROZEN FOODS Complete. 1243/1694
44_GROCERY I Complete. 1244/1694
44_GROCERY II Complete. 1245/1694
44_HARDWARE Complete. 1246/1694
44_HOME AND KITCHEN I Complete. 1247/1694
44_HOME AND KITCHEN II Complete. 1248/1694
44_HOME APPLIANCES Complete. 1249/1694
44_HOME CARE Complete. 1250/1694
44_LADIESWEAR Complete. 1251/1694
44_LAWN AND GARDEN Complete. 1252/1694
44_LINGERIE Complete. 1253/1694
44_LIQUOR,WINE,BEER Complete. 1254/1694
44_MAGAZINES Complete. 1255/1694
44_MEATS Compl

51_GROCERY I Complete. 1464/1694
51_GROCERY II Complete. 1465/1694
51_HARDWARE Complete. 1466/1694
51_HOME AND KITCHEN I Complete. 1467/1694
51_HOME AND KITCHEN II Complete. 1468/1694
51_HOME APPLIANCES Complete. 1469/1694
51_HOME CARE Complete. 1470/1694
51_LADIESWEAR Complete. 1471/1694
51_LAWN AND GARDEN Complete. 1472/1694
51_LINGERIE Complete. 1473/1694
51_LIQUOR,WINE,BEER Complete. 1474/1694
51_MAGAZINES Complete. 1475/1694
51_MEATS Complete. 1476/1694
51_PERSONAL CARE Complete. 1477/1694
51_PET SUPPLIES Complete. 1478/1694
51_PLAYERS AND ELECTRONICS Complete. 1479/1694
51_POULTRY Complete. 1480/1694
51_PREPARED FOODS Complete. 1481/1694
51_PRODUCE Complete. 1482/1694
51_SCHOOL AND OFFICE SUPPLIES Complete. 1483/1694
51_SEAFOOD Complete. 1484/1694
53_AUTOMOTIVE Complete. 1485/1694
53_BEAUTY Complete. 1486/1694
53_BEVERAGES Complete. 1487/1694
53_BOOKS Complete. 1488/1694
53_BREAD/BAKERY Complete. 1489/1694
53_CELEBRATION Complete. 1490/1694
53_CLEANING Complete. 1491/1694
53_DAIR

19_SCHOOL AND OFFICE SUPPLIES Complete. 1692/1694
13_SCHOOL AND OFFICE SUPPLIES Complete. 1693/1694
15_SCHOOL AND OFFICE SUPPLIES Complete. 1694/1694
(23784369, 30)


Unnamed: 0,date,store_nbr,unit_sales,family,Oil Price,onpromotion,unit_sales (t-1),unit_sales (t-2),unit_sales (t-3),unit_sales (t-4),...,Oil Price (t-3),Oil Price (t-4),Oil Price (t-5),Oil Price (t-6),Oil Price (t-7),Oil Price (t-8),Oil Price (t-9),Oil Price (t-10),Oil Price (t-11),Oil Price (t-12)
0,2017-01-07,1,3.0,AUTOMOTIVE,53.303333,0,1.0,1.0,1.0,1.0,...,53.98,53.77,53.77,53.26,52.36,52.36,52.36,52.36,52.7075,52.7075
1,2017-01-07,1,1.0,AUTOMOTIVE,53.303333,0,3.0,1.0,1.0,1.0,...,53.98,53.98,53.77,53.77,53.26,52.36,52.36,52.36,52.36,52.7075
2,2017-01-09,1,1.0,AUTOMOTIVE,51.95,0,1.0,3.0,1.0,1.0,...,53.303333,53.98,53.98,53.77,53.77,53.26,52.36,52.36,52.36,52.36
3,2017-01-09,1,1.0,AUTOMOTIVE,51.95,0,1.0,1.0,3.0,1.0,...,53.303333,53.303333,53.98,53.98,53.77,53.77,53.26,52.36,52.36,52.36
4,2017-01-10,1,1.0,AUTOMOTIVE,50.82,0,1.0,1.0,1.0,3.0,...,53.303333,53.303333,53.303333,53.98,53.98,53.77,53.77,53.26,52.36,52.36


In [10]:
#df.drop(['Oil Price','unit_sales (t-1)','unit_sales (t-2)','unit_sales (t-3)','unit_sales (t-4)',
#        'unit_sales (t-5)','unit_sales (t-6)','unit_sales (t-7)','unit_sales (t-8)','unit_sales (t-9)',
#        'unit_sales (t-10)','unit_sales (t-11)','unit_sales (t-12)','Oil Price (t-1)','Oil Price (t-2)',
#        'Oil Price (t-3)','Oil Price (t-4)','Oil Price (t-5)','Oil Price (t-6)','Oil Price (t-7)',
#        'Oil Price (t-8)','Oil Price (t-9)','Oil Price (t-10)','Oil Price (t-11)','Oil Price (t-12)'],axis=1,inplace=True)
#df.drop('Oil Price',axis=1,inplace=True)
#df = create_dummies(df,['store_nbr','family'])
#print(df.shape)
#df.head()

(23784369, 114)


Unnamed: 0,date,unit_sales,onpromotion,unit_sales (t-1),unit_sales (t-2),unit_sales (t-3),unit_sales (t-4),unit_sales (t-5),unit_sales (t-6),unit_sales (t-7),...,family_MAGAZINES,family_MEATS,family_PERSONAL CARE,family_PET SUPPLIES,family_PLAYERS AND ELECTRONICS,family_POULTRY,family_PREPARED FOODS,family_PRODUCE,family_SCHOOL AND OFFICE SUPPLIES,family_SEAFOOD
0,2017-01-02,4.0,0,3.0,5.0,27.0,3.0,8.0,14.0,3.0,...,0,0,0,0,0,0,0,0,0,0
1,2017-01-02,17.0,0,4.0,3.0,5.0,27.0,3.0,8.0,14.0,...,0,0,0,0,0,0,0,0,0,0
2,2017-01-02,1.0,0,17.0,4.0,3.0,5.0,27.0,3.0,8.0,...,0,0,0,0,0,0,0,0,0,0
3,2017-01-02,5.0,0,1.0,17.0,4.0,3.0,5.0,27.0,3.0,...,0,0,0,0,0,0,0,0,0,0
4,2017-01-02,2.0,0,5.0,1.0,17.0,4.0,3.0,5.0,27.0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
#Split into train and test and create data loader objects
#split = '2017-07-15'
#X_train,y_train,X_test,y_test = data_split(df)
#del df
#gc.collect()
#X_train,y_train,X_test,y_test = data_prep(X_train,y_train,X_test,y_test)
#train_iter,test_iter = loader(X_train,y_train,X_test,y_test)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


The training mean squared error for Plain Backpropogation is: 3.690158109748154e-06
The validation mean squared error for Plain Backpropogation is: 2.632255473145051e-06
The computational effort for Plain Backpropogation in milliseconds is: 10202598.0


60

Model 224
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38


In [None]:
# plot_comp is defined outside this excerpt; it receives the pbpt_* MSE list and the
# timing list of the plain backpropagation runs
plot_comp(pbpt_mse_lst,pbp_time)

In [None]:
# Report the plain-backpropagation run with the lowest pbpt_* error. These pbp_* lists
# belong to the plain backpropagation experiment; the through-time results are kept in
# the separate rnn_* lists, so the messages must name plain backpropagation.
best_model_pbb = pbpt_mse_lst.index(min(pbpt_mse_lst))
print(f"The best training mean squared error for Plain Backpropogation is: {pbp_mse_lst[best_model_pbb]}")
print(f"The best validation mean squared error for Plain Backpropogation is: {pbpt_mse_lst[best_model_pbb]}")
print(f"The best computational effort for Plain Backpropogation in milliseconds is: {pbp_time[best_model_pbb]}")

In [None]:
#plot_prediction(y_test,pb2y_pred)

In [None]:
# Collect garbage first so tensors kept alive only by reference cycles are freed;
# empty_cache() can only release cached blocks that are no longer occupied.
gc.collect()
torch.cuda.empty_cache()

## Backpropagation through time

In [None]:
# Column list handed to read_process_data: the current oil price, then lags 1-12 of
# unit sales, then lags 1-12 of the oil price
lag_steps = range(1, 13)
sequence_cols = (
    ['Oil Price']
    + [f'unit_sales (t-{step})' for step in lag_steps]
    + [f'Oil Price (t-{step})' for step in lag_steps]
)
train_iter,test_iter,y_test,month = read_process_data(sequence_cols)

In [None]:
# Define a class for the RNN model
class RecurrentModel(nn.Module):
    """Wrapper around ``nn.RNN`` that returns one value per row of its input.

    Parameters
    ----------
    input_size : int
        Number of features per step, forwarded to ``nn.RNN``.
    hidden_size : int
        Width of the recurrent hidden state.
    batch_size : int
        Size of the second axis the input is reshaped to in ``forward``.
    num_layers : int
        Number of stacked recurrent layers.

    NOTE(review): ``nn.RNN`` is built without ``batch_first``, so it treats the
    first axis of its input as time and the second as batch. ``forward`` puts
    ``len(x)`` on the first axis, so the recurrence runs across the rows of
    ``x``. Confirm this is the intended layout.
    """

    def __init__(self, input_size, hidden_size, batch_size,num_layers=1):
        super().__init__()
        # Initialize attributes used by the model
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers

        # Define an RNN layer
        self.rnn = nn.RNN(self.input_size, self.hidden_size,num_layers = self.num_layers)

    def hidden_reset(self):
        """Return two zero tensors of shape (num_layers, batch_size, hidden_size).

        The tensors are created with the dtype and device of the model's
        parameters so they remain usable after ``.to(device)`` or ``.double()``.

        NOTE(review): ``forward`` never uses this state, and ``nn.RNN`` takes a
        single hidden tensor rather than an LSTM-style pair. The pair is kept
        so existing callers see the same return structure.
        """
        reference = next(self.parameters())
        shape = (self.num_layers, self.batch_size, self.hidden_size)
        return (reference.new_zeros(shape), reference.new_zeros(shape))

    def forward(self, x):
        """Run ``x`` through the RNN and return a ``(len(x), 1)`` tensor.

        ``x`` is reshaped to ``(len(x), batch_size, -1)``. The result keeps, for
        every position on the first axis, the last hidden unit of the last
        entry on the second axis. The initial hidden state is left to
        ``nn.RNN`` (zeros).
        """
        out, _ = self.rnn(x.view(len(x), self.batch_size, -1))

        # Last entry of the second axis, last hidden unit, as a column vector
        return out[:, -1, -1].view(len(out), 1)

In [None]:
# Results of every (hidden_size, num_layers) combination, in loop order
rnn_mse_lst = []
rnnt_mse_lst = []
rnny_pred_lst = []
rnn_time = []

# CUDA events only work on a GPU; when the notebook fell back to the CPU, time the
# run with a wall clock instead so this cell still executes.
use_cuda_timer = device.type == 'cuda'

for i in [5,8,14,18]:
    for j in [1,2,3]:
        print(f"Model {i},{j}")
        if use_cuda_timer:
            # Initialize objects to track execution time
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)
            # Begin recording execution time
            start.record()
        else:
            start_seconds = time.perf_counter()

        # Initialize model: i hidden units, j stacked layers
        model = RecurrentModel(input_size = 2, hidden_size = i, batch_size = num_cols,num_layers=j)
        model.to(device)
        # train is defined outside this excerpt; presumably 60 is the epoch count - TODO confirm
        rnn_mse,rnnt_mse,rnny_pred = train(model,60,True)
        rnn_mse_lst.append(rnn_mse)
        rnnt_mse_lst.append(rnnt_mse)
        rnny_pred_lst.append(rnny_pred)

        if use_cuda_timer:
            # Stop recording execution time
            end.record()
            # Wait for kernels in all streams of a cuda device to complete
            torch.cuda.synchronize()
            # Compute and store execution time (milliseconds)
            rnn_time.append(start.elapsed_time(end))
        else:
            # Same unit as the CUDA path: milliseconds
            rnn_time.append((time.perf_counter() - start_seconds) * 1000.0)

In [None]:
# Snapshot memory figures for GPU 0 (requires a CUDA device)
t = torch.cuda.get_device_properties(0).total_memory  # total memory of the device
c = torch.cuda.memory_reserved(0)  # memory reserved by PyTorch's caching allocator
a = torch.cuda.memory_allocated(0)  # memory currently occupied by tensors
f = c-a  # reserved but unallocated, i.e. free inside the cache

In [None]:
# Print total, reserved, allocated and free-in-cache memory, in that order
print(t)
print(c)
print(a)
print(f)