In [1]:
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import seaborn as sns
import numpy as np
import pandas as pd
import torch
from datetime import date, timedelta
from torch import nn,optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
%matplotlib inline

In [2]:
plt.style.use('seaborn-bright')

In [3]:
# If a gpu is available, set gpu else cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Import the Data

In [4]:
df = pd.read_csv('final.csv',parse_dates=['date'])
df.head()

Unnamed: 0,date,store_nbr,item_nbr,unit_sales,onpromotion,family,class,perishable,city,state,type,cluster,Oil Price
0,2013-01-01,25,103665,7.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.14
1,2013-01-02,25,103665,5.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.14
2,2013-01-04,25,103665,5.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.12
3,2013-01-05,25,103665,5.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.146667
4,2013-01-06,25,103665,7.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.173333


In [5]:
reg_dict = {'El Oro': 'Coast','Esmeraldas': 'Coast','Santo Domingo de los Tsachilas':'Coast','Guayas':'Coast','Los Rios':'Coast'
           ,'Manabi':'Coast','Santa Elena':'Coast',
           'Azuay':'Sierra','Bolivar':'Sierra','Cotopaxi':'Sierra','Chimborazo':'Sierra','Imbabura':'Sierra'
            ,'Loja':'Sierra','Pichincha':'Sierra','Tungurahua':'Sierra','Pastaza':'Amazon'}

In [6]:
df['Geographic Region'] = df['state'].map(reg_dict)
df.head()

Unnamed: 0,date,store_nbr,item_nbr,unit_sales,onpromotion,family,class,perishable,city,state,type,cluster,Oil Price,Geographic Region
0,2013-01-01,25,103665,7.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.14,Coast
1,2013-01-02,25,103665,5.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.14,Coast
2,2013-01-04,25,103665,5.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.12,Coast
3,2013-01-05,25,103665,5.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.146667,Coast
4,2013-01-06,25,103665,7.0,False,BREAD/BAKERY,2712,1,Salinas,Santa Elena,D,1,93.173333,Coast


In [7]:
# Defined a function to plot predicted values vs actual values
def plot_prediction(data,tens):
    month = data['date'][split:].to_numpy()

    plt.figure(figsize = (18,6))
    plt.plot(month,y_test)
    plt.plot(month,tens.cpu().numpy()[:,-1])
    plt.legend(['Actual Values', 'Predicted Values'], loc='upper left')
    plt.xticks(rotation=90)
    plt.xlabel("Year")
    plt.ylabel("Unit Sales")
    plt.title("Unit Sales vs Year")
    
    ax = plt.gca()
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=15.0))
    plt.show()

## Data Transformation and Visualization

In [8]:
# The below function creates lag observation(t-n)
def transformer(cnt,df):
    lst = []
    col_values= []
    length = len(df)
    temp_df = df.drop(list(range(cnt + 1)))
    col = list(df_final.columns)
    col.remove('date')
    
    for val in col:
        tmp = cnt
        for i in range(length-1,length-tmp-1,-1):
            lst.append(df[val].array[tmp:i])
            temp_df[f'{val} (t-{length - i})'] = df[val].array[tmp:i]
            tmp -= 1

    return temp_df.reset_index(drop = True)

In [9]:
def data_split(data):
    # Split data into training and testing sets
    data_train = data[:split]
    data_test  = data[split:]

    min_max_scaler = MinMaxScaler()
    
    mm = min_max_scaler.fit(data_train.drop('date',axis = 1))
    data_train = pd.DataFrame(mm.transform(data_train.drop('date',axis = 1)),columns=data_train.drop('date',axis = 1).columns, index=data_train.drop('date',axis = 1).index)
    data_test  = pd.DataFrame(mm.transform(data_test.drop('date',axis = 1)),columns=data_test.drop('date',axis = 1).columns, index=data_test.drop('date',axis = 1).index)
    
    # Split data into dependent and independent variables
    X_train = data_train.drop(['unit_sales'],axis = 1).to_numpy()
    y_train = data_train['unit_sales'].to_numpy()
    X_test  = data_test.drop(['unit_sales'],axis = 1).to_numpy()
    y_test  = data_test['unit_sales'].to_numpy()
    
    return X_train,y_train,X_test,y_test

# Defined a function to convert numpy arrays into tensors
def data_prep(X_train,y_train,X_test,y_test):
    
    # Convert numpy arrays into tensors
    X_train = torch.tensor(X_train).float()
    y_train = torch.tensor(y_train).view(-1, 1).float()

    X_test = torch.tensor(X_test).float()
    y_test = torch.tensor(y_test).view(-1, 1).float()

    return X_train,y_train,X_test,y_test
    

def loader(X_train,y_train,X_test,y_test):
    # Build a data loader object from the two tensors for training data
    train_datasets = torch.utils.data.TensorDataset(X_train, y_train)
    train_iter = torch.utils.data.DataLoader(train_datasets, batch_size=round(len(X_train)/8), shuffle=False)

    # Build a data loader object from the two tensors for testing data
    test_datasets = torch.utils.data.TensorDataset(X_test, y_test)
    test_iter = torch.utils.data.DataLoader(test_datasets, batch_size=round(len(X_test)), shuffle=False)
    
    return train_iter,test_iter

## Backpropogation through time

In [10]:
# IDefine a class for the RNN model
class RecurrentModel(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size,num_layers=1):
        super().__init__()
        # Initialize attributes used by the model
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        
        # Define an RNN layer
        self.rnn = nn.RNN(self.input_size, self.hidden_size,num_layers = self.num_layers)
    
    # Define a method to set the hidden state at the beginning of every epoch
    def hidden_reset(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_size),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_size))

    def forward(self, x):
        # Define the sequence of operation and pass the input tensor through each operation
        x,_ = self.rnn(x.view(len(x), self.batch_size, -1))
        
        return x[:,:,-1][:,-1].view(len(x),1)

In [11]:
# Define a function to calculate the validation MSE
def test(model):
    # Use no_grad to allow us to perform regular Python operations on tensors, independent of PyTorch’s computation graph
    with torch.no_grad():
        # Loop through the test data
        for inputs, labels in test_iter:
            # Move the tensors to the device selected above (GPU or CPU)
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Enter model evaluation mode
            model.eval()
            # Make predictions on the validation dataset
            y_pred = model(inputs)
            # Calculate the mean squared error for the predictions
            loss = lossfunc(y_pred, labels)
    
    # Return MSE and prediction values
    return loss.item(),y_pred

In [12]:
# Initialze the loss function as Mean Square Error loss
lossfunc = nn.MSELoss()

#Define a function to train the model
def train(model, num_epochs = 200,recurr = False):
    # Initialize the optimizer as Stochastic Gradient Descent and initialize the learning rate
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    
    # Loop over the epochs
    for epoch in range(num_epochs):
        # Check if the model being passed in is a recurrent neural network
        if recurr == True:
            model.hidden = model.hidden_reset()
        # Loop through each batch in the training dataset
        for inputs, labels in train_iter:
            # Move the tensors to the device selected above (GPU or CPU)
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Call the model to perform the forward computation
            outputs = model(inputs)
            # Compute the loss given our outputs and labels
            loss = lossfunc(outputs, labels)
            # Since Gradients are accumulated, we have to zero them
            optimizer.zero_grad()
            # Compute Gradients using backpropogation
            loss.backward()
            # Update weights based on the current gradient
            optimizer.step()

    # Store the final validation and training loss
    test_mse,y_pred = test(model)
    mse = loss.item()
            
    return mse,test_mse,y_pred

In [13]:
# Define a function to reshape array into a 3 dimensional array
def array_reshape(arr):
    return np.reshape(arr,(len(arr),arr.shape[1],1))

In [14]:
pred_df = pd.DataFrame(columns=['labels','predictions','Geographic Region','family'])
pred_df.to_csv('pred.csv',index=False)

In [15]:
num_cols = 1
i = 0
for reg in df['Geographic Region'].unique():
    for fam in df['family'].unique():
        df_final = df[(df['Geographic Region'] == reg) & (df['family'] == fam)]
        df_final = pd.DataFrame(df.groupby(by=['date'])[['unit_sales','Oil Price']].sum())
        df_final = df_final.reset_index()
        split = int(round(0.70 * len(df_final),0))
        
        rnn_df = transformer(num_cols,df_final)
        rnn_df.drop('Oil Price',axis=1,inplace=True)
        
        # Transform the data and create data loader objects
        X_train,y_train,X_test,y_test = data_split(rnn_df)
        X_train = array_reshape(X_train)
        X_test = array_reshape(X_test)
        X_train,y_train,X_test,y_test = data_prep(X_train,y_train,X_test,y_test) 
        train_iter,test_iter = loader(X_train,y_train,X_test,y_test)
        
        model = RecurrentModel(input_size = 2, hidden_size = 18, batch_size = num_cols,num_layers=1)
        model.to(device)
        rnn_mse,rnnt_mse,rnny_pred = train(model,400,True)
        pred_df = pd.DataFrame()
        pred_df['labels'] = y_test.tolist()
        pred_df['predictions'] = rnny_pred.tolist()
        pred_df['Geographic Region'] = [reg] * len(pred_df['labels'])
        pred_df['family'] = [fam] * len(pred_df['labels'])
        pred_df.to_csv('pred.csv',index=False,mode='a', header=False)
        i +=1
        print(f'{i} models run')

1 models run
2 models run
3 models run
4 models run
5 models run
6 models run
7 models run
8 models run
9 models run
10 models run
11 models run
12 models run
13 models run
14 models run
15 models run
16 models run
17 models run
18 models run
19 models run
20 models run
21 models run
22 models run
23 models run
24 models run
25 models run
26 models run
27 models run
28 models run
29 models run
30 models run
31 models run
32 models run
33 models run
34 models run
35 models run
36 models run
37 models run
38 models run
39 models run
40 models run
41 models run
42 models run
43 models run
44 models run
45 models run
46 models run
47 models run
48 models run
49 models run
50 models run
51 models run
52 models run
53 models run
54 models run
55 models run
56 models run
57 models run
58 models run
59 models run
60 models run
61 models run
62 models run
63 models run
64 models run
65 models run
66 models run
67 models run
68 models run
69 models run
70 models run
71 models run
72 models run
7

In [18]:
pred_df = pd.read_csv('pred.csv')
pred_df.head()

Unnamed: 0,labels,predictions,Geographic Region,family
0,[0.5316542983055115],[0.5957717895507812],Coast,BREAD/BAKERY
1,[0.4967631697654724],[0.4846758544445038],Coast,BREAD/BAKERY
2,[0.704898476600647],[0.5121616125106812],Coast,BREAD/BAKERY
3,[0.9320246577262878],[0.6948016881942749],Coast,BREAD/BAKERY
4,[1.0267444849014282],[0.7554768919944763],Coast,BREAD/BAKERY


In [19]:
pred_df['labels'] = pred_df['labels'].str.slice(1,-2).astype('float')
pred_df['predictions'] = pred_df['predictions'].str.slice(1,-2).astype('float')

In [20]:
pred_df.head()

Unnamed: 0,labels,predictions,Geographic Region,family
0,0.531654,0.595772,Coast,BREAD/BAKERY
1,0.496763,0.484676,Coast,BREAD/BAKERY
2,0.704898,0.512162,Coast,BREAD/BAKERY
3,0.932025,0.694802,Coast,BREAD/BAKERY
4,1.026744,0.755477,Coast,BREAD/BAKERY


In [21]:
mean_squared_error(pred_df['labels'],pred_df['predictions'])

0.013882312780108678