# Lab 10: Deep Learning for Sequential Data

---


## Future total sales of pizza<br>
In this lab, we will predict the future total sales of pizza from sequential transaction data using an LSTM.<br><br>

This pizza sales dataset is made up of 12 relevant features:<br>

order_id: Unique identifier for each order placed by a table<br>
order_details_id: Unique identifier for each pizza placed within each order (pizzas of the same type and size are kept in the same row, and the quantity increases)<br>
pizza_id: Unique key identifier that ties the pizza ordered to its details, like size and price<br>
quantity: Quantity ordered for each pizza of the same type and size<br>
order_date: Date the order was placed (entered into the system prior to cooking & serving)<br>
order_time: Time the order was placed (entered into the system prior to cooking & serving)<br>
unit_price: Price of the pizza in USD<br>
total_price: unit_price * quantity<br>
pizza_size: Size of the pizza (Small, Medium, Large, X Large, or XX Large)<br>
pizza_type: Unique key identifier that ties the pizza ordered to its details, like size and price<br>
pizza_ingredients: ingredients used in the pizza as shown in the menu (they all include Mozzarella Cheese, even if not specified; and they all include Tomato Sauce, unless another sauce is specified)<br>
pizza_name: Name of the pizza as shown in the menu<br>

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read
# (the google.colab package is only available inside a Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

from itertools import chain
import torch
import torch.nn as nn
import torch.optim as optim

### 1. Upload and clean data

In [None]:
# Read the pizza sales CSV from the mounted Google Drive into a DataFrame
PizzaSales = pd.read_csv("/content/drive/MyDrive/DL_data/PizzaSales.csv")
# Bare expression at the end of the cell: the notebook renders the DataFrame
PizzaSales

In [None]:
# Preview the first rows of the DataFrame (head() returns 5 rows by default)
PizzaSales.head()

In [None]:
# Examine the data type pandas inferred for each column
PizzaSales.dtypes

In [None]:
# Examine the data size as a (rows, columns) tuple
PizzaSales.shape

### 2. Simple data exploration

In [None]:
# Convert the order_date column to datetime so that the .dt accessor and
# date-based grouping used in later cells work
PizzaSales['order_date'] = pd.to_datetime(PizzaSales['order_date'])

In [None]:
# Examine the number of order lines by month.
# dt.month_name() returns the English month name ('January' ... 'December') directly,
# so order_month is a string column from the start instead of an integer column
# that gets overwritten with strings one month at a time.
# pd.to_datetime() is a no-op if order_date is already datetime and keeps this
# cell working if it is still stored as text.
PizzaSales['order_month'] = pd.to_datetime(PizzaSales['order_date']).dt.month_name()
# value_counts() counts rows (order lines) per month, most frequent first
PizzaSales['order_month'].value_counts()

In [None]:
# Examine the number of order lines by day of the week
# (value_counts() counts rows, i.e. order lines; it does not sum the quantity column)
PizzaSales["Dayofweek"] = PizzaSales['order_date'].dt.day_name()
PizzaSales["Dayofweek"].value_counts()

In [None]:
# Examine the number of order lines by hour of the day
# order_time is split on ":" into three new columns; the pieces remain strings
# (assumes order_time is formatted HH:MM:SS — confirm against the CSV)
PizzaSales[['Hour','Minute', 'Second']]= PizzaSales['order_time'].str.split(":",expand=True)
PizzaSales["Hour"].value_counts()

In [None]:
# Examine the number of order lines for each pizza_id
# (row counts; the quantity column is not taken into account)
PizzaSales["pizza_id"].value_counts()

In [None]:
# Examine the number of order lines for each pizza size
# (row counts; the quantity column is not taken into account)
PizzaSales["pizza_size"].value_counts()

In [None]:
# Largest order_id present in the data
PizzaSales['order_id'].max()

In [None]:
# Examine the average order value: total revenue divided by the number of distinct orders.
# nunique() counts the orders actually present in the data, so the result stays correct
# even if order_id values do not run gap-free from 1 (dividing by max() would then
# overstate the number of orders and understate the average).
PizzaSales['total_price'].sum() / PizzaSales['order_id'].nunique()

### 3. Partition the data set for pizza sales prediction

In [None]:
# Organize sales by date: one row per order_date with the daily totals of
# quantity and total_price.
# Only the two columns that are kept are selected before summing, so the remaining
# columns (ids, names, ingredients, times, ...) are not aggregated needlessly.
sales_by_date = PizzaSales.groupby('order_date')[['quantity', 'total_price']].sum()
# The index now holds the order dates; derive the weekday name from it
sales_by_date["Dayofweek"] = sales_by_date.index.day_name()
sales_by_date

In [None]:
# Create dummy variables for the day of the week.
# drop_first=True drops the first category so the remaining indicators are not redundant.
# dtype=float keeps every column numeric: pandas >= 2.0 otherwise emits bool dummies,
# and a frame mixing float and bool columns becomes an object array under .values,
# which torch.tensor() cannot convert.
sales_by_date = pd.get_dummies(sales_by_date, columns=['Dayofweek'], drop_first=True, dtype=float)
sales_by_date

In [None]:
# Extract the total sales


In [None]:
# Apply standardization to the two numeric columns
# NOTE(review): the scaler is fit on every date before the train/test split performed
# later in the notebook, so statistics of the rows that end up in the test set
# influence the scaling — consider fitting on the training portion only.
numeric_variables = sales_by_date[['quantity', 'total_price']]
scaler_s = StandardScaler().fit(numeric_variables)
standard_variables = scaler_s.transform(numeric_variables)
# Write the scaled values back in place of the raw ones
sales_by_date[['quantity', 'total_price']] = standard_variables

In [None]:
# Create sequences


In [None]:
# Partition the data: hold out 30% of the rows for testing; random_state=0 makes
# the split reproducible
predictors = seq_data.drop(columns=['total_sales'])
target = seq_data['total_sales']
predictors_train, predictors_test, target_train, target_test = train_test_split(
    predictors,
    target,
    test_size=0.3,
    random_state=0,
)
# Report the shapes of the four resulting pieces
print(predictors_train.shape, predictors_test.shape, target_train.shape, target_test.shape)

In [None]:
# Examine the distribution of the target variable in the training data set
# (histplot returns the matplotlib Axes, which is then given a title)
snsplot = sns.histplot(data = target_train)
snsplot.set_title("Histogram of total_sales in the training data set")

In [None]:
# Examine the distribution of the target variable in the testing data set
# (histplot returns the matplotlib Axes, which is then given a title)
snsplot = sns.histplot(data = target_test)
snsplot.set_title("Histogram of total_sales in the testing data set")

### 4. Neural network prediction and evaluation

In [None]:
# Build a neural network on training data
class extract_tensor(nn.Module):
    """Keep only the last step of a recurrent layer's output.

    Meant to sit between a recurrent layer and the layers that follow it
    (presumably an ``nn.LSTM`` with ``batch_first=True`` inside an
    ``nn.Sequential`` — confirm against the model definition).
    """

    def forward(self, x):
        # x must be a 2-item pair; only the first item (the output tensor) is
        # used, the second (presumably the recurrent state) is discarded
        sequence_output, _unused = x
        # Index -1 on the second axis selects the final step of every
        # sequence (the time axis when the input is batch-first)
        last_step = sequence_output[:, -1, :]
        return last_step


In [None]:
# Create tensors from the pandas data.
# Each flattened row is reshaped into one sequence of SEQUENCE_LENGTH time steps with
# INPUT_SIZE features per step (the past three days are used to predict the next day's sales).
SEQUENCE_LENGTH = 3
INPUT_SIZE = 8

# -1 lets PyTorch infer the number of sequences, so this cell keeps working when the
# split sizes differ from the previously hard-coded 248 / 107 rows.
# to_numpy(dtype='float64') guarantees a numeric array even if some columns are bool.
predictors_train_tensor = torch.tensor(predictors_train.to_numpy(dtype='float64')).view(-1, SEQUENCE_LENGTH, INPUT_SIZE)
target_train_tensor = torch.tensor(target_train.values)
predictors_test_tensor = torch.tensor(predictors_test.to_numpy(dtype='float64')).view(-1, SEQUENCE_LENGTH, INPUT_SIZE)
target_test_tensor = torch.tensor(target_test.values)

# Create tensor datasets (predictors and target are both cast to float32)
train_dataset = torch.utils.data.TensorDataset(predictors_train_tensor.float(), target_train_tensor.float())
test_dataset = torch.utils.data.TensorDataset(predictors_test_tensor.float(), target_test_tensor.float())

# Define training and testing data loaders with batch size 16;
# only the training batches are shuffled
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Define training loop function
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    For every batch the loss is computed, gradients are reset and
    back-propagated, and the optimizer takes one step. The mean batch loss
    is printed for epoch 0, every 50th epoch and the final epoch.

    Args:
        n_epochs: Number of passes over the training data.
        optimizer: Optimizer that updates the model parameters.
        model: Module being trained; switched to training mode each epoch.
        loss_fn: Callable taking (outputs, labels) and returning the loss.
        train_loader: Iterable of (inputs, labels) batches supporting len().

    Returns:
        None. Progress is reported through print().
    """
    final_epoch = n_epochs - 1
    for epoch in range(n_epochs):
        model.train()
        batch_losses = []
        for inputs, labels in train_loader:
            loss = loss_fn(model(inputs), labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        # epoch 0 is already covered by the multiple-of-50 check
        should_report = epoch % 50 == 0 or epoch == final_epoch
        if should_report:
            mean_loss = sum(batch_losses, 0.0) / len(train_loader)
            print('Epoch {}, Training loss {}'.format(epoch, mean_loss))

In [None]:
# Model training


In [None]:
# Define testing function
def _collect_predictions(model, loader):
    """Run ``model`` on every batch of ``loader``; return flat (labels, predictions) lists."""
    labels_all = []
    predictions_all = []
    for inputs, labels in loader:
        predictions_all.extend(model(inputs).tolist())
        labels_all.extend(labels.tolist())
    return labels_all, predictions_all


def test(model, train_loader, test_loader):
    """Print MAE and RMSE of ``model`` on the training and testing data.

    Args:
        model: Trained module; it is switched to evaluation mode.
        train_loader: Iterable of (inputs, labels) training batches.
        test_loader: Iterable of (inputs, labels) testing batches.

    Returns:
        None. The four metrics are reported through print().
    """
    # testing phase: evaluation mode, no gradient tracking
    model.eval()
    with torch.no_grad():
        label_train, predict_train = _collect_predictions(model, train_loader)
        label_test, predict_test = _collect_predictions(model, test_loader)

    # RMSE is the square root of the MSE. The `squared=False` shortcut of
    # mean_squared_error is deprecated and no longer accepted by recent
    # scikit-learn releases, so the root is taken explicitly.
    MAE_train = mean_absolute_error(label_train, predict_train)
    RMSE_train = mean_squared_error(label_train, predict_train) ** 0.5

    MAE_test = mean_absolute_error(label_test, predict_test)
    RMSE_test = mean_squared_error(label_test, predict_test) ** 0.5

    print("Training MAE and RMSE:", MAE_train, RMSE_train)
    print()
    print("testing MAE and RMSE:", MAE_test, RMSE_test)

In [None]:
# Examine evaluation results


In [None]:
!jupyter nbconvert --to html "/content/drive/MyDrive/DL_lab/Lab10:Learning_from_Squential_Data.ipynb"