# 4.1 Deep Learning Basic Model

The Deep learning section will be split into two sections

1. **4.1 Notebook example of a Neural Network**
2. **4.2 To further explore the best model for the task, an experiment   
   version connected to Microsoft Azure ML Studio will be created**  

This Notebook details a bare bone example of a Neural Network implemented in Pytorch.  

Whilst a full solution. This still needs a fair bit of work to get the accuracy anywhere near acceptable (at very lb 70%). 

In [300]:
from pathlib import Path
import pandas as pd
import numpy as np
import copy
from time import time

from torch import nn
import torch
from torch.utils.data import Dataset, DataLoader

To capitalise on the use of the GPU, define whether cuda is available. If not retain the device as the cpu

In [40]:
device = "cuda" if torch.cuda.is_available() else "cpu"

Create a class to define the wine dataset 

In [472]:
from sklearn.preprocessing import StandardScaler

In [621]:
class Winedataset(Dataset):
    """
    Returns the dataset of the given phase
    
    Parameters:
    -----------------------
    phase: str 
        Either 'train', 'valid', or 'test'. Determines the type of data set required
    """
    
    def __init__(self, phase = "train"):

        #Check whether the phase is train, valid or test
        assert phase in ["train","valid","test"], "Phase must be either 'train', 'valid', or 'test'"
        self.phase = phase
        
        #Base path for the folder
        base_path = Path("data","stratified_sets")
        file_name = phase + ".csv" 
        #Load phase data
        xy = pd.read_csv(base_path / file_name)
        
        #Seperate the x and y data
        self.y = xy.quality
        self.X = xy.drop("quality",axis=1)

        #Define some charecteristics of the data
        self.feature_names = self.X.columns
        self.label_name = self.y.name
        self.classes = np.arange(1,11)
        
        #Define the set size
        self.n_samples = self.X.shape[0]
        self.n_features = self.X.shape[1]
        self.n_classes = self.classes.shape[0]
        
        #Preprocessing
        #Normalise data
        self.X = self.normalise()
        
        #Convert the data into torch tensors
        self.y = torch.tensor(y.values, dtype = torch.long)
        self.X = torch.tensor(self.X, dtype = torch.float)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]
        
    def __len__(self):
        return self.n_samples
    
    def normalise(self):
        if phase != "train":
            #Check if scaler is defined
            try:
                Winedataset.scaler
            except NameError:
                raise Exception("Preprocessing requires training phase to be called before validation and test phases")
        
        if self.phase == "train":
            Winedataset.scaler = StandardScaler()
            return Winedataset.scaler.fit_transform(self.X)
        else:
            return Winedataset.scaler.transform(self.X)

#### Define the architecture to use in the model

In [622]:
class ClassificationNetwork(nn.Module):
    """
    Classification Neural Network for Tabular Data
    
    (0): Linear(in_features=65, out_features=200, bias=True)
    (1): ReLU(inplace)
    (2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=200, out_features=100, bias=True)
    (4): ReLU(inplace)
    (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Linear(in_features=100, out_features=2, bias=True)
    
    """
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(ClassificationNetwork,self).__init__()
        
        # Rectified Linear Unit max(x,0)
        self.relu = nn.ReLU()
        
        # Linear Layers with a Bias
        self.l1 = nn.Linear(input_size,hidden_size1)
        self.l2 = nn.Linear(hidden_size1, hidden_size2)
        self.l3 = nn.Linear(hidden_size2, output_size)
        
        #Batch Normalization
        self.bn1 = nn.BatchNorm1d(hidden_size1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.bn2 = nn.BatchNorm1d(hidden_size2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        
       
    def forward(self, x):
        
        out = self.bn1(self.relu(self.l1(x)))
        out = self.bn2(self.relu(self.l2(out)))        
        out = self.l3(out)
        return out

#### Create an optimisation loop to optimise the model

In [644]:
def fit_model(train_dataloader, valid_dataloader, n_epochs, model, optimiser, critereon, scheduler):
    """
    Optimises a Pytorch Neural Network.
    
    parameters
    ------------------------
    train_dataloader: torch.util.data.DataLoader  
        The dataloader with the data that is to be used to update the Paramaters
        
    valid_dataloader: torch.util.data.DataLoader
        The dataloader with the data that is used to caluclate model performance and metrics 
    
    n_epochs:
        How many times to iterate over the datasets
        
    model: 
        Neural Network that is to be optimised 
        
    optimiser: torch.nn.optim.<Optimiser class>
        The optimiser that will govern the calculation of gradient updates and the update of Parameters
        
    critereon: torch.nn.<critereon>
        Loss/cost function
    
    """
    #Start up time to report model training time
    start = time()
    
    #Instantiate best model weights
    best_model_wts = copy.deepcopy(model.state_dict())
    best_accuracy = 0

    #Use GPU if available else CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    datasets = {
        "train": train_dataloader,
        "valid": valid_dataloader
    }
    
    for epoch in range(n_epochs):
        
        for phase in ["train","valid"]:
            
            #Set the model to the respctive type
            if phase == "train":
                model.train()
            else:
                model.eval()
                
            #Reset loss and corrects for each phase
            running_loss = 0.0
            running_correct = 0.0
    
    
            #Loop over the data in each dataset for each epoch
            for data, label in datasets[phase]:            
                data.to(device)
                label.to(device)
            
                with torch.set_grad_enabled(phase == "train"):
                    #Forward Pass - Calculate outputs and probability of each class
                    outputs = model(data)
                    #Find the argmax of the probabilities to get the predicted class
                    _, preds = torch.max(outputs, 1)
                    
                    #Calculate Loss (Cross Entropy)
                    loss = critereon(outputs, label)
                    
                    #Backpropogate and update gradients if phase is training
                    if phase == "train":
                        
                        #Update Parameters if training phase
                        optimiser.zero_grad()
                        loss.backward()
                        optimiser.step()
                    
                    #Calculate metrics
                    running_correct += torch.sum(preds == label.data)
                    running_loss += loss.item() * data.size(0)
            
            if phase == 'train':
                scheduler.step()
            
            epoch_accuracy = (running_correct / len(datasets[phase].dataset)) * 100
            epoch_loss = running_loss / len(datasets[phase].dataset)
                
            if phase == "valid":
                print(f"Phase: {phase}, Epoch: {epoch + 1}, Accuracy: {epoch_accuracy:.2f}% Loss: {epoch_loss:.6f}")
                
            if phase == "valid" and epoch_accuracy > best_accuracy:
                best_accuracy = epoch_accuracy
                best_model_wts = copy.deepcopy(model.state_dict())
                
    print("\n","-" * 40)
    
    time_elapsed = time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_accuracy))
    
    model.load_state_dict(best_model_wts)
                
    return model

#### Instantiate the dataset classes and pass the classes to a dataloader 

In [645]:
bs = 100

#Using Winedataset defined above
dataset_train = Winedataset("train")
dataset_test = Winedataset("test")

#Using the standard Pytorch DataLoaders
dataloader_train = DataLoader(dataset_train, batch_size = bs, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size = bs)

#### Set Hyperparameters and Layer sizes in NN

In [658]:
n_epochs = 20
lr = 0.1

input_size = dataloader_train.dataset.n_features
hidden_size1 = 800
hidden_size2 = 200
output_size = dataloader_train.dataset.n_classes

#### Instantiate the model, estimator and optimiser

In [659]:
model = ClassificationNetwork(input_size, hidden_size1, hidden_size2, output_size).to(device)
optimiser = torch.optim.Adam(model.parameters(), lr = lr,weight_decay=0.01)

#Includes a Softmax and NNLoss function ence why model does not have a Softmax func
critereon = nn.CrossEntropyLoss()

In [660]:
total_steps = len(dataloader_train) * n_epochs
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimiser,max_lr = 0.01,total_steps=total_steps)

In [661]:
model = fit_model(dataloader_train, dataloader_test, n_epochs, model, optimiser, critereon, scheduler)

Phase: valid, Epoch: 1, Accuracy: 28.75% Loss: 2.590140
Phase: valid, Epoch: 2, Accuracy: 35.62% Loss: 2.894423
Phase: valid, Epoch: 3, Accuracy: 36.88% Loss: 2.949168
Phase: valid, Epoch: 4, Accuracy: 38.75% Loss: 2.950368
Phase: valid, Epoch: 5, Accuracy: 37.50% Loss: 2.992406
Phase: valid, Epoch: 6, Accuracy: 38.12% Loss: 2.998225
Phase: valid, Epoch: 7, Accuracy: 35.62% Loss: 2.882654
Phase: valid, Epoch: 8, Accuracy: 35.00% Loss: 2.848441
Phase: valid, Epoch: 9, Accuracy: 37.50% Loss: 2.764010
Phase: valid, Epoch: 10, Accuracy: 37.50% Loss: 2.866683
Phase: valid, Epoch: 11, Accuracy: 39.38% Loss: 2.882730
Phase: valid, Epoch: 12, Accuracy: 40.00% Loss: 2.810384
Phase: valid, Epoch: 13, Accuracy: 38.12% Loss: 2.656329
Phase: valid, Epoch: 14, Accuracy: 38.12% Loss: 2.632841
Phase: valid, Epoch: 15, Accuracy: 38.12% Loss: 2.745728
Phase: valid, Epoch: 16, Accuracy: 44.38% Loss: 2.448293
Phase: valid, Epoch: 17, Accuracy: 39.38% Loss: 2.643915
Phase: valid, Epoch: 18, Accuracy: 35.62