## Neural Network

In [1]:
# Let's start by importing the relevant packages
# matplotlib for plots
import matplotlib as mpl
from matplotlib import pyplot as plt
# pandas to read in some data
import pandas as pd
# numpy to build our first perceptron
import numpy as np
# Train test split to do validate our findings from the perceptron training
from sklearn.model_selection import train_test_split
# MinMaxScaler to normalise the data before inputting them to the perceptron
from sklearn.preprocessing import MinMaxScaler
# PyTorch for neural networks
import torch
import time
from torch import nn
%matplotlib inline
mpl.rcParams['figure.figsize'] = (16, 9)
import os
home = os.path.expanduser("~")
data = home + '/data/workshop_data/occupancy_data/datatraining.txt'


In [2]:
# Load the occupancy data so we have something to predict
df = pd.read_csv(data)
target = 'Occupancy'
features = [col for col in df.columns if target not in col and 'date' not in col]

In [3]:
x_train, x_val, y_train, y_val = train_test_split(df[features], df[target], shuffle=False)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

## Build the neural network
To extend our previously build neuron to a neural network, we will need to add a second (third, fourth) linear layer.

The first layer needs to output as many layers as the second one consumes. Try 10 for the time being.
You will need to update the logits and forward function as well to pass through all layers.


In [10]:
class Network(nn.Module):
    
    def __init__(self, number_of_inputs, hidden_units):
        super().__init__()
        # Build the network using nn.Linear
        self.linear1 = nn.Linear(number_of_inputs, hidden_units)
        self.linear2 = nn.Linear(hidden_units, 1)
        self.dropout = nn.Dropout(0.05) # ADDED LATER TO SUPPORT THE BOTTOM
        # use nn.Sigmoid as an activation function
        self.act1 = nn.Sigmoid()
        self.act2 = nn.Sigmoid()
    
    def logit(self, inp):
        # logit = everything before the final activation
        # Calculate the logits as the input to the last activation
        # self.dropout(inp) # ADDED LATER: Could be used on the inputs
        outer_layer1 = self.dropout(self.act1(self.linear1(inp)))
        return self.linear2(outer_layer1)
    
    def forward(self, inp):
        # Calculate the output of the whole network utilising the logit function
        return self.act2(self.logit(inp))
    

Let us now select a random selection of the training data and calculate the gradients for the neuron:

In [11]:
loss = nn.BCEWithLogitsLoss()

In [12]:
def fit_batch(optim, loss, net, x, y):
    net.train() # tell the layers during training time to use dropout (when applied)
    optim.zero_grad()
    y_pred = net.logit(x)
    err = loss(y_pred, y)
    err.mean().backward()
    optim.step()
    return y_pred

def eval_batch(net, x):
    net.eval()
    y_pred = net(x)
    return y_pred

In [13]:
if torch.cuda.is_available():
    net = Network(5, 10).cuda()
    optim = torch.optim.Adam(net.parameters(), lr=1e-2)
    start = time.time()
    for i in range(20):
        acc = None
        for i in range(200):
            select = np.random.randint(0, len(x_train), 2048)
            x = torch.from_numpy(x_train[select]).float().cuda()
            y = torch.from_numpy(y_train.iloc[select].values).float().unsqueeze(1).cuda()
            y_pred = fit_batch(optim, loss, net, x, y)
            if acc is None:
                acc = (y==(y_pred > .5).float()).float().mean()
            else:
                acc += (y==(y_pred > .5).float()).float().mean()
        print('train accuracy {}'.format(acc.data.cpu().numpy()/200))
    
    x = torch.from_numpy(x_val).float().cuda()
    y = torch.from_numpy(y_val.values).float().unsqueeze(1).cuda()
    y_pred = eval_batch(net, x)
    acc = (y==(y_pred > .5).float()).float().mean()
    print('val accuracy {}'.format(acc.data.cpu().numpy()))
    
    print('Training time: {}'.format(time.time() - start))

train accuracy 0.8136474609375
train accuracy 0.87852294921875
train accuracy 0.95832763671875
train accuracy 0.9708203125
train accuracy 0.975986328125
train accuracy 0.978974609375
train accuracy 0.98043701171875
train accuracy 0.98189208984375
train accuracy 0.9811083984375
train accuracy 0.98144287109375
train accuracy 0.98181884765625
train accuracy 0.98208251953125
train accuracy 0.98263671875
train accuracy 0.98270751953125
train accuracy 0.98297607421875
train accuracy 0.98324462890625
train accuracy 0.9833056640625
train accuracy 0.9833935546875
train accuracy 0.98309814453125
train accuracy 0.98332763671875
val accuracy 0.924361526966095
Training time: 7.8452582359313965


## How does the result change with a changing network?

Now try using a bigger layersize and try adding dropout.
How can we change the training and validation loss?

- What happens if we add Dropout? [docs](https://pytorch.org/docs/stable/nn.html#dropout)
- What happens if you add momentum or weight decay to SGD? [docs](https://pytorch.org/docs/stable/optim.html#torch.optim.SGD)
- What happens if you use an Adam optimizer instead of SGD? [docs](https://pytorch.org/docs/stable/optim.html#torch.optim.Adam)
- What happens if we use other activation functions? [docs](https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity)

Hint You can add dropout using:
```
    self.dropout = nn.Dropout(how_many_percent_shall_be_dropped)
    
    def logits(x):
        out_layer1 = self.dropout(self.act1(self.linear1(x)))
        ...
```