## Multilayer Perceptron 

The following code for an MLP was very lightly modified from https://machinelearningmastery.com/pytorch-tutorial-develop-deep-learning-models/

- We Started with 2 epochs, 2 hidden layers, learning rate 0.015, 2 epochs. Got an F1 score of 0.901. 
- Upped epochs to 10, F1 score was still around 0.9. 
- Added a third hidden layer with RELU activation function. Got an F1 score of 0.97 with 2 epochs. The rest of the testing was done with the additional hidden layer as shown below.

In [1]:
import pandas as pd
# Load the data
data = pd.read_csv('..\Project\eventmatrixlabel.csv')

In [2]:
# pytorch mlp for binary classification
from numpy import vstack
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import sklearn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Module
from torch.optim import SGD
from torch.nn import BCELoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
 
# dataset definition
class logDataset(Dataset):
    # load the dataset
    def __init__(self):
        # load the csv file as a dataframe
        # store the inputs and outputs
        self.X = data.drop(['Label', 'BlockId'], axis=1).values[:,:]
        self.y = y = data[['Label']].values[:,-1]
        # ensure input data is floats
        self.X = self.X.astype('float32')
        # label encode target and ensure the values are floats
        self.y = LabelEncoder().fit_transform(self.y)
        self.y = self.y.astype('float32')
        self.y = self.y.reshape((len(self.y), 1))
 
    # number of rows in the dataset
    def __len__(self):
        return len(self.X)
 
    # get a row at an index
    def __getitem__(self, idx):
        return [self.X[idx], self.y[idx]]
 
    # get indexes for train and test rows
    def get_splits(self, n_test=0.15, n_val = 0.05):
        # determine sizes
        test_size = round(n_test * len(self.X))
        val_size = round(n_val * len(self.X))
        train_size = len(self.X) - test_size - val_size
        # calculate the split
        return random_split(self, [train_size, val_size, test_size])
 
 # model definition
class MLP(Module):
    # define model elements
    def __init__(self, n_inputs):
        super(MLP, self).__init__()
        # input to first hidden layer
        self.hidden1 = Linear(n_inputs, 30)
        kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
        self.act1 = ReLU()
        # second hidden layer
        self.hidden2 = Linear(30, 20)
        kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
        self.act2 = ReLU()
        # third hidden layer
        self.hidden3 = Linear(20, 10)
        kaiming_uniform_(self.hidden3.weight, nonlinearity='relu')
        self.act3 = ReLU()
        # fourth hidden layer and output
        self.hidden4 = Linear(10, 1)
        xavier_uniform_(self.hidden3.weight)
        self.act4 = Sigmoid()
 
    # forward propagate input
    def forward(self, X):
        # input to first hidden layer
        X = self.hidden1(X)
        X = self.act1(X)
         # second hidden layer
        X = self.hidden2(X)
        X = self.act2(X)
        
        X = self.hidden3(X)
        X = self.act3(X)
        
        # third hidden layer and output
        X = self.hidden4(X)
        X = self.act4(X)
        return X
 
 # prepare the dataset
def prepare_data():
    # load the dataset
    dataset = logDataset()
    # calculate split
    train, val, test = dataset.get_splits()
    # prepare data loaders
    train_dl = DataLoader(train, batch_size=1024, shuffle=True)
    val_dl = DataLoader(train, batch_size=1024, shuffle=True)
    test_dl = DataLoader(test, batch_size=1024, shuffle=False)
    return train_dl, val_dl, test_dl
 
# train the model
def train_model(train_dl, model, num_epochs = 2, lr = 0.01):
    NUM_EPOCHS = 2
    # define the optimization
    criterion = BCELoss()
    optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)
    # enumerate epochs
    for epoch in range(num_epochs):
        # enumerate mini batches
        for i, (inputs, targets) in enumerate(train_dl):
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs)
            # calculate loss
            loss = criterion(yhat, targets)
            # credit assignment
            loss.backward()
            # update model weights
            optimizer.step()
        print (f'Epoch: {epoch+1:03d}/{num_epochs:03d} | Loss: {loss:.4f}')
 
 # evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        # evaluate the model on the test set
        yhat = model(inputs)
        # retrieve numpy array
        yhat = yhat.detach().numpy()
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        # round to class values
        yhat = yhat.round()
        # store
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    # calculate f1 score 
    acc = sklearn.metrics.f1_score(actuals, predictions)
    return acc
 
# make a class prediction for one row of data
def predict(row, model):
    # convert row to data
    row = Tensor([row])
    # make prediction
    yhat = model(row)
    # retrieve numpy array
    yhat = yhat.detach().numpy()
    return yhat
 
# prepare the data
train_dl, val_dl, test_dl = prepare_data()

In [3]:
# define the network
model = MLP(48)
# train the model
train_model(train_dl, model)
#evaluate the model
acc = evaluate_model(val_dl, model)
print('F1 Score: %.3f' % acc)

Epoch: 001/002 | Loss: 0.0520
Epoch: 002/002 | Loss: 0.0047
F1 Score: 0.997


In [4]:
model = MLP(48)
# train the model
train_model(train_dl, model, num_epochs = 10)
#evaluate the model
acc = evaluate_model(val_dl, model)
print('F1 Score: %.3f' % acc)

Epoch: 001/010 | Loss: 0.0706
Epoch: 002/010 | Loss: 0.0112
Epoch: 003/010 | Loss: 0.0016
Epoch: 004/010 | Loss: 0.0010
Epoch: 005/010 | Loss: 0.0020
Epoch: 006/010 | Loss: 0.0004
Epoch: 007/010 | Loss: 0.0012
Epoch: 008/010 | Loss: 0.0238
Epoch: 009/010 | Loss: 0.0001
Epoch: 010/010 | Loss: 0.0002
F1 Score: 1.000


In [5]:
model = MLP(48)
# train the model
train_model(train_dl, model, num_epochs = 20)
#evaluate the model
acc = evaluate_model(val_dl, model)
print('F1 Score: %.3f' % acc)

Epoch: 001/020 | Loss: 0.0790
Epoch: 002/020 | Loss: 0.0571
Epoch: 003/020 | Loss: 0.0074
Epoch: 004/020 | Loss: 0.0053
Epoch: 005/020 | Loss: 0.0033
Epoch: 006/020 | Loss: 0.0021
Epoch: 007/020 | Loss: 0.0011
Epoch: 008/020 | Loss: 0.0005
Epoch: 009/020 | Loss: 0.0192
Epoch: 010/020 | Loss: 0.0011
Epoch: 011/020 | Loss: 0.0002
Epoch: 012/020 | Loss: 0.0008
Epoch: 013/020 | Loss: 0.0009
Epoch: 014/020 | Loss: 0.0020
Epoch: 015/020 | Loss: 0.0004
Epoch: 016/020 | Loss: 0.0000
Epoch: 017/020 | Loss: 0.0004
Epoch: 018/020 | Loss: 0.0001
Epoch: 019/020 | Loss: 0.0003
Epoch: 020/020 | Loss: 0.0005
F1 Score: 1.000


In [6]:
model = MLP(48)
# train the model
train_model(train_dl, model, lr = 0.001, num_epochs = 20)
#evaluate the model
acc = evaluate_model(val_dl, model)
print('F1 Score: %.3f' % acc)

Epoch: 001/020 | Loss: 0.0991
Epoch: 002/020 | Loss: 0.0708
Epoch: 003/020 | Loss: 0.1562
Epoch: 004/020 | Loss: 0.0766
Epoch: 005/020 | Loss: 0.0702
Epoch: 006/020 | Loss: 0.0761
Epoch: 007/020 | Loss: 0.0247
Epoch: 008/020 | Loss: 0.0481
Epoch: 009/020 | Loss: 0.0384
Epoch: 010/020 | Loss: 0.0363
Epoch: 011/020 | Loss: 0.0296
Epoch: 012/020 | Loss: 0.0438
Epoch: 013/020 | Loss: 0.0334
Epoch: 014/020 | Loss: 0.0562
Epoch: 015/020 | Loss: 0.0091
Epoch: 016/020 | Loss: 0.0330
Epoch: 017/020 | Loss: 0.0152
Epoch: 018/020 | Loss: 0.0082
Epoch: 019/020 | Loss: 0.0053
Epoch: 020/020 | Loss: 0.0032
F1 Score: 0.999


In [7]:
model = MLP(48)
# train the model
train_model(train_dl, model, num_epochs = 15)
#evaluate the model
acc = evaluate_model(val_dl, model)
print('F1 Score: %.3f' % acc)

Epoch: 001/015 | Loss: 0.0527
Epoch: 002/015 | Loss: 0.0176
Epoch: 003/015 | Loss: 0.0068
Epoch: 004/015 | Loss: 0.0026
Epoch: 005/015 | Loss: 0.0036
Epoch: 006/015 | Loss: 0.0014
Epoch: 007/015 | Loss: 0.0107
Epoch: 008/015 | Loss: 0.0108
Epoch: 009/015 | Loss: 0.0011
Epoch: 010/015 | Loss: 0.0009
Epoch: 011/015 | Loss: 0.0012
Epoch: 012/015 | Loss: 0.0003
Epoch: 013/015 | Loss: 0.0004
Epoch: 014/015 | Loss: 0.0006
Epoch: 015/015 | Loss: 0.0003
F1 Score: 1.000


Since the final model with 15 epochs and a learning rate of 0.01 performed the best, we will use it as our final neural network and see how the test data performs on it. 

In [8]:
#evaluate the model on the test data. 
acc = evaluate_model(train_dl, model)
print('F1 Score: %.3f' % acc)

F1 Score: 1.000
