Environment: pytorch

# <font color = 'purple'> Feed Forward Neural Network
In this notebook, we build and tune a feed forward neural network to classify the MNIST samples as one of 10 digits: 0-9.

In [6]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import optuna
from optuna.trial import TrialState

In [7]:
os.chdir(r"C:\Users\sharo\Documents\Postgrad\My Data Science Portfolio\Classification - MNIST")
os.getcwd()

'C:\\Users\\sharo\\Documents\\Postgrad\\My Data Science Portfolio\\Classification - MNIST'

In [8]:
my_seed = 101

## <font color = 'blue'> Import Training Data & Reserve Mock-Test data

In [9]:
def get_train_test_df(fp, label_colname, my_seed=None):
    """
    Function to import raw data, carry out pre-processing, and split into training and test datasets.
    Test data will be reserved for final evaluation of model performance (i.e. not for hyperparameter tuning)

    :param fp: filepath
    :param label_colname: name of column containing labels
    :param my_seed: integer to be used to fix random state for train_test_split

    :return: tuple of dataframes - training_df, test_df
    """

    # import data
    df = pd.read_csv(fp)

    # Standard scaling of features 
    scaler = StandardScaler()
    df[df.drop(columns=label_colname).columns] = scaler.fit_transform(df[df.drop(columns=label_colname).columns])

    # separate into training & test datasets.
    # Stratification is used to ensure training and test sets have representative proportions of all classes
    # Given the large size of the dataset, we are able to use reserve a slightly larger test set (30%) whilst
    # retaining adequate data for training
    training_df, test_df = train_test_split(df, test_size=0.3, random_state=my_seed, stratify=df[label_colname])

    return training_df, test_df

In [11]:
# we use the filtered training data - i.e. after removing pixels which are always zero
training_df, mytest_df = get_train_test_df("train_filtered.csv", label_colname='label', my_seed = my_seed)

In [12]:
training_df.head()

Unnamed: 0,label,pixel12,pixel13,pixel14,pixel15,pixel32,pixel33,pixel34,pixel35,pixel36,...,pixel770,pixel771,pixel772,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779
26260,4,-0.005281,-0.006878,-0.00488,-0.00488,-0.00488,-0.005629,-0.009321,-0.0118,-0.016306,...,-0.060228,-0.056359,-0.051608,-0.042793,-0.034737,-0.02527,-0.018026,-0.011473,-0.009099,-0.006897
6235,2,-0.005281,-0.006878,-0.00488,-0.00488,-0.00488,-0.005629,-0.009321,-0.0118,-0.016306,...,-0.060228,-0.056359,-0.051608,-0.042793,-0.034737,-0.02527,-0.018026,-0.011473,-0.009099,-0.006897
36257,7,-0.005281,-0.006878,-0.00488,-0.00488,-0.00488,-0.005629,-0.009321,-0.0118,-0.016306,...,-0.060228,-0.056359,-0.051608,-0.042793,-0.034737,-0.02527,-0.018026,-0.011473,-0.009099,-0.006897
10353,1,-0.005281,-0.006878,-0.00488,-0.00488,-0.00488,-0.005629,-0.009321,-0.0118,-0.016306,...,-0.060228,-0.056359,-0.051608,-0.042793,-0.034737,-0.02527,-0.018026,-0.011473,-0.009099,-0.006897
17347,0,-0.005281,-0.006878,-0.00488,-0.00488,-0.00488,-0.005629,-0.009321,-0.0118,-0.016306,...,-0.060228,-0.056359,-0.051608,-0.042793,-0.034737,-0.02527,-0.018026,-0.011473,-0.009099,-0.006897


## <font color = 'blue'>Train Model
**Functions**

In [13]:
n_features = training_df.shape[1] - 1  # number of features in feature matrix.
n_classes = len(training_df['label'].unique())  # number of unique classes.

print(f"No. of features: {n_features}")
print(f"No. of classes: {n_classes}")

No. of features: 708
No. of classes: 10


In [14]:
class MyDataset(Dataset): # inherits properties of pytorch Dataset class
    def __init__(self, dataframe, label_colname=None, blind_test=False):
        """
            Class initialisation
            :param dataframe: pandas dataframe including features and labels
            :param label_colname: name of column containing labels
            :param blind_test: Boolean. True means dataframe does not include labels (i.e. test set)
            """
        self.blind_test = blind_test

        if blind_test:  # for blind test (i.e. no label, self.labels does not exist)
            self.features = dataframe.to_numpy()
        else:
            self.features = dataframe.drop(columns=[label_colname]).to_numpy()
            self.labels = dataframe[label_colname].to_numpy()


    def __len__(self):
        """
        :return: length of dataset
        """
        return len(self.features)

    def __getitem__(self, idx):
        """
        Fetches features and label(s) at requested index
        :param idx: requested index
        :return: tuple of numpy arrays - batch_features, batch_labels. For blind test, return only batch_features
        """
        batch_features = self.features[idx,:]
        if self.blind_test:
            return batch_features
        else:
            batch_labels = self.labels[idx]
            return batch_features, batch_labels

In [15]:
def get_train_val_dataloader(training_df, my_batchsize, label_colname, my_seed = None):
    """
    Function to split training data into training and validation subsets and format as dataloaders
    Model performance on validation set will be used for hyperparameter tuning.

    :param training_df: dataframe with full set of training data
    :param my_batchsize: batch size for pytorch DataLoader
    :param label_colname: name of column containing labels
    :param my_seed: optional integer to fix train test split random state

    :return: tuple of pytorch DataLoaders - train_dataloader, val_dataloader
    """

    # separate into training & validation datasets
    train_data, val_data = train_test_split(training_df, test_size = 0.2, random_state = my_seed, stratify=training_df[label_colname])

    #format as pytorch dataloader
    train, val = MyDataset(train_data, label_colname), MyDataset(val_data, label_colname)
    train_dataloader = DataLoader(train, batch_size=my_batchsize, shuffle=True)
    val_dataloader = DataLoader(val, batch_size=my_batchsize)

    return train_dataloader, val_dataloader

In [16]:
def count_correct(predictions, y):
    """
    Counts number of correct predictions in a batch

    :param predictions: 1D tensor with predictions
    :param y: 1D tensor with true classes

    :return: number of correct predictions (pred==y)
    """
    predictions = predictions.numpy()
    y = y.numpy()

    n_correct = (predictions == y).sum()

    return n_correct

In [17]:
def set_parameters(trial):
    """
    Set parameters for neural network, optimisation algorithm etc.

    :param trial: Optuna trial object

    :return: dictionary of parameters:
            - n_layers: number of layers in neural network
            - n_units_l{i}: number of units in layer i
            - dropout_l{i}: dropout probability for layer i (larger = greater regularisation)
            - lr: learning rate
            - batch_size: batch size
            - n_epochs: number of epochs (i.e. number of passes through training data to optimise weights)
            - optimiser: optimisation algorithm to be used
    """
    trial.suggest_int("n_layers", 1, 3)

    for i in range(trial.params['n_layers']):
        trial.suggest_int(f'n_units_l{i}', 2, 20)
        trial.suggest_float(f"dropout_l{i}", 0.1, 1)

    trial.suggest_float("lr", 1e-5, 1e-1, log=True)

    # TODO: try optimising these as well
    trial.suggest_int("batch_size", 100, 100)
    trial.suggest_int("n_epochs", 5, 5)
    trial.suggest_categorical("optimizer", ["SGD"])

    return trial.params

In [18]:
def define_model(my_params):
    """Defines feed-forward neural network based on set parameters

    :param my_params: dictionary of parameters (see set_parameters() for full list)

    :return: nn model
    """

    layers = []

    in_features = n_features  # number of input features for 1st layer = no. of features in feature matrix

    for i in range(my_params['n_layers']):
        # n_inputs = n_outputs of previous layer, n_outputs=no. of units in that lyr
        out_features = my_params[f'n_units_l{i}']
        layers.append(nn.Linear(in_features, out_features))

        layers.append(nn.ReLU())  # activation function

        # drop-out regularisation. (note: drop-out works by zeroing some elements of the tensor. tensor shape is unchanged)
        p = my_params[f"dropout_l{i}"]
        layers.append(nn.Dropout(p))

        in_features = out_features  # no. of inputs for next layer = no. of outputs of this layer

    layers.append(nn.Linear(in_features, n_classes))  # output layer. No. of outputs = no. of unique classes in dataset

    return nn.Sequential(*layers)

In [19]:
def objective(trial):
    """
    Objective for Optuna to optimise
    :param trial: Optuna trial object
    :return: accuracy - fraction of correctly labelled validation points. This is what Optuna seeks to maximise
    """

    #set parameters
    my_params = set_parameters(trial)

    # Instantiate model
    model = define_model(my_params)

    # Instantiate optimizer
    optimizer_name = my_params['optimizer']
    lr = my_params['lr']
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # get data
    # TODO: notice for each trial, training and validation sets are different. 
    #       Hence, differences in performance may not be due to trial parameters alone. 
    #       Consider implementing cross-validation so that results are less dependent on individual train-test splits
    train_dataloader, val_dataloader = get_train_val_dataloader(training_df, 
                                                                my_batchsize=my_params['batch_size'],
                                                                label_colname='label')
    # train model
    for epoch in range(my_params['n_epochs']):

        #train
        model.train()
        for batch, (X, y) in enumerate(train_dataloader):
            # X and y are tensors. X.size() = (batch_size,n_features), y.size()=(batch_size,)
            # set datatype for compatibility with nn.
            X = X.float()
            y = y.long()

            # calculate model output and resulting loss
            model_output = model(X)  # tensor. size=(batch_size x n_classes)
            loss_fn = nn.CrossEntropyLoss() # instantiate loss function
            loss = loss_fn(model_output, y)

            # Backpropagation to update model weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # validate. We do this at each epoch to facilitate pruning:
        # i.e. early termination of trials which are clearly not going to be optimum
        model.eval()
        correct = 0
        with torch.no_grad():
            for batch, (X, y) in enumerate(val_dataloader):
                X = X.float()
                y = y.long()

                # calculate model output and total number of correct predictions for this batch
                model_output = model(X)
                pred = torch.argmax(model_output, dim=1)  # prediction = class with highest output value
                correct += count_correct(pred, y)

        accuracy = correct / len(val_dataloader.dataset)

        # report accuracy to allow Optuna to decide whether to prune this trial
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy  # return final validation accuracy after all epochs (unless pruned)


**Optimise Hyperparameters**

In [20]:
# instantiate optuna study
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
# run study
study.optimize(objective, n_trials=200)  #, timeout=600)

[32m[I 2023-01-09 12:14:45,916][0m A new study created in memory with name: no-name-b9e41578-f6a4-40e4-a76a-65ce55030d9c[0m
[32m[I 2023-01-09 12:14:53,857][0m Trial 0 finished with value: 0.09642857142857143 and parameters: {'n_layers': 3, 'n_units_l0': 7, 'dropout_l0': 0.5779361244591678, 'n_units_l1': 10, 'dropout_l1': 0.695174009363966, 'n_units_l2': 6, 'dropout_l2': 0.6302159656807829, 'lr': 0.0005139775674364189, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.09642857142857143.[0m
[32m[I 2023-01-09 12:15:01,699][0m Trial 1 finished with value: 0.11156462585034013 and parameters: {'n_layers': 3, 'n_units_l0': 8, 'dropout_l0': 0.4858848631707058, 'n_units_l1': 19, 'dropout_l1': 0.9982793268743696, 'n_units_l2': 4, 'dropout_l2': 0.7491634181909738, 'lr': 0.03183177835097524, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 1 with value: 0.11156462585034013.[0m
[32m[I 2023-01-09 12:15:08,967][0m Trial 2 finished with

[32m[I 2023-01-09 12:18:00,774][0m Trial 33 finished with value: 0.9136054421768708 and parameters: {'n_layers': 1, 'n_units_l0': 14, 'dropout_l0': 0.16933007869741895, 'lr': 0.0405992582968844, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 32 with value: 0.9244897959183673.[0m
[32m[I 2023-01-09 12:18:07,603][0m Trial 34 finished with value: 0.911734693877551 and parameters: {'n_layers': 1, 'n_units_l0': 15, 'dropout_l0': 0.2052816430800662, 'lr': 0.043341510061566106, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 32 with value: 0.9244897959183673.[0m
[32m[I 2023-01-09 12:18:09,286][0m Trial 35 pruned. [0m
[32m[I 2023-01-09 12:18:11,048][0m Trial 36 pruned. [0m
[32m[I 2023-01-09 12:18:12,914][0m Trial 37 pruned. [0m
[32m[I 2023-01-09 12:18:14,651][0m Trial 38 pruned. [0m
[32m[I 2023-01-09 12:18:16,602][0m Trial 39 pruned. [0m
[32m[I 2023-01-09 12:18:20,917][0m Trial 40 pruned. [0m
[32m[I 2023-01-09 12:18:27,793][0m

[32m[I 2023-01-09 12:21:00,930][0m Trial 77 finished with value: 0.9229591836734694 and parameters: {'n_layers': 1, 'n_units_l0': 17, 'dropout_l0': 0.2114677247837765, 'lr': 0.09717617082720979, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 76 with value: 0.9324829931972789.[0m
[32m[I 2023-01-09 12:21:02,106][0m Trial 78 pruned. [0m
[32m[I 2023-01-09 12:21:04,741][0m Trial 79 pruned. [0m
[32m[I 2023-01-09 12:21:09,146][0m Trial 80 pruned. [0m
[32m[I 2023-01-09 12:21:13,472][0m Trial 81 finished with value: 0.922108843537415 and parameters: {'n_layers': 1, 'n_units_l0': 17, 'dropout_l0': 0.1693456553503564, 'lr': 0.05574670271836947, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 76 with value: 0.9324829931972789.[0m
[32m[I 2023-01-09 12:21:17,777][0m Trial 82 finished with value: 0.9258503401360544 and parameters: {'n_layers': 1, 'n_units_l0': 17, 'dropout_l0': 0.16256610957186032, 'lr': 0.054653899363573615, 'batch_size': 1

[32m[I 2023-01-09 12:23:09,756][0m Trial 115 finished with value: 0.9284013605442177 and parameters: {'n_layers': 1, 'n_units_l0': 19, 'dropout_l0': 0.14836953597807745, 'lr': 0.04229924337542643, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 76 with value: 0.9324829931972789.[0m
[32m[I 2023-01-09 12:23:13,985][0m Trial 116 finished with value: 0.9268707482993197 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.1487183101847986, 'lr': 0.04097964079732403, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 76 with value: 0.9324829931972789.[0m
[32m[I 2023-01-09 12:23:15,062][0m Trial 117 pruned. [0m
[32m[I 2023-01-09 12:23:19,508][0m Trial 118 finished with value: 0.9222789115646258 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.1166454256797414, 'lr': 0.029964395048632846, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 76 with value: 0.9324829931972789.[0m
[32m[I 2023-01-09 1

[32m[I 2023-01-09 12:25:09,354][0m Trial 148 finished with value: 0.9329931972789116 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.13097693500982305, 'lr': 0.0984315854157318, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 144 with value: 0.938265306122449.[0m
[32m[I 2023-01-09 12:25:13,632][0m Trial 149 finished with value: 0.9312925170068027 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.1320810720656615, 'lr': 0.09991899175708975, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 144 with value: 0.938265306122449.[0m
[32m[I 2023-01-09 12:25:17,954][0m Trial 150 finished with value: 0.9268707482993197 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.13196830353763261, 'lr': 0.09576368327313779, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 144 with value: 0.938265306122449.[0m
[32m[I 2023-01-09 12:25:22,280][0m Trial 151 finished with value: 0.93197278911

[32m[I 2023-01-09 12:27:13,698][0m Trial 184 finished with value: 0.9307823129251701 and parameters: {'n_layers': 1, 'n_units_l0': 19, 'dropout_l0': 0.198274397563479, 'lr': 0.0838417632474021, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 144 with value: 0.938265306122449.[0m
[32m[I 2023-01-09 12:27:15,504][0m Trial 185 pruned. [0m
[32m[I 2023-01-09 12:27:19,693][0m Trial 186 finished with value: 0.9273809523809524 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.1803569353809289, 'lr': 0.08053608343046846, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 144 with value: 0.938265306122449.[0m
[32m[I 2023-01-09 12:27:23,939][0m Trial 187 finished with value: 0.9297619047619048 and parameters: {'n_layers': 1, 'n_units_l0': 20, 'dropout_l0': 0.12031011524116846, 'lr': 0.06028130471683252, 'batch_size': 100, 'n_epochs': 5, 'optimizer': 'SGD'}. Best is trial 144 with value: 0.938265306122449.[0m
[32m[I 2023-01-09 12:2

In [21]:
# Display study results
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("\nBest trial:")
best_trial = study.best_trial

print("  Validation Accuracy: ", best_trial.value)

print("  Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

Study statistics: 
  Number of finished trials:  200
  Number of pruned trials:  69
  Number of complete trials:  131

Best trial:
  Validation Accuracy:  0.938265306122449
  Params: 
    n_layers: 1
    n_units_l0: 20
    dropout_l0: 0.1304193070818448
    lr: 0.08749252019850264
    batch_size: 100
    n_epochs: 5
    optimizer: SGD


**Train final model using hyperparameters from best trial**

In [22]:
def df_to_dataloader(df, my_batchsize, my_shuffle, blind_test = False):
    """
    Function to format dataframe as dataloader
    :param df: dataframe
    :param my_batchsize: batch size for dataloader
    :param my_shuffle: whether to shuffle data at each epoch (True during training)
    :param blind_test: True if df has no labels
    :return: dataloader
    """
    data = MyDataset(df, 'label', blind_test)
    my_dataloader = DataLoader(data, batch_size=my_batchsize, shuffle=my_shuffle)

    return my_dataloader

In [23]:
def train_final_model(my_params):
    """
    Train final model using tuned hyperparameters from best Optuna trial
    :param my_params: dictionary of parameters from Optuna trial object that had best validation accuracy

    :return: pytorch neural network model
    """

    # Instantiate model
    model = define_model(my_params)

    # Instantiate optimizer
    optimizer_name = my_params['optimizer']
    lr = my_params['lr']
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # get data. Entire training dataset is used here, including validation set
    train_dataloader = df_to_dataloader(training_df, my_batchsize=my_params['batch_size'],
                                                  my_shuffle=True)

    # train model
    for epoch in range(my_params['n_epochs']):
        model.train()
        for batch, (X, y) in enumerate(train_dataloader):
            # X and y are tensors. X.size() = (batch_size,n_features), y.size()=(batch_size,)
            # set datatype for compatibility with nn.
            X = X.float()
            y = y.long()

            # calculate model output and resulting loss
            model_output = model(X)  # tensor. size=(batch_size x n_classes)
            loss_fn = nn.CrossEntropyLoss()  # instantiate loss function
            loss = loss_fn(model_output, y)

            # Backpropagation to update model weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return model

In [24]:
best_params = best_trial.params  # extract optimised hyperparameters
final_model = train_final_model(best_params)  # train final model

**Evaluate final trianing accuracy**

In [27]:
def predict_and_evaluate(model, df):
    """
    Function to run trained and tuned model on provided dataframe to obtain predictions and evaluate
    accuracy

    :param model: trained model
    :param df: dataframe including features and target/label

    :return: accuracy
    """
    my_dataloader = df_to_dataloader(df, my_batchsize=10, my_shuffle=False)

    model.eval()
    correct = 0
    with torch.no_grad():
        for batch, (X, y) in enumerate(my_dataloader):
            X = X.float()
            y = y.long()

            # calculate model output and total number of correct predictions for this batch
            model_output = model(X)
            pred = torch.argmax(model_output, dim=1)  # prediction = class with highest output value
            correct += count_correct(pred, y)

    accuracy = correct / len(my_dataloader.dataset)

    return accuracy

In [28]:
train_acc = predict_and_evaluate(final_model, training_df)
print(f"  Final Training Accuracy: {train_acc}")

  Final Training Accuracy: 0.9522789115646259


## <font color = 'blue'> Mock-Test
Evaluate accuracy on mock-test data - i.e. portion of training data which was reserved at the start, and **not** used for training

In [29]:
test_acc = predict_and_evaluate(final_model, mytest_df)
print(f"  Mock-test Accuracy: {test_acc}")

  Mock-test Accuracy: 0.9338095238095238


## <font color = 'blue'> Save Model

In [None]:
**STOP!! DO NOT OVERWRITE FILE!**
fp = r"Feed Forward Neural Network\ffnn_2.pth"
torch.save(final_model, fp)

## <font color = 'orange'> Test
Evaluate accuracy on kaggle test data

**Import Test Data**

In [30]:
fp = r"test_filtered.csv"
test_df = pd.read_csv(fp)
test_df.head()

Unnamed: 0,pixel12,pixel13,pixel14,pixel15,pixel32,pixel33,pixel34,pixel35,pixel36,pixel37,...,pixel770,pixel771,pixel772,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
# convert to dataloader
test_dataloader = df_to_dataloader(test_df, my_batchsize=100, my_shuffle=False, blind_test=True)

**Import & Run trained model**

In [32]:
# Import trained model
fp = r"Feed Forward Neural Network\ffnn_2.pth"
model = torch.load(fp)

In [33]:
# make predictions
model.eval()
with torch.no_grad():
    for batch, X in enumerate(test_dataloader):
        X = X.float()
        model_output = model(X)
        pred = torch.argmax(model_output, dim=1).numpy()  # prediction = class with highest output value
        if batch==0:
            predictions = pred
        else:
            predictions = np.append(predictions, pred)

In [34]:
# format in kaggle's required format
image_id = np.arange(1,len(predictions)+1)
predictions_df = pd.DataFrame.from_dict(data={'ImageId': image_id, 'Label': predictions})

In [None]:
# Save predictions
**STOP!! DO NOT OVERWRITE FILE!**
fp = r"Feed Forward Neural Network\my_submission_ffnn2.csv"
predictions_df.to_csv(fp, index=False)

**<font color = 'orange'> Test Results (from Kaggle)**

In [35]:
my_results = {'ffnn_1':0.823,
             'ffnn_2': 0.84639}

myresults_df = pd.DataFrame.from_dict(my_results, 'index').reset_index().rename({'index':'model',0:'accuracy'},axis=1)
myresults_df

Unnamed: 0,model,accuracy
0,ffnn_1,0.823
1,ffnn_2,0.84639
