In [None]:
#python notebook for a 3-layer neural network on the MNIST dataset

In [1]:
import torch
import torch.nn as nn
import torch.utils.data as Data

import numpy as np
import time

In [2]:
class NeuralNetwork(nn.Module):

    def __init__(self, learning_rate):
        #call the base class's initialisation
        super().__init__()
        
        #learning rate
        self.lr = learning_rate
        
        #define the layers and their sizes
        self.input_layer = nn.Linear(784, 640, bias=False)
        self.hidden_layer = nn.Linear(640, 10, bias=False)
        
        #define activation function
        self.activation = nn.Sigmoid()
        
        #create error/loss function, crossEntropy will include softmax tranformation
        self.error_function = nn.CrossEntropyLoss()
        
        #create optimiser, using simple stochastic gradient descent
        self.optimizer = torch.optim.SGD(self.parameters(), self.lr)

    
    def forward(self, inputs_list):
        
        #combine input layer signals into hidden layer
        hidden_inputs = self.input_layer(inputs_list)
        #apply sigmiod activation function
        hidden_outputs = self.activation(hidden_inputs)
        
        #combine hidden layer signals into output layer
        final_inputs = self.hidden_layer(hidden_outputs)
        #apply sigmiod activation function
        final_outputs = final_inputs
        
        return final_outputs


In [3]:
# learning rate
learning_rate = 0.1

# create instance of neural network
n = NeuralNetwork(learning_rate)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
n = n.to(device)

In [None]:
#https://pjreddie.com/projects/mnist-in-csv/
#!wget https://pjreddie.com/media/files/mnist_train.csv
#!wget https://pjreddie.com/media/files/mnist_test.csv

In [4]:
#load the mnist training data CSV file into np array
train_data = np.genfromtxt("mnist_data/mnist_train.csv", delimiter=',', dtype=np.float32)
#784 columns of training data, normalized
x_train = train_data[:,1:]
x_train = x_train / 255.0
y_train = train_data[:,0]

test_data = np.genfromtxt("mnist_data/mnist_test.csv", delimiter=',', dtype=np.float32)
#test data - 784 columns, normalized
x_test = test_data[:,1:]
x_test = x_test / 255.0
y_test = test_data[:,0]

In [5]:
torch_x_train = torch.from_numpy(x_train)
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor) 
torch_x_test = torch.from_numpy(x_test)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor) 

train_dataset = Data.TensorDataset(torch_x_train, torch_y_train)
test_dataset = Data.TensorDataset(torch_x_test, torch_y_test)

train_loader = Data.DataLoader(dataset=train_dataset, batch_size=100, num_workers=2)
test_loader = Data.DataLoader(dataset=test_dataset, batch_size=100, num_workers=2)

In [6]:
epochs = 30

for epoch in range(epochs):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    test_acc = 0.0
    test_loss = 0.0

    n.train()
    for i, data in enumerate(train_loader):
        inputs = data[0].to(device)
        labels = data[1].to(device)
        
        n.optimizer.zero_grad()
        train_predicted = n.forward(inputs)
        batch_loss = n.error_function(train_predicted, labels)
        batch_loss.backward()
        n.optimizer.step()

        train_acc += np.sum(np.argmax(train_predicted.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()
    
    with torch.no_grad():
        n.eval()
        for i, data in enumerate(test_loader):
            inputs = data[0].to(device)
            labels = data[1].to(device)
            
            test_predicted = n.forward(inputs)
            batch_loss = n.error_function(test_predicted, labels)

            test_acc += np.sum(np.argmax(test_predicted.cpu().data.numpy(), axis=1) == data[1].numpy())
            test_loss += batch_loss.item()

        #print result for each epoch
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
            (epoch + 1, epochs, time.time()-epoch_start_time, \
             train_acc/train_dataset.__len__(), train_loss/train_dataset.__len__(), test_acc/test_dataset.__len__(), test_loss/test_dataset.__len__()))

[001/030] 6.37 sec(s) Train Acc: 0.685667 Loss: 0.010596 | Val Acc: 0.843600 loss: 0.005146
[002/030] 2.67 sec(s) Train Acc: 0.881850 Loss: 0.004227 | Val Acc: 0.884500 loss: 0.003822
[003/030] 2.65 sec(s) Train Acc: 0.896650 Loss: 0.003590 | Val Acc: 0.900200 loss: 0.003387
[004/030] 2.65 sec(s) Train Acc: 0.903367 Loss: 0.003328 | Val Acc: 0.908100 loss: 0.003178
[005/030] 2.54 sec(s) Train Acc: 0.907533 Loss: 0.003174 | Val Acc: 0.910200 loss: 0.003053
[006/030] 2.74 sec(s) Train Acc: 0.911250 Loss: 0.003067 | Val Acc: 0.912600 loss: 0.002968
[007/030] 2.83 sec(s) Train Acc: 0.913550 Loss: 0.002984 | Val Acc: 0.914200 loss: 0.002902
[008/030] 3.27 sec(s) Train Acc: 0.915900 Loss: 0.002915 | Val Acc: 0.916400 loss: 0.002847
[009/030] 2.86 sec(s) Train Acc: 0.918183 Loss: 0.002853 | Val Acc: 0.917700 loss: 0.002797
[010/030] 3.17 sec(s) Train Acc: 0.919883 Loss: 0.002795 | Val Acc: 0.920400 loss: 0.002748
[011/030] 2.84 sec(s) Train Acc: 0.921400 Loss: 0.002739 | Val Acc: 0.921800 los