# MNIST with SciKit-Learn and Skorch
This notebook shows how to define and train a simple neural network with PyTorch and use it via Skorch with SciKit-Learn.

In [1]:
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np

## Loading Data
Using SciKit-Learn's ```fetch_mldata``` to load the MNIST data.

In [2]:
# Fetch the 'MNIST original' dataset by name; the result exposes `.data` and `.target`.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in 0.22.
# On newer versions fetch_openml('mnist_784') is the replacement — confirm the installed
# version before re-running this notebook.
mnist = fetch_mldata('MNIST original')

In [3]:
mnist

{'COL_NAMES': ['label', 'data'],
 'DESCR': 'mldata.org dataset: mnist-original',
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 'target': array([ 0.,  0.,  0., ...,  9.,  9.,  9.])}

In [4]:
mnist.data.shape

(70000, 784)

## Preprocessing Data
Each image of the MNIST dataset is encoded in a 784 dimensional vector, representing a 28 x 28 pixel image. Each pixel has a value between 0 and 255, corresponding to the grey-value of a pixel.<br />
If the above ```fetch_mldata``` is used to load MNIST, ```data``` is encoded as ```uint8``` and ```target``` as ```float64```, neither of which can be processed by Skorch. Skorch accepts only ```float32``` or ```int64``` as types. As a consequence, ```data``` is transformed to ```float32``` and scaled to [0, 1], and ```target``` is transformed to ```int64```.

In [5]:
# Cast to the dtypes used for training: float32 features and int64 labels.
X = mnist.data.astype(np.float32)
y = mnist.target.astype(np.int64)

In [6]:
# Scale pixel values from [0, 255] to [0, 1]. X is rebuilt from the raw data rather
# than divided in place, so re-running this cell cannot rescale it a second time.
X = mnist.data.astype('float32') / 255.0

In [7]:
X.min(), X.mean(), X.std(), X.max()

(0.0, 0.13092543, 0.30844888, 1.0)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [9]:
# Sanity check: the split must neither drop nor duplicate samples.
assert len(X_train) + len(X_test) == len(mnist.data)

In [10]:
X_train.shape, y_train.shape

((52500, 784), (52500,))

## Build Neural Network with Torch
Simple, fully connected neural network with one hidden layer. Input layer has 784 dimensions (28x28), hidden layer has 98 (= 784 / 8) neurons, output layer 10 neurons, representing digits 0 - 9.

In [11]:
import torch
from torch import nn
import torch.nn.functional as F

In [12]:
torch.manual_seed(0);

In [13]:
# Layer sizes derived from the data: one input per feature column, an eighth as
# many hidden units, and one output per distinct label value.
mnist_dim = X.shape[1]
hidden_dim = mnist_dim // 8
output_dim = np.unique(mnist.target).size

In [14]:
mnist_dim, hidden_dim, output_dim

(784, 98, 10)

A Neural network in PyTorch's framework.

In [15]:
class ClassifierModule(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The forward pass applies ``hidden`` -> ``nonlin`` -> dropout -> ``output``
    -> softmax, so the module returns per-class probabilities (the values along
    the last axis sum to 1) rather than raw logits.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Number of units in the hidden layer.
        output_dim: Number of output classes.
        nonlin: Callable applied to the hidden layer's output.
        dropout: Dropout probability applied after the non-linearity.
    """

    def __init__(
            self,
            input_dim=mnist_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            nonlin=F.relu,
            dropout=0.5,
    ):
        super(ClassifierModule, self).__init__()
        self.nonlin = nonlin
        self.dropout = nn.Dropout(dropout)

        self.hidden = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, X, **kwargs):
        """Return class probabilities for ``X``.

        Args:
            X: Tensor whose last axis has ``input_dim`` entries.
            **kwargs: Accepted for call compatibility; not used.

        Returns:
            Tensor with ``output_dim`` entries along the last axis, normalised
            by softmax over that axis.
        """
        X = self.nonlin(self.hidden(X))
        X = self.dropout(X)
        # Name the class axis explicitly: calling softmax without `dim` is
        # deprecated, warns on every forward pass, and would pick axis 0 for
        # 1-D or 3-D input. For 2-D batches dim=-1 matches the old behaviour.
        return F.softmax(self.output(X), dim=-1)

Skorch allows PyTorch networks to be used in the SciKit-Learn setting.

In [16]:
from skorch.net import NeuralNetClassifier

In [17]:
# Wrap the PyTorch module so it can be trained through fit / predict.
net = NeuralNetClassifier(
    ClassifierModule,
    lr=0.1,         # learning rate passed to the optimizer
    max_epochs=20,  # number of passes over the training data
    # use_cuda=True,  # uncomment this to train with CUDA
)

In [18]:
net.fit(X_train, y_train)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.8588[0m       [32m0.9036[0m        [35m0.3782[0m  1.3989
      2        [36m0.4356[0m       [32m0.9226[0m        [35m0.2868[0m  1.6909
      3        [36m0.3639[0m       [32m0.9328[0m        [35m0.2432[0m  1.6691
      4        [36m0.3271[0m       [32m0.9404[0m        [35m0.2174[0m  1.5587
      5        [36m0.2963[0m       [32m0.9465[0m        [35m0.1957[0m  1.6026
      6        [36m0.2745[0m       [32m0.9499[0m        [35m0.1798[0m  1.5944
      7        [36m0.2576[0m       [32m0.9510[0m        [35m0.1730[0m  1.5852
      8        [36m0.2460[0m       [32m0.9550[0m        [35m0.1579[0m  1.6465
      9        [36m0.2332[0m       [32m0.9560[0m        [35m0.1498[0m  1.3179
     10        [36m0.2254[0m       [32m0.9594[0m        [35m0.1429[0m  1.6584
     11        [36m0.2186[0m       [32m0.96

<skorch.net.NeuralNetClassifier at 0x7f6239f7c278>

## Prediction

In [19]:
predicted = net.predict(X_test)

In [20]:
np.mean(predicted == y_test)

0.96537142857142855

An accuracy of nearly 97% for a network with only one hidden layer is not too bad.

# Convolutional Network

In [21]:
XCnn = X.reshape(-1, 1, 28, 28)

In [22]:
XCnn.shape

(70000, 1, 28, 28)

In [23]:
# Split the image-shaped data with the same test_size and random_state as the
# flat split above. y_train / y_test are re-bound here from the same `y`.
XCnn_train, XCnn_test, y_train, y_test = train_test_split(
    XCnn,
    y,
    test_size=0.25,
    random_state=42,
)

In [24]:
XCnn_train.shape, y_train.shape

((52500, 1, 28, 28), (52500,))

In [25]:
class Cnn(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Architecture: two 3x3 convolutions, each followed by 2x2 max-pooling and
    ReLU (with channel dropout after the second convolution), then two fully
    connected layers. The output is passed through softmax, so the module
    returns per-class probabilities over 10 classes rather than raw logits.
    """

    def __init__(self):
        super(Cnn, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_drop = nn.Dropout2d()
        # For 28x28 input: conv1 -> 26x26, pool -> 13x13, conv2 -> 11x11,
        # pool -> 5x5, so the flattened size is 64 channels * 5 * 5 = 1600.
        self.fc1 = nn.Linear(1600, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return class probabilities for a batch of images.

        Args:
            x: Tensor of shape (batch, 1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) whose rows sum to 1.
        """
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten channel, height and width into one feature axis (= 1600).
        x = x.view(-1, x.size(1) * x.size(2) * x.size(3))
        x = F.relu(self.fc1(x))
        # The functional dropout must be told the mode; it is a no-op in eval mode.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Name the class axis explicitly: calling softmax without `dim` is
        # deprecated and warns on every forward pass. For the 2-D tensor here,
        # dim=-1 selects the same axis the implicit rule chose.
        return F.softmax(x, dim=-1)

In [26]:
# Wrap the convolutional module; Adadelta is used here instead of the default optimizer.
cnn = NeuralNetClassifier(
    Cnn,
    optimizer=torch.optim.Adadelta,
    lr=1,
    max_epochs=15,
    # use_cuda=True,  # uncomment this to train with CUDA
)

In [27]:
cnn.fit(XCnn_train, y_train)

  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m0.4442[0m       [32m0.9747[0m        [35m0.0861[0m  20.1571
      2        [36m0.1471[0m       [32m0.9802[0m        [35m0.0631[0m  19.8584
      3        [36m0.1094[0m       [32m0.9845[0m        [35m0.0515[0m  20.5175
      4        [36m0.0931[0m       [32m0.9849[0m        [35m0.0482[0m  20.1864
      5        [36m0.0814[0m       [32m0.9866[0m        [35m0.0388[0m  19.7448
      6        [36m0.0727[0m       [32m0.9871[0m        [35m0.0365[0m  20.1205
      7        [36m0.0684[0m       [32m0.9882[0m        [35m0.0357[0m  20.2718
      8        [36m0.0645[0m       [32m0.9887[0m        [35m0.0331[0m  20.6907
      9        [36m0.0575[0m       0.9880        0.0344  20.1551
     10        [36m0.0566[0m       [32m0.9894[0m        [35m0.0315[0m  20.5150
     11        [36m0.0523[0m       0.9891        

<skorch.net.NeuralNetClassifier at 0x7f6239f7ccc0>

In [28]:
cnn_pred = cnn.predict(XCnn_test)

In [29]:
np.mean(cnn_pred == y_test)

0.99085714285714288