In [1]:
from sklearn.datasets import fetch_openml

# Download the 'mnist_784' dataset from OpenML. The as_frame / cache flags are
# forwarded unchanged to scikit-learn (see the fetch_openml documentation for
# their exact semantics).
mnist = fetch_openml(
    name='mnist_784',
    as_frame=False,
    cache=False,
)

In [2]:
# Features: cast to float32 and divide by 255 in a single expression
# (presumably mapping 8-bit pixel intensities into [0, 1] -- confirm against
# the dataset description).
X = mnist.data.astype('float32') / 255

# Labels: cast the targets to 64-bit integers.
y = mnist.target.astype('int64')

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the samples as the test set for the dense network; the fixed
# random_state makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=42,
)

In [4]:
import torch
from torch import nn
import torch.nn.functional as F

# Pick the compute device once: use the GPU when torch reports CUDA as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [6]:
import numpy as np

# Layer sizes used as defaults by the dense classifier defined below.
mnist_dim = X.shape[1]                      # number of features per sample
hidden_dim = mnist_dim // 8                 # hidden width: one eighth of the input width
output_dim = np.unique(mnist.target).size   # number of distinct target labels

In [7]:
# Display the three layer sizes computed above (input, hidden, output) as the cell result.
mnist_dim, hidden_dim, output_dim

(784, 98, 10)

In [8]:
class ClassifierModule(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Data flow in ``forward``: linear -> ReLU -> dropout -> linear -> softmax.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of the hidden layer.
        output_dim: number of output classes.
        dropout: dropout probability applied to the hidden activations.

    The three size defaults are read from the module-level names
    ``mnist_dim``, ``hidden_dim`` and ``output_dim`` at the moment this class
    is defined, so those names must already exist when the cell runs.
    """

    def __init__(
            self,
            input_dim=mnist_dim,
            hidden_dim=hidden_dim,
            output_dim=output_dim,
            dropout=0.5,
    ):
        super().__init__()

        # Attribute names double as state-dict keys; keep them stable.
        self.dropout = nn.Dropout(p=dropout)
        self.hidden = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.output = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, X, **kwargs):
        """Return softmax probabilities over the last dimension for batch ``X``.

        Extra keyword arguments are accepted and ignored.
        """
        activations = torch.relu(self.hidden(X))
        scores = self.output(self.dropout(activations))
        # NOTE(review): the module emits probabilities rather than raw scores;
        # presumably this matches what the skorch classifier wrapper expects --
        # confirm against the skorch documentation.
        return torch.softmax(scores, dim=-1)

In [10]:
!pip install skorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting skorch
  Downloading skorch-0.12.0-py3-none-any.whl (185 kB)
[K     |████████████████████████████████| 185 kB 14.1 MB/s 
Installing collected packages: skorch
Successfully installed skorch-0.12.0


In [11]:
from skorch import NeuralNetClassifier

In [12]:
# Seed torch's global RNG before the wrapper is built so that runs of this
# notebook start from the same random state.
torch.manual_seed(0)

# skorch wrapper around the dense module; the class itself (not an instance)
# is handed over together with the training settings.
net = NeuralNetClassifier(
    ClassifierModule,
    device=device,
    lr=0.1,
    max_epochs=20,
)

In [13]:
# Train the dense classifier on the 95% training split.
# NOTE: y_train / y_test are rebound further down by the CNN split
# (test_size=0.25). Re-running this cell after that point would pair X_train
# with a y_train of a different length, so run the notebook top to bottom.
net.fit(X_train, y_train)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7541[0m       [32m0.9033[0m        [35m0.3506[0m  3.4516
      2        [36m0.4041[0m       [32m0.9218[0m        [35m0.2732[0m  1.1447
      3        [36m0.3367[0m       [32m0.9332[0m        [35m0.2362[0m  1.1315
      4        [36m0.3015[0m       [32m0.9405[0m        [35m0.2089[0m  1.1368
      5        [36m0.2732[0m       [32m0.9446[0m        [35m0.1905[0m  1.1416
      6        [36m0.2544[0m       [32m0.9489[0m        [35m0.1786[0m  1.1011
      7        [36m0.2431[0m       [32m0.9523[0m        [35m0.1683[0m  1.1324
      8        [36m0.2298[0m       [32m0.9547[0m        [35m0.1585[0m  1.1253
      9        [36m0.2171[0m       [32m0.9562[0m        [35m0.1527[0m  1.1519
     10        [36m0.2106[0m       [32m0.9572[0m        [35m0.1464[0m  1.1333
     11        [36m0.2031[0m       [32m0.96

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=ClassifierModule(
    (dropout): Dropout(p=0.5, inplace=False)
    (hidden): Linear(in_features=784, out_features=98, bias=True)
    (output): Linear(in_features=98, out_features=10, bias=True)
  ),
)

In [14]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out test split with the trained dense network.
y_predicted = net.predict(X_test)

# Overall accuracy on the test split, printed as a percentage.
print("Accuracy = {} %".format(100 * accuracy_score(y_test, y_predicted)))

# Per-class precision / recall / F1 for the ten digit labels 0..9.
print(
    "Classification Report \n {}".format(
        classification_report(y_test, y_predicted, labels=range(10))
    )
)

Accuracy = 96.48571428571428 %
Classification Report 
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       346
           1       0.97      0.98      0.98       378
           2       0.94      0.97      0.95       332
           3       0.97      0.95      0.96       400
           4       0.96      0.97      0.96       295
           5       0.96      0.98      0.97       335
           6       0.96      0.98      0.97       351
           7       0.97      0.96      0.96       362
           8       0.96      0.95      0.96       348
           9       0.97      0.95      0.96       353

    accuracy                           0.96      3500
   macro avg       0.96      0.97      0.96      3500
weighted avg       0.96      0.96      0.96      3500



In [15]:
# Reshape each flat feature row into a single-channel 28x28 image for the
# convolutional model: (samples, 1, 28, 28). The sample count is inferred.
XCnn = X.reshape((-1, 1, 28, 28))

# Show the resulting shape as the cell result.
XCnn.shape

(70000, 1, 28, 28)

In [16]:
# Split the image-shaped data for the CNN, holding out 25% for testing.
# NOTE: this rebinds y_train / y_test, replacing the label arrays produced by
# the earlier 5% split used for the dense network. Cells above that use
# X_train / X_test must not be re-run after this one without re-running the
# first split as well.
XCnn_train, XCnn_test, y_train, y_test = train_test_split(
    XCnn,
    y,
    test_size=0.25,
    random_state=42,
)

In [17]:
# Sanity check: the CNN training images and their labels should share the same leading dimension.
XCnn_train.shape, y_train.shape

((52500, 1, 28, 28), (52500,))

In [18]:
class Cnn(nn.Module):
    """Small convolutional classifier: two conv blocks, then two linear layers.

    Data flow in ``forward``:
        conv1 -> max-pool(2) -> ReLU
        -> conv2 -> 2D dropout -> max-pool(2) -> ReLU
        -> flatten -> fc1 -> dropout -> ReLU -> fc2 -> softmax

    Args:
        dropout: probability used for both the 2D dropout after ``conv2`` and
            the dropout after ``fc1``.
    """

    def __init__(self, dropout=0.5):
        super().__init__()
        # Attribute names double as state-dict keys; keep them stable.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv2_drop = nn.Dropout2d(p=dropout)
        # fc1 expects 1600 flattened features = channels * height * width of
        # the second conv block's output (64 * 5 * 5 for a 1x28x28 input).
        self.fc1 = nn.Linear(in_features=1600, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=10)
        self.fc1_drop = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return softmax probabilities over the last dimension for batch ``x``."""
        # First conv block.
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))

        # Second conv block, with channel-wise dropout applied before pooling.
        x = self.conv2_drop(self.conv2(x))
        x = F.relu(F.max_pool2d(x, kernel_size=2))

        # Collapse channel, height and width into one feature axis per sample.
        flat_features = x.size(1) * x.size(2) * x.size(3)
        x = x.view(-1, flat_features)

        # Classifier head.
        x = F.relu(self.fc1_drop(self.fc1(x)))
        return F.softmax(self.fc2(x), dim=-1)

In [19]:
# Re-seed torch's global RNG so the CNN starts from the same random state on
# every run, independent of what the dense model consumed earlier.
torch.manual_seed(0)

# skorch wrapper around the convolutional module, trained with Adam instead of
# the wrapper's default optimizer.
cnn = NeuralNetClassifier(
    Cnn,
    optimizer=torch.optim.Adam,
    device=device,
    lr=0.002,
    max_epochs=10,
)

In [20]:
# Train the CNN on the 75% image split. The trailing semicolon keeps the
# notebook from echoing fit()'s return value below the training log.
cnn.fit(XCnn_train, y_train);

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.4308[0m       [32m0.9724[0m        [35m0.0857[0m  5.4909
      2        [36m0.1625[0m       [32m0.9795[0m        [35m0.0649[0m  1.5242
      3        [36m0.1350[0m       [32m0.9834[0m        [35m0.0540[0m  1.5256
      4        [36m0.1123[0m       [32m0.9844[0m        [35m0.0498[0m  1.5075
      5        [36m0.1007[0m       [32m0.9856[0m        [35m0.0460[0m  1.5376
      6        [36m0.0951[0m       [32m0.9878[0m        [35m0.0389[0m  1.5311
      7        [36m0.0846[0m       0.9873        0.0408  1.5543
      8        [36m0.0839[0m       0.9878        0.0393  1.5213
      9        [36m0.0792[0m       [32m0.9884[0m        [35m0.0372[0m  1.4761
     10        [36m0.0764[0m       [32m0.9885[0m        0.0373  1.5016


In [23]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out image split with the trained CNN.
y_predicted_cnn = cnn.predict(XCnn_test)

# Overall accuracy on the test split, printed as a percentage.
print("Accuracy = {} %".format(100 * accuracy_score(y_test, y_predicted_cnn)))

# Per-class precision / recall / F1 for the ten digit labels 0..9.
print(
    "Classification Report \n {}".format(
        classification_report(y_test, y_predicted_cnn, labels=range(10))
    )
)

Accuracy = 98.73142857142857 %
Classification Report 
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      1714
           1       1.00      0.99      0.99      1977
           2       0.98      0.99      0.98      1761
           3       1.00      0.99      0.99      1806
           4       0.99      0.99      0.99      1587
           5       0.99      0.98      0.99      1607
           6       0.98      1.00      0.99      1761
           7       0.98      0.99      0.98      1878
           8       0.98      0.98      0.98      1657
           9       0.98      0.98      0.98      1752

    accuracy                           0.99     17500
   macro avg       0.99      0.99      0.99     17500
weighted avg       0.99      0.99      0.99     17500

