<a href="https://colab.research.google.com/github/omidkhalafbeigi/pytorch_mnist_classification/blob/main/PyTorch_MNIST_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch import optim
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import load_digits
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [2]:
def shuffle(x : torch.Tensor, y : torch.Tensor):
  idx = torch.randperm(x.shape[0])
  x_randomized = x[idx]
  y_randomized = y[idx]
  
  return x_randomized, y_randomized

In [3]:
dataset = load_digits()
X, y = dataset.data, dataset.target

scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)

if torch.cuda.is_available(): device = 'cuda'
else: device = 'cpu'

X_train, y_train = torch.from_numpy(X_train.astype(dtype=np.float32)).to(device), torch.from_numpy(y_train.astype(dtype=np.int64)).to(device)
X_test, y_test = torch.from_numpy(X_test.astype(dtype=np.float32)).to(device), torch.from_numpy(y_test.astype(dtype=np.int64)).to(device)

In [4]:
X_train = X_train.view(-1, 1, 8, 8)
X_test = X_test.view(-1, 1, 8, 8)

In [8]:
batch_size = 64
epochs = 1000

conv2d = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(2, 2), stride=1, device=device)
maxpool2d = nn.MaxPool2d(kernel_size=(2, 2), stride=1)
linear = nn.Linear(in_features=6 * 6 * 16, out_features=20, device=device)
output_layer = nn.Linear(in_features=20, out_features=10, device=device)
parameters = list(conv2d.parameters()) + list(maxpool2d.parameters()) + list(linear.parameters()) + list(output_layer.parameters())

sigmoid = nn.Sigmoid()
optimizer = optim.Adam(params=parameters, lr=0.01)
ce = nn.CrossEntropyLoss()
softmax = nn.Softmax(dim=1)

for epoch in range(epochs):
  X_train, y_train = shuffle(X_train, y_train)
  for batch in range(batch_size, X_train.shape[0], batch_size):
    X = X_train[batch - batch_size : batch]
    y = y_train[batch - batch_size : batch]

    output = conv2d(X)
    output = sigmoid(output)
    output = maxpool2d(output)
    output = output.view(-1, 6 * 6 * 16)
    output = linear(output)
    output = sigmoid(output)
    output = output_layer(output)
    output = softmax(output)
    
    loss = ce(output, y)
    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

  if (epoch % 100) == 0:
    print(f'Epoch: {epoch + 100}')

Epoch: 100
Epoch: 200
Epoch: 300
Epoch: 400
Epoch: 500
Epoch: 600
Epoch: 700
Epoch: 800
Epoch: 900
Epoch: 1000


In [10]:
output = conv2d(X_test)
output = sigmoid(output)
output = maxpool2d(output)
output = output.view(-1, 6 * 6 * 16)
output = linear(output)
output = sigmoid(output)
output = output_layer(output)
output = softmax(output)

y_pred = np.array(output.detach().to('cpu'))
y_pred = np.argmax(y_pred, axis=1)

accuracy = accuracy_score(y_true=np.array(y_test.detach().to('cpu')), y_pred=y_pred)
print(f'Accuracy: {accuracy:0.3f}')

Accuracy: 0.961
