In [1]:
import torch
import torch.nn as nn
import pickle, gzip
from torch.utils.data import Dataset, DataLoader
from matplotlib import pyplot as plt

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
import gzip, pickle

with gzip.open("mnist.pkl.gz", "rb") as f:
    ((x_train, y_train), (x_test, y_test), _) = pickle.load(f, encoding="latin-1")

In [4]:
x_train, y_train, x_test, y_test = map(
    torch.tensor, (x_train, y_train, x_test, y_test)
)

In [5]:
def show(sample: torch.Tensor): plt.imshow(sample.reshape((28, 28)), cmap="gray")

In [6]:
class NumbersDataset(Dataset):
    def __init__(self, X=x_train, y=y_train):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return (self.X[idx], self.y[idx])

In [7]:
digits_train = DataLoader(NumbersDataset(x_train, y_train), batch_size=64, shuffle=True)
digits_test = DataLoader(NumbersDataset(x_test, y_test), batch_size=64)

In [8]:
for images, labels in digits_test:
    image = images
    label = labels
    break

In [8]:
class Model(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(784, 128),
            nn.Linear(128, num_classes)
        )

    def forward(self, xb):
        return self.classifier(xb)

In [9]:
import torch.optim as optim

model = Model().to(device)

optimizer = optim.Adam(model.parameters(), lr=0.0001)
loss_func = nn.CrossEntropyLoss()

In [10]:
epochs = 100
train_losses = []

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for data, labels in digits_train:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        preds = model(data).to(device)
        loss = loss_func(preds, labels.long())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_loss = running_loss / len(digits_train.dataset)
    train_losses.append(train_loss)
    print(f"Epoch {epoch + 1}/{epochs} - Train loss: {train_loss}")

Epoch 1/100 - Train loss: 0.014484068942070008
Epoch 2/100 - Train loss: 0.006281161097288132
Epoch 3/100 - Train loss: 0.00529140009790659
Epoch 4/100 - Train loss: 0.004912906110286713
Epoch 5/100 - Train loss: 0.00469989223241806
Epoch 6/100 - Train loss: 0.004559207837581634
Epoch 7/100 - Train loss: 0.0044576895678043365
Epoch 8/100 - Train loss: 0.004383968220949173
Epoch 9/100 - Train loss: 0.004321312737315893
Epoch 10/100 - Train loss: 0.004262990467697382
Epoch 11/100 - Train loss: 0.004226037752628326
Epoch 12/100 - Train loss: 0.004179195389971137
Epoch 13/100 - Train loss: 0.004144041358232499
Epoch 14/100 - Train loss: 0.004114227118641138
Epoch 15/100 - Train loss: 0.0040870846429467205
Epoch 16/100 - Train loss: 0.004058111084625125
Epoch 17/100 - Train loss: 0.004030489988029003
Epoch 18/100 - Train loss: 0.004012233206480742
Epoch 19/100 - Train loss: 0.003996496716886759
Epoch 20/100 - Train loss: 0.003975837300419807
Epoch 21/100 - Train loss: 0.003954888993650675
E

In [11]:
torch.save(model.state_dict(), "./model.pth")

In [12]:
model.eval()
test_preds = model(digits_test.dataset[:][0].to(device))

In [84]:
digits_test.dataset[0][1]

tensor(3)

In [91]:
test_preds[0]

tensor([-2.7788,  2.5870,  3.7705,  5.7841, -4.1750,  1.9488, -0.3270, -7.4602,
         2.8783, -5.2233], device='cuda:0', grad_fn=<SelectBackward0>)

In [13]:
_, predicted_labels = torch.max(test_preds, 1)

In [14]:
true_labels = digits_test.dataset[:][1]

In [103]:
predicted_labels.size()

torch.Size([10000])

In [15]:
correct = (predicted_labels.to(device) == true_labels.to(device)).sum().item()

In [16]:
correct / 10000

0.9309