In [65]:
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from torch import nn
from pathlib import Path
import numpy as np

In [66]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
device

'mps'

In [67]:
RANDOM_SEED = 42

In [68]:
df = pd.read_csv('../excel_files/train.csv')

df

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [69]:
X = df.drop(columns='label')
y = df['label']

X

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
y

0        1
1        0
2        1
3        4
4        0
        ..
41995    0
41996    1
41997    7
41998    6
41999    9
Name: label, Length: 42000, dtype: int64

In [72]:
X.shape, y.shape, len(X), len(y)

((42000, 784), (42000,), 42000, 42000)

In [73]:
X_tensor = torch.tensor(X.values, dtype=torch.float).to(device)
y_tensor = torch.tensor(y.values, dtype=torch.float).to(device)

X_tensor.shape, y_tensor.shape, len(X_tensor), len(y_tensor), type(X_tensor), type(y_tensor), X_tensor.dtype, y_tensor.dtype, X_tensor.device, y_tensor.device

(torch.Size([42000, 784]),
 torch.Size([42000]),
 42000,
 42000,
 torch.Tensor,
 torch.Tensor,
 torch.float32,
 torch.float32,
 device(type='mps', index=0),
 device(type='mps', index=0))

In [74]:
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=RANDOM_SEED)

X_train.shape, y_train.shape, X_test.shape, y_test.shape, len(X_train), len(y_train), len(X_test), len(y_test), X_train.device

(torch.Size([33600, 784]),
 torch.Size([33600]),
 torch.Size([8400, 784]),
 torch.Size([8400]),
 33600,
 33600,
 8400,
 8400,
 device(type='mps', index=0))

In [85]:
X_train.shape[1], len(y_train.unique())

(784, 10)

In [111]:
class DigitRecognizerModelV0(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_stack = nn.Sequential(
            nn.Linear(in_features=X_train.shape[1], out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=64),
            nn.ReLU(),
            nn.Linear(in_features=64, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=16),
            nn.ReLU(),
            nn.Linear(in_features=16, out_features=8),
            nn.ReLU(),
            nn.Linear(in_features=8, out_features=len(y_train.unique()))
        )
    def forward(self, x):
        return self.layer_stack(x)

In [112]:
torch.manual_seed(RANDOM_SEED)

model_0 = DigitRecognizerModelV0()
model_0.to(device)
model_0, next(model_0.parameters()).device

(DigitRecognizerModelV0(
   (layer_stack): Sequential(
     (0): Linear(in_features=784, out_features=128, bias=True)
     (1): ReLU()
     (2): Linear(in_features=128, out_features=64, bias=True)
     (3): ReLU()
     (4): Linear(in_features=64, out_features=32, bias=True)
     (5): ReLU()
     (6): Linear(in_features=32, out_features=16, bias=True)
     (7): ReLU()
     (8): Linear(in_features=16, out_features=8, bias=True)
     (9): ReLU()
     (10): Linear(in_features=8, out_features=10, bias=True)
   )
 ),
 device(type='mps', index=0))

In [113]:
y_logits_temp = model_0(X_train)
torch.argmax(torch.softmax(y_logits_temp, dim=1), dim=1).unique()

tensor([0, 1, 2, 4, 5, 6, 7, 8, 9], device='mps:0')

In [115]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.01)

In [116]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_pred)) * 100
    return acc

In [117]:
torch.manual_seed(RANDOM_SEED)
epochs = 10000
for epoch in range(epochs):
    model_0.train()
    y_logits = model_0(X_train).squeeze()
    loss = loss_fn(y_logits, y_train)
    y_pred = torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
    acc = accuracy_fn(y_true=y_train, y_pred=y_pred)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    model_0.eval()
    with torch.inference_mode():
        test_logits = model_0(X_test).squeeze()
        test_loss = loss_fn(test_logits, y_test)
        test_pred = torch.argmax(torch.softmax(test_logits, dim=1), dim=1)
        test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
    
    if (epoch+1) % 100 == 0:
        print(f"Epoch = {epoch+1}/{epochs} | {loss = :.2f} | {acc = :2f}% | {test_loss = :2f} | {test_acc = :2f}%")

Epoch = 100/10000 | loss = 0.96 | acc = 74.994048% | test_loss = 0.947038 | test_acc = 75.107143%
Epoch = 200/10000 | loss = 0.40 | acc = 88.779762% | test_loss = 0.422688 | test_acc = 87.892857%
Epoch = 300/10000 | loss = 0.25 | acc = 92.979167% | test_loss = 0.283766 | test_acc = 92.095238%
Epoch = 400/10000 | loss = 0.20 | acc = 94.437500% | test_loss = 0.236477 | test_acc = 93.309524%
Epoch = 500/10000 | loss = 0.17 | acc = 95.032738% | test_loss = 0.220623 | test_acc = 93.726190%
Epoch = 600/10000 | loss = 0.15 | acc = 95.717262% | test_loss = 0.201603 | test_acc = 94.309524%
Epoch = 700/10000 | loss = 0.12 | acc = 96.437500% | test_loss = 0.183356 | test_acc = 94.880952%
Epoch = 800/10000 | loss = 0.11 | acc = 97.035714% | test_loss = 0.172935 | test_acc = 95.214286%
Epoch = 900/10000 | loss = 0.09 | acc = 97.452381% | test_loss = 0.167027 | test_acc = 95.333333%
Epoch = 1000/10000 | loss = 0.18 | acc = 94.967262% | test_loss = 0.221783 | test_acc = 93.702381%
Epoch = 1100/10000 

In [121]:
MODEL_NAME = "digit_recognizer_model.pth"
MODEL_PATH = Path("../models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
MODEL_SAVE_PATH

PosixPath('../models/digit_recognizer_model.pth')

In [122]:
torch.save(obj=model_0.state_dict(), f=MODEL_SAVE_PATH)

In [139]:
df_test = pd.read_csv('../excel_files/test.csv')
df_test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [140]:
X_test_tensor = torch.tensor(df_test.values, dtype=torch.float).to(device)

X_test_tensor.shape, len(X_test_tensor), X_test_tensor.device, type(X_test_tensor)

(torch.Size([28000, 784]), 28000, device(type='mps', index=0), torch.Tensor)

In [141]:
torch.manual_seed(RANDOM_SEED)
model_0.eval()
with torch.inference_mode():
    y_result_logits = model_0(X_test_tensor).squeeze()
    y_result_labels = torch.argmax(torch.softmax(y_result_logits, dim=1), dim=1)
y_result_labels.unique() 

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], device='mps:0')

In [142]:
y_result_labels_np = y_result_labels.cpu().numpy()

y_result_labels_np

array([2, 0, 9, ..., 3, 9, 2])

In [143]:
df_test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [144]:
df_test['Label'] = y_result_labels_np

df_test

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,Label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9
27996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
27997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
27998,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,9


In [145]:
df_test.to_csv('../excel_files/result.csv', index=False)