In [57]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import PIL.Image as Image

Get data

In [58]:
IMAGE_SIZE = (28, 28)
EPOCH = 5
# When weights become NaN, try reducing the learning rate
LEARNING_RATE = 0.00001

# Import data
data = pd.read_csv(r"../digit-recognizer/train.csv")
data_test = pd.read_csv(r"../digit-recognizer/test.csv")    

# Get labels and image array from data
labels : np.ndarray = data.values[:, 0]
images : np.ndarray = data.values[:, 1:].astype('uint8')

images_test : np.ndarray = data_test.values.astype('uint8')

Process data

In [59]:
class Model(nn.Module):
    def __init__(self, input, hidden, output):
        super().__init__()
        self.layer1 = nn.Linear(input, hidden)
        self.layer2 = nn.Linear(hidden, output)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        # torch.sigmoid(x)

        return x

In [60]:
model = Model(IMAGE_SIZE[0] * IMAGE_SIZE[1], 1024, 10)
# Cross Entropy Loss already includes softmax
criterion = nn.CrossEntropyLoss()   
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)
for i in range(EPOCH):
    wrong = 0
    for j in range(len(images)):
        optimizer.zero_grad()
        res = model(torch.tensor(images[j], dtype=torch.float32))
        if (int(torch.argmax(res)) != labels[j]):
            wrong += 1

        # Create label for calculating loss, for sigmoid
        # label_tensor = np.zeros(10)
        # label_tensor[labels[j]] = 1
        # label_tensor = torch.tensor(label_tensor, dtype=torch.float32)
        # print(label_tensor)

        # CrossEntropyLoss, target tensor only contains one label output
        label_tensor = torch.tensor([labels[j]], dtype=torch.long)
        res = torch.reshape(res, (1, 10))
        loss = criterion(res, label_tensor)
        loss.backward()
        optimizer.step()
        
        # print(model.layer1.weight)
        if ((j+1) % 2000 == 0):
            print(f"Data {j+1}: Wrong = {wrong}, Accuracy: {100-wrong/j*100}%")
        
    print(f"Epoch: {i} --> Wrong: {wrong}, Accuracy: {100-wrong / labels.size * 100}%\n")    

# Test model
wrong = 0
for i in range(100):
    res = torch.argmax(model(torch.tensor(images_test[i], dtype=torch.float32)))
    img = Image.fromarray(images_test[i].reshape(IMAGE_SIZE))
    print(f"Image {i+1}: {res}")
    img.save(f"./test/{str(i+1)}_ans={res}.jpg")


Data 2000: Wrong = 600, Accuracy: 69.98499249624813%
Data 4000: Wrong = 970, Accuracy: 75.743935983996%
Data 6000: Wrong = 1295, Accuracy: 78.41306884480747%
Data 8000: Wrong = 1637, Accuracy: 79.53494186773347%
Data 10000: Wrong = 1925, Accuracy: 80.74807480748075%
Data 12000: Wrong = 2234, Accuracy: 81.38178181515127%
Data 14000: Wrong = 2549, Accuracy: 81.79155653975283%
Data 16000: Wrong = 2837, Accuracy: 82.26764172760798%
Data 18000: Wrong = 3132, Accuracy: 82.59903327962664%
Data 20000: Wrong = 3398, Accuracy: 83.00915045752288%
Data 22000: Wrong = 3696, Accuracy: 83.19923632892404%
Data 24000: Wrong = 3987, Accuracy: 83.38680778365764%
Data 26000: Wrong = 4264, Accuracy: 83.59936920650794%
Data 28000: Wrong = 4556, Accuracy: 83.72799028536733%
Data 30000: Wrong = 4839, Accuracy: 83.86946231541052%
Data 32000: Wrong = 5091, Accuracy: 84.09012781649426%
Data 34000: Wrong = 5355, Accuracy: 84.24953675108091%
Data 36000: Wrong = 5617, Accuracy: 84.39678879968888%
Data 38000: Wrong 