In [1]:
from fastai.vision.all import *
from fastbook import *
import pandas as pd

In [3]:
# Raw MNIST tables; later cells filter them on the "label" column and take
# the pixel values from columns 1..784.
train, test = (pd.read_csv(path) for path in ("mnist_train.csv", "mnist_test.csv"))

# Per-split accumulators, filled one digit class at a time by the loop below:
# each image list receives one stacked tensor per digit, each label list
# receives one integer label per image.
training, training_labels = [], []
validation, validation_labels = [], []
testing, testing_labels = [], []

def stacker(df):
    """Convert a DataFrame of pixel rows into a 2-D float tensor divided by 255.

    Args:
        df: DataFrame whose rows are images and whose columns are numeric
            pixel values (presumably 0-255, so the result lies in [0, 1]).

    Returns:
        A float tensor of shape ``(len(df), n_columns)``. A frame with zero
        rows yields an empty ``(0, n_columns)`` tensor instead of raising.
    """
    # One bulk conversion of the whole frame; building a tensor per row and
    # stacking them is much slower and fails when the frame has no rows.
    return torch.tensor(df.to_numpy()).float() / 255
def loader(data, labels, batch_size=256, shuffle=True):
    """Bundle image tensors and their labels into a DataLoader.

    Args:
        data: list of image tensors; they are concatenated along dim 0 and
            reshaped to rows of 28*28 values.
        labels: sequence of labels, one per image row, in the same order.
        batch_size: number of samples per batch (default 256, as before).
        shuffle: whether to reshuffle the samples on each pass (default True,
            as before). Pass False for evaluation sets if a fixed order is
            wanted.

    Returns:
        A DataLoader over ``(image_row, label)`` pairs where each label has
        shape ``(1,)``.
    """
    images = torch.cat(data).view(-1, 28*28)
    # unsqueeze(1) gives every label its own length-1 row, so batches of
    # targets come out with shape (batch, 1).
    targets = tensor(labels).unsqueeze(1)
    samples = list(zip(images, targets))
    return DataLoader(samples, batch_size=batch_size, shuffle=shuffle)

# Build the test / validation / training sets one digit class at a time so
# every class contributes the same proportion to each split.
for digit in range(10):

    # Keep only this digit's rows and take columns 1..784 as the pixels
    # (assumes the label sits in column 0 - verify against the CSV layout).
    test_images = test[test["label"] == digit].iloc[:, 1:785]
    images = train[train["label"] == digit].iloc[:, 1:785]

    # The first ~20% (+1 row) of the class goes to validation ...
    n_valid = round(len(images) * .2) + 1
    validation_images = images.iloc[:n_valid].reset_index(drop=True)
    # ... and training starts exactly where validation ends. Starting at
    # len(validation_images) + 1 silently dropped one sample per class.
    train_images = images.iloc[len(validation_images):].reset_index(drop=True)

    test_stacked = stacker(test_images)
    testing_labels.extend([digit] * len(test_stacked))
    testing.append(test_stacked)

    valid_stacked = stacker(validation_images)
    validation_labels.extend([digit] * len(valid_stacked))
    validation.append(valid_stacked)

    train_stacked = stacker(train_images)
    training_labels.extend([digit] * len(train_stacked))
    training.append(train_stacked)


# Wrap each split in a DataLoader of (image_row, label) pairs.
testing_data = loader(testing, testing_labels)
validation_data = loader(validation, validation_labels)
training_data = loader(training, training_labels)

In [50]:
class SimpleNN(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size1, output_size):
        """Create the two linear layers and the activation between them.

        Args:
            input_size: number of features in each input row.
            hidden_size1: width of the hidden layer.
            output_size: number of values produced per input row.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size1)
        self.ReLU = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size1, output_size)

    def forward(self, x):
        """Return the raw (un-normalized) outputs of the second linear layer."""
        hidden = self.ReLU(self.linear1(x))
        return self.linear2(hidden)
    
def mnist_loss(prds, trgt):
    """Mean distance between sigmoid-squashed predictions and binary targets.

    Where the target equals 1 the per-element loss is ``1 - sigmoid(prds)``;
    everywhere else it is ``sigmoid(prds)``. The mean over all elements is
    returned.
    """
    probs = torch.sigmoid(prds)
    is_positive = trgt == 1
    per_item = torch.where(is_positive, 1 - probs, probs)
    return per_item.mean()

def validate_epoch(model, valid_dl):
    """Average the per-batch accuracy of `model` over `valid_dl`.

    Every batch contributes equally to the mean regardless of its size.
    Returns a Python float rounded to 4 decimal places.
    """
    per_batch = []
    for xb, yb in valid_dl:
        per_batch.append(batch_accuracy(model(xb), yb))
    mean_acc = torch.stack(per_batch).mean()
    return round(mean_acc.item(), 4)
    
def batch_accuracy(x, y):
    """Fraction of rows in `x` whose highest-scoring column matches the target.

    Args:
        x: 2-D tensor of per-class scores, one row per sample. Raw logits and
            softmax probabilities give the same result because softmax does
            not change which column is largest.
        y: tensor of integer class labels; it is flattened before comparing,
            so ``(batch, 1)`` and ``(batch,)`` both work.

    Returns:
        A 0-dim float tensor holding the accuracy for this batch.
    """
    # argmax is taken directly on the scores: normalizing first is redundant
    # and can turn very low-scoring rows into NaN, corrupting the prediction.
    predicted_value = torch.argmax(x, dim=1)
    hits = predicted_value == y.flatten()
    return hits.to(torch.float).mean()

def softmax(preds):
    """Row-wise softmax of a 2-D tensor of scores.

    Args:
        preds: tensor of shape ``(batch, classes)``.

    Returns:
        Tensor of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtract each row's own maximum before exponentiating. Using the global
    # maximum lets rows far below it underflow to all zeros, giving 0/0 = NaN.
    shifted = preds - preds.max(dim=1, keepdim=True).values
    exps = torch.exp(shifted)
    return exps / exps.sum(dim=1, keepdim=True)


def cross_entropy_loss(preds, trgt):
    """Summed cross-entropy between raw class scores and integer targets.

    Args:
        preds: tensor of shape ``(batch, classes)`` holding raw scores
            (logits); the softmax is applied internally.
        trgt: tensor of shape ``(batch, 1)`` whose first column holds the
            class index of each sample.

    Returns:
        A 0-dim tensor: the negative log-probability of the target class,
        summed (not averaged) over the batch.
    """
    # log-softmax via logsumexp stays finite even when a probability would
    # underflow to 0; log(softmax(x)) gives -inf there and -inf * 0 is NaN.
    log_probs = preds - torch.logsumexp(preds, dim=1, keepdim=True)
    # Pick each row's target-class log-probability in one vectorized gather
    # instead of building a one-hot matrix sample by sample.
    class_idx = trgt[:, :1].long()
    return -log_probs.gather(1, class_idx).sum()

In [80]:
# Training hyperparameters: upper bound on passes over the training data and
# the step size used for each parameter update.
epochs = 1000
lr = 1e-4

# 28*28 input features -> 30 hidden units -> 10 outputs (one per digit).
model = SimpleNN(input_size=28*28, hidden_size1=30, output_size=10)

# Optimizer over the model's parameters. The training loop in this notebook
# applies its updates by hand and does not reference `opt`.
opt = SGD(model.parameters(), lr)

In [81]:
import time
from IPython.display import display, clear_output

# Train with plain mini-batch gradient descent, reporting the last batch's
# loss and the validation accuracy after every epoch.
for i in range(epochs):
    for x, y in training_data:
        preds = model(x)
        loss = cross_entropy_loss(preds, y)
        loss.backward()
        # Manual SGD step. no_grad keeps the in-place update out of the
        # autograd graph; clearing .grad stops gradients accumulating across
        # batches.
        with torch.no_grad():
            for param in model.parameters():
                param -= lr * param.grad
                param.grad = None
    # Early stop, judged on the loss of the final mini-batch of the epoch
    # (a summed, not averaged, loss).
    if loss < 1.5:
        break
    # Evaluation needs no gradients, and batch_accuracy takes the model's
    # outputs directly - applying softmax first does not change the argmax.
    with torch.no_grad():
        acc = torch.stack([batch_accuracy(model(xb), yb) for xb, yb in validation_data]).mean()
    clear_output(wait=True)
    print(f"Loss: {loss} \n model accuracy: {acc*100:.0f}%")
    time.sleep(.01)

Loss: 3.674091100692749 
 model accuracy: 96%
model accuracy: 96%
