In [None]:
import csv
from pathlib import Path

import numpy as np

import torch
from torch import nn
from torch.nn.functional import normalize
from torch.utils.data import DataLoader, Dataset

In [None]:
# Pick the compute device once; later cells move the model and batches onto it.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"using: {device}")

using: cuda


In [None]:
def one_hot(n, i):
    """Return a length-``n`` NumPy vector of zeros with position ``i`` set to 1.0.

    Args:
        n: Length of the returned vector.
        i: Index to mark; follows normal NumPy indexing rules.

    Returns:
        A new ``numpy.ndarray`` of NumPy's default float dtype.
    """
    encoded = np.zeros(n)
    encoded[i] = 1.0
    return encoded


class MNISTDataset(Dataset):
    """In-memory dataset read from a headerless, comma-separated file.

    Every row is ``label, feature_1, ..., feature_k``. Features are divided by
    255 and labels are one-hot encoded. Both are stored as ``float32`` tensors
    so they match the dtype of the features without implicit promotion.

    Args:
        fp: Path (``str`` or ``pathlib.Path``) of the CSV file to load.
        n_classes: Width of the one-hot label vectors. Defaults to 10.

    Raises:
        ValueError: If a label lies outside ``[0, n_classes)``.
    """

    def __init__(self, fp, n_classes=10):
        # ndmin=2 keeps a one-row file two-dimensional so the column slicing
        # below works for any number of rows.
        xy = np.loadtxt(fp, delimiter=',', dtype=np.float32, ndmin=2)

        class_ids = xy[:, 0].astype(np.int64)
        if class_ids.size and (class_ids.min() < 0 or class_ids.max() >= n_classes):
            # Negative indices would otherwise wrap around silently.
            raise ValueError(
                f"labels must lie in [0, {n_classes}); "
                f"found range [{class_ids.min()}, {class_ids.max()}]"
            )

        # The division allocates a fresh float32 array, so the tensor does not
        # alias the raw file buffer.
        self.x = torch.from_numpy(xy[:, 1:] / np.float32(255.0))

        # Vectorised one-hot encoding: one row per sample, a single 1.0 each.
        encoded = np.zeros((class_ids.shape[0], n_classes), dtype=np.float32)
        encoded[np.arange(class_ids.shape[0]), class_ids] = 1.0

        self.y = torch.from_numpy(encoded)
        self.n_samples = xy.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, one_hot_label)`` pair at ``idx``."""
        return self.x[idx], self.y[idx]

    def __len__(self):
        """Return the number of rows read from the file."""
        return self.n_samples

In [None]:
# Locations of the two CSV files; each is parsed fully into memory.
train_csv = Path('/content/sample_data/mnist_train_small.csv')
test_csv = Path('/content/sample_data/mnist_test.csv')

train_dataset = MNISTDataset(train_csv)
test_dataset = MNISTDataset(test_csv)

In [None]:
batch_size = 128

# Reshuffle the training samples every epoch so the optimizer does not see the
# exact same batches in file order each time. Evaluation order does not affect
# the reported metrics, so the test loader stays sequential.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
class Perceptron(nn.Module):
    """Sum of parallel bottleneck branches followed by a linear classifier.

    Each of the ``n_layers`` branches maps the input through
    ``Linear(n_in, n_hidden) -> ReLU -> Linear(n_hidden, n_in)``. All branches
    receive the same input, their outputs are summed, and a final linear layer
    turns the sum into ``n_out`` class scores.

    ``forward`` returns raw, unnormalized logits. ``nn.CrossEntropyLoss``
    applies log-softmax itself, so a softmax inside the model would be applied
    twice and bound the 10-class loss below by about 1.46. Apply
    ``torch.softmax(logits, dim=1)`` on the output when probabilities are
    needed; ``argmax`` predictions are identical either way.

    Args:
        n_layers: Number of parallel branches.
        n_hidden: Width of the bottleneck inside each branch.
        n_in: Number of input features.
        n_out: Number of output classes.
    """

    def __init__(self, n_layers, n_hidden, n_in, n_out):
        super().__init__()

        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_in = n_in
        self.n_out = n_out

        self.hidden = nn.ModuleList([
            nn.Sequential(
                nn.Linear(self.n_in, self.n_hidden),
                nn.ReLU(),
                nn.Linear(self.n_hidden, self.n_in)
            ) for _ in range(self.n_layers)
        ])

        # Kept as a Sequential so parameter names (``classifier.0.*``) stay
        # the same as before the softmax was removed.
        self.classifier = nn.Sequential(
            nn.Linear(self.n_in, self.n_out)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, n_out)`` for ``x`` of shape ``(batch, n_in)``."""
        # Run every branch on the same input and add the results element-wise.
        branch_outputs = torch.stack([branch(x) for branch in self.hidden], 0)
        h = torch.sum(branch_outputs, 0)

        return self.classifier(h)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch produced by ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module to optimise; it is switched to training mode first.
        loss_fn: Callable invoked as ``loss_fn(predictions, targets)``.
        optimizer: Optimizer stepped once per batch.
    """
    model.train()

    for inputs, targets in dataloader:
        # Move the batch to the globally selected device.
        inputs = inputs.to(device)
        targets = targets.to(device)

        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()

        # Apply the update, then clear gradients ready for the next batch.
        optimizer.step()
        optimizer.zero_grad()

In [None]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches; it must
            expose ``.dataset`` and support ``len()``.
        model: Module to evaluate; it is switched to eval mode first.
        loss_fn: Callable invoked as ``loss_fn(predictions, targets)``.

    Returns:
        A tuple ``(mean_batch_loss, accuracy)``: the per-batch losses averaged
        over the number of batches, and the fraction of samples whose
        prediction ``argmax`` along dim 1 equals the target ``argmax``.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)

    model.eval()

    total_loss = 0
    n_correct = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            total_loss += loss_fn(outputs, targets).item()

            # A sample is a hit when predicted and target classes coincide.
            hits = outputs.argmax(1) == targets.argmax(1)
            n_correct += hits.type(torch.float).sum().item()

    return (total_loss / n_batches, n_correct / n_samples)

In [None]:
# Training hyperparameters.
n_epoch = 16          # number of train/evaluate rounds
learning_rate = 0.01  # intended optimizer step size

In [None]:
# Architecture hyperparameters.
n_layers = 4   # number of parallel branches
n_hidden = 8   # bottleneck width inside each branch
n_in = 784     # input features per sample
n_out = 10     # number of classes

model = Perceptron(n_layers, n_hidden, n_in, n_out).to(device)
loss_fn = nn.CrossEntropyLoss()
# Pass the configured step size explicitly; without `lr=` Adam silently uses
# its default of 1e-3 and `learning_rate` would have no effect.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Closed-form parameter count for the architecture: each branch has two weight
# matrices (n_in x n_hidden and n_hidden x n_in) plus their biases, and the
# classifier has one n_in x n_out matrix plus n_out biases.
num_weights = n_in * n_hidden * 2 * n_layers + n_in * n_out
num_biases = n_hidden * n_layers + n_in * n_layers + n_out
model_size = num_weights + num_biases

# Guard against the formula drifting away from the model actually built.
actual_size = sum(p.numel() for p in model.parameters())
assert model_size == actual_size, (
    f"hand-derived size {model_size} != actual parameter count {actual_size}"
)

print(f'model size: {model_size}')
print(f'total weights: {num_weights}')
print(f'total biases: {num_biases}')

model size: 61194
total weights: 58016
total biases: 3178


In [None]:
# Alternate one training pass with one evaluation pass, n_epoch times.
for i in range(n_epoch):
    train(train_dataloader, model, loss_fn, optimizer)
    # test() returns (mean per-batch loss, fraction of correct predictions).
    loss, acc = test(test_dataloader, model, loss_fn)
    # Epochs are reported 1-based; accuracy is shown as a percentage.
    print(f"Epoch: {i + 1}; loss: {round(loss, 3)}; accuracy: {round(acc * 100, 2)}%")

Epoch: 1; loss: 1.563; accuracy: 90.73%
Epoch: 2; loss: 1.543; accuracy: 92.25%
Epoch: 3; loss: 1.538; accuracy: 92.59%
Epoch: 4; loss: 1.528; accuracy: 93.57%
Epoch: 5; loss: 1.528; accuracy: 93.44%
Epoch: 6; loss: 1.524; accuracy: 93.81%
Epoch: 7; loss: 1.523; accuracy: 93.86%
Epoch: 8; loss: 1.521; accuracy: 94.0%
Epoch: 9; loss: 1.519; accuracy: 94.17%
Epoch: 10; loss: 1.517; accuracy: 94.46%
Epoch: 11; loss: 1.514; accuracy: 94.78%
Epoch: 12; loss: 1.516; accuracy: 94.58%
Epoch: 13; loss: 1.511; accuracy: 94.99%
Epoch: 14; loss: 1.513; accuracy: 94.83%
Epoch: 15; loss: 1.513; accuracy: 94.88%
Epoch: 16; loss: 1.512; accuracy: 94.86%
