In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
from mlutil.trainer import Trainer
from mlutil.datasets import FashionMNISTSet

In [3]:
class MLPFromScratch(nn.Module):
    """Multi-layer perceptron with one hidden ReLU layer, built from raw parameters.

    The weights and biases are plain ``nn.Parameter`` tensors and the forward
    pass is written out with explicit matrix multiplications instead of
    ``nn.Linear`` layers. The model returns unnormalised logits; the softmax is
    folded into the cross-entropy loss.
    """

    def __init__(self, id: int, hd: int, od: int, sigma=0.01) -> None:
        """Create the parameters of the network.

        Args:
            id: Number of input features per example. (The name shadows the
                ``id`` builtin but is kept so existing callers keep working.)
            hd: Number of hidden units.
            od: Number of outputs (logits).
            sigma: Scale applied to the standard-normal weight initialisation.
        """
        super().__init__()

        self.id = id

        # Weights start as small Gaussian noise, biases start at zero.
        self.W1 = nn.Parameter(torch.randn(id, hd) * sigma)
        self.b1 = nn.Parameter(torch.zeros(hd))
        self.W2 = nn.Parameter(torch.randn(hd, od) * sigma)
        self.b2 = nn.Parameter(torch.zeros(od))

    @staticmethod
    def relu(x: torch.Tensor) -> torch.Tensor:
        """Element-wise ``max(x, 0)``.

        Declared as a static method so that both ``self.relu(x)`` and
        ``MLPFromScratch.relu(x)`` work; without the decorator the instance
        call would pass ``self`` as ``x`` and fail.
        """
        return torch.max(x, torch.zeros_like(x))

    def loss(self, yhat, y):
        """Cross-entropy between the logits ``yhat`` and the targets ``y``.

        The softmax is applied inside the loss, so ``yhat`` must be raw logits.
        """
        return nn.functional.cross_entropy(yhat, y)

    def config_optimizer(self, lr):
        """Attach a plain SGD optimizer over all parameters as ``self.opt``.

        Must be called before ``train_step``.
        """
        self.opt = torch.optim.SGD(self.parameters(), lr=lr)

    def train_step(self, batch):
        """Run one optimisation step on ``batch`` and return the loss as a float.

        Args:
            batch: An ``(X, y)`` pair of inputs and targets.
        """
        X, y = batch
        # Clear gradients left over from the previous step; otherwise
        # backward() keeps adding to them and every update uses stale sums.
        self.opt.zero_grad()
        loss = self.loss(self(X), y)
        loss.backward()
        self.opt.step()
        return loss.item()

    def val_step(self, batch):
        """Compute the loss on ``batch`` without tracking gradients.

        Args:
            batch: An ``(X, y)`` pair of inputs and targets.

        Returns:
            The loss as a Python float.
        """
        with torch.no_grad():
            X, y = batch
            return self.loss(self(X), y).item()

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Flatten ``X`` to ``(-1, self.id)`` and return the output logits."""
        # reshape (not the in-place resize_) so the caller's tensor is untouched.
        X = X.reshape(-1, self.id)
        h = self.relu(torch.matmul(X, self.W1) + self.b1)
        return torch.matmul(h, self.W2) + self.b2

In [4]:
# Side length handed to the dataset as dim=(idim, idim); the model is given
# idim * idim input features to match the flattened examples.
idim = 28
dataset = FashionMNISTSet(batch_size=128, dim=(idim, idim))

# 256 hidden units, 10 outputs, weights initialised with scale 0.01.
model = MLPFromScratch(id=idim * idim, hd=256, od=10, sigma=0.01)
model.config_optimizer(lr=0.01)

trainer = Trainer(epochs=10)