In [2]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

In [3]:
class MLP(nn.Module):
    """Three-layer fully connected classifier that outputs log-probabilities.

    The defaults reproduce the original fixed architecture
    (784 -> 128 -> 64 -> 10), so ``MLP()`` behaves exactly as before.

    Args:
        in_features: Number of values per sample after flattening
            (28*28 by default).
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output classes.
    """

    def __init__(self, in_features=28*28, hidden1=128, hidden2=64, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch.

        Args:
            x: Tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must multiply to ``in_features``.

        Returns:
            Tensor of shape ``[batch_size, num_classes]`` holding
            log-softmax scores along dim 1.
        """
        # [batch_size, ...] -> [batch_size, in_features]
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # [batch_size, num_classes] raw scores
        x = self.fc3(x)
        # Log-probabilities, the input format F.nll_loss expects.
        output = F.log_softmax(x, dim=1)
        return output

In [5]:
# Smoke test: push a random batch through an untrained model and check that
# the result is a [batch_size, 10] tensor of log-probabilities.
batch_size = 4
test = torch.randn(batch_size , 28, 28)
# Build the model here so the cell works on a fresh kernel instead of relying
# on an `mlp` left over from an earlier, out-of-order execution.
mlp = MLP()
mlp(test)



tensor([[-2.3046, -2.2313, -2.2200, -2.4302, -2.3385, -2.4150, -2.1886, -2.2187,
         -2.3863, -2.3274],
        [-2.3174, -2.3721, -2.3289, -2.2435, -2.2569, -2.3673, -2.2064, -2.3383,
         -2.4233, -2.1977],
        [-2.3678, -2.3552, -2.2300, -2.3164, -2.2365, -2.3648, -2.2031, -2.4101,
         -2.3163, -2.2483],
        [-2.3015, -2.3706, -2.3361, -2.2986, -2.2561, -2.2894, -2.2061, -2.3180,
         -2.3631, -2.2972]], grad_fn=<LogSoftmaxBackward0>)

In [15]:
# Preprocessing pipeline: PIL image -> float tensor -> standardised tensor.
# The single-channel constants are presumably the MNIST mean/std; re-check
# them if the dataset ever changes.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Training split (fetched into ../data when download=True) and test split.
dataset1 = datasets.MNIST(root='../data', train=True, transform=transform, download=True)
dataset2 = datasets.MNIST(root='../data', train=False, transform=transform)

# DataLoader does not shuffle unless asked to, so shuffling is requested
# explicitly for training and left off for evaluation.
train_loader = torch.utils.data.DataLoader(dataset=dataset1, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=dataset2, batch_size=64, shuffle=False)

In [17]:
# Pull one batch from the training loader and display it as the cell output.
batch_iter = iter(train_loader)
next(batch_iter)

[tensor([[[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           ...,
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242]]],
 
 
         [[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           ...,
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
           [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242]]],
 
 
         [[[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
       

In [20]:
# Train a fresh MLP with Adam, minimising the negative log-likelihood of the
# log-probabilities the model returns.

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# does not crash on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mlp = MLP().to(device)
num_epochs = 10

optimizer = optim.Adam(mlp.parameters(), lr = 1e-4)

# No-op for a freshly built module, but keeps the loop correct if model
# construction is ever moved out of this cell.
mlp.train()

for epoch in range(num_epochs):
    for x, y in train_loader:
        # Inputs and labels must live on the same device as the model.
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        pred_y = mlp(x)
        loss = F.nll_loss(pred_y, y)
        loss.backward()
        optimizer.step()
    # Reports the loss of the LAST batch of the epoch, not an epoch average.
    print("Epoch: ", epoch, " Loss = ", loss)


Epoch:  0  Loss =  tensor(0.1669, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  1  Loss =  tensor(0.0869, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  2  Loss =  tensor(0.0565, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  3  Loss =  tensor(0.0433, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  4  Loss =  tensor(0.0366, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  5  Loss =  tensor(0.0311, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  6  Loss =  tensor(0.0289, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  7  Loss =  tensor(0.0267, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  8  Loss =  tensor(0.0254, device='cuda:0', grad_fn=<NllLossBackward0>)
Epoch:  9  Loss =  tensor(0.0234, device='cuda:0', grad_fn=<NllLossBackward0>)


In [22]:
# Evaluate the trained model on the test set and print its accuracy.
mlp.eval()

# Run on whichever device the model's parameters are on instead of assuming CUDA.
eval_device = next(mlp.parameters()).device

correct= 0

# Inference only: turn autograd off so no computation graph is built per batch.
with torch.no_grad():
    for x, y in test_loader:
        x = x.to(eval_device)
        y = y.to(eval_device)
        y_prob = mlp(x)
        pred = y_prob.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        # Count predictions that match the labels in this batch.
        correct += pred.eq(y.view_as(pred)).sum().item()

# The printed value is the accuracy: correct predictions / test-set size.
print("correct = ", correct/len(test_loader.dataset))



correct =  0.9673
