In [None]:
import torch
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
%matplotlib inline

# Training split; download=True asks torchvision to fetch the files into
# data/ when they are not already there.
dataset = MNIST('data/', train=True, download=True)
# Test split, read from the same data/ directory.
test_dataset = MNIST('data/', train=False)

In [None]:
# Size of the training split (printed) and of the test split (cell output).
print(len(dataset))
len(test_dataset)

60000


10000

In [None]:
# Rebuild the training dataset with the ToTensor transform attached, so that
# the transform is applied to each image when it is accessed.
dataset = MNIST(
    root='data/',
    train=True,
    transform=transforms.ToTensor(),
)


In [None]:
from torch.utils.data import random_split

# Hold out 10,000 images for validation and train on the remainder.
# A dedicated, seeded generator makes the split identical on every run, so
# validation numbers stay comparable after a kernel restart and the same
# images are never validation data in one run and training data in the next.
val_size = 10000
train_ds, val_ds = random_split(
    dataset,
    [len(dataset) - val_size, val_size],
    generator=torch.Generator().manual_seed(42),
)
len(train_ds), len(val_ds)

(50000, 10000)

In [None]:
from torch.utils.data import DataLoader

# Number of images per mini-batch, shared by both loaders.
batch_size = 128

# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size, shuffle=False)

In [None]:
import torch.nn as nn

# A flattened 28x28 image has 784 features; the model emits 10 scores per image.
input_size = 28 * 28
num_classes = 10

# Logistic regression: a single linear layer from pixels to class scores.
model = nn.Linear(in_features=input_size, out_features=num_classes)

In [None]:
class MnistModel(nn.Module):
    """Logistic-regression classifier that flattens its input itself.

    The forward pass reshapes every incoming batch to two dimensions before
    applying the linear layer, so callers can pass image batches without
    flattening them first.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        """Return one row of class scores per image in ``xb``."""
        # Take the row width from the layer itself instead of repeating the
        # literal 784, so the reshape cannot drift away from ``input_size``.
        xb = xb.reshape(-1, self.linear.in_features)
        return self.linear(xb)


model = MnistModel()

In [None]:
# Show the shapes of the linear layer's weight and bias, then list every
# parameter the model exposes.
print(model.linear.weight.shape, model.linear.bias.shape)
list(model.parameters())

torch.Size([10, 784]) torch.Size([10])


[Parameter containing:
 tensor([[ 0.0121, -0.0075,  0.0098,  ...,  0.0285,  0.0337,  0.0260],
         [ 0.0328,  0.0315,  0.0298,  ..., -0.0076, -0.0018,  0.0038],
         [-0.0005, -0.0271,  0.0356,  ..., -0.0047, -0.0268,  0.0060],
         ...,
         [-0.0076, -0.0037,  0.0033,  ..., -0.0309, -0.0321,  0.0104],
         [ 0.0192,  0.0298,  0.0166,  ...,  0.0136, -0.0147, -0.0135],
         [ 0.0148, -0.0314, -0.0150,  ..., -0.0333, -0.0070, -0.0040]],
        requires_grad=True), Parameter containing:
 tensor([ 0.0099, -0.0229,  0.0218,  0.0062, -0.0058,  0.0034, -0.0111,  0.0220,
         -0.0072, -0.0306], requires_grad=True)]

In [None]:
# Run a single training batch through the model. MnistModel.forward does the
# flattening, so the image batch is passed in unchanged.
# next(iter(...)) fetches exactly one batch and raises immediately on an empty
# loader, instead of leaving `outputs` undefined (or stale from an earlier run)
# the way a for/break loop would.
images, labels = next(iter(train_loader))
outputs = model(images)

print('outputs.shape : ', outputs.shape)
# .detach() is the supported way to look at values outside autograd; the
# legacy .data attribute skips autograd's in-place modification checks.
print('Sample outputs :\n', outputs[:2].detach())

outputs.shape :  torch.Size([128, 10])
Sample outputs :
 tensor([[ 0.1571,  0.0111, -0.0137,  0.2876,  0.1059,  0.2053,  0.1246, -0.1611,
         -0.1704, -0.0261],
        [ 0.2393,  0.1599, -0.1481,  0.3315,  0.0063,  0.3176,  0.1417,  0.0335,
         -0.1298, -0.1271]])


In [None]:
import torch.nn.functional as F
# Turn each row of raw scores into a probability distribution over classes.
probs = F.softmax(outputs, dim=1)

# Look at sample probabilities. .detach() replaces the legacy .data attribute,
# which skips autograd's in-place modification checks.
print("Sample probabilities:\n", probs[:2].detach())

# The probabilities of a single row should add up to (approximately) 1.
print("Sum: ", probs[0].sum().item())

Sample probabilities:
 tensor([[0.1100, 0.0950, 0.0927, 0.1253, 0.1045, 0.1154, 0.1064, 0.0800, 0.0792,
         0.0915],
        [0.1152, 0.1064, 0.0782, 0.1264, 0.0913, 0.1246, 0.1045, 0.0938, 0.0797,
         0.0799]])
Sum:  0.9999999403953552


In [None]:
# For every row: the largest probability and the class index where it occurs.
max_probs, preds = probs.max(dim=1)
print(preds)
print(max_probs)

tensor([3, 3, 4, 6, 3, 7, 4, 3, 6, 6, 6, 7, 3, 8, 5, 5, 6, 4, 6, 3, 3, 6, 3, 3,
        4, 3, 3, 3, 0, 3, 5, 6, 3, 5, 6, 4, 5, 6, 3, 3, 5, 5, 6, 3, 5, 6, 4, 8,
        3, 3, 3, 5, 0, 5, 3, 6, 8, 3, 6, 5, 5, 3, 8, 3, 4, 3, 3, 4, 3, 5, 3, 3,
        3, 3, 3, 3, 7, 5, 3, 3, 0, 3, 5, 5, 7, 5, 0, 6, 5, 3, 6, 0, 4, 3, 6, 9,
        5, 6, 6, 3, 6, 3, 3, 6, 3, 3, 5, 3, 5, 3, 3, 6, 3, 6, 8, 6, 6, 3, 3, 3,
        6, 3, 5, 3, 6, 3, 3, 2])
tensor([0.1253, 0.1264, 0.1212, 0.1169, 0.1406, 0.1257, 0.1317, 0.1208, 0.1299,
        0.1504, 0.1293, 0.1153, 0.1347, 0.1199, 0.1234, 0.1130, 0.1484, 0.1382,
        0.1587, 0.1551, 0.1519, 0.1158, 0.1370, 0.1231, 0.1135, 0.1191, 0.1405,
        0.1365, 0.1226, 0.1474, 0.1327, 0.1375, 0.1637, 0.1103, 0.1463, 0.1250,
        0.1152, 0.1235, 0.1339, 0.1214, 0.1169, 0.1341, 0.1377, 0.1150, 0.1369,
        0.1339, 0.1111, 0.1169, 0.1235, 0.1352, 0.1256, 0.1281, 0.1254, 0.1341,
        0.1340, 0.1255, 0.1127, 0.1371, 0.1357, 0.1150, 0.1301, 0.1264, 0.1292,
       

In [None]:
# Labels that came with the sample batch, for comparison with `preds`.
labels

tensor([9, 8, 9, 4, 7, 0, 0, 1, 6, 6, 8, 7, 1, 3, 3, 1, 5, 8, 4, 6, 8, 1, 2, 1,
        2, 5, 5, 9, 5, 2, 7, 9, 1, 1, 0, 6, 3, 2, 5, 9, 1, 7, 0, 1, 3, 9, 1, 9,
        8, 1, 5, 7, 7, 3, 8, 2, 2, 5, 4, 3, 1, 1, 3, 3, 5, 2, 1, 8, 1, 1, 9, 6,
        1, 9, 7, 9, 4, 7, 3, 8, 7, 9, 9, 7, 4, 9, 9, 7, 3, 0, 4, 0, 8, 3, 7, 2,
        4, 6, 2, 6, 0, 2, 3, 6, 2, 9, 1, 1, 1, 4, 7, 5, 1, 3, 6, 0, 5, 9, 6, 2,
        8, 8, 1, 9, 6, 8, 7, 4])

In [None]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        outputs: 2-D tensor with one row of class scores per sample.
        labels: 1-D tensor holding the expected class index of each sample.

    Returns:
        A scalar tensor holding ``correct / total`` for the batch.
    """
    preds = outputs.argmax(dim=1)
    num_correct = (preds == labels).sum().item()
    return torch.tensor(num_correct / len(preds))



In [None]:
# Accuracy of the not-yet-trained model on the sample batch.
accuracy(outputs, labels)

tensor(0.0859)

In [None]:
# Cross-entropy is applied to the model outputs directly (not to `probs`)
# together with the batch labels.
loss_fn = F.cross_entropy
loss = loss_fn(outputs, labels)
print(loss)

tensor(2.3134, grad_fn=<NllLossBackward>)


In [None]:
class MnistModel(nn.Module):
    """Logistic-regression classifier bundled with its training/validation hooks.

    Besides the forward pass, the class provides the per-batch and per-epoch
    methods that ``fit`` and ``evaluate`` call, keeping those loops generic.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        """Return one row of class scores per image in ``xb``."""
        # Take the row width from the layer itself instead of repeating the
        # literal 784, so the reshape cannot drift away from ``input_size``.
        xb = xb.reshape(-1, self.linear.in_features)
        return self.linear(xb)

    def training_step(self, batch):
        """Return the cross-entropy loss for one ``(images, labels)`` batch."""
        images, labels = batch
        out = self(images)                  # Generate predictions
        return F.cross_entropy(out, labels)

    def validation_step(self, batch):
        """Return loss and accuracy for one ``(images, labels)`` batch.

        Returns:
            ``{'val_loss': tensor, 'val_acc': tensor}``; the loss is detached
            from the autograd graph.
        """
        images, labels = batch
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        # The caller keeps one result per batch until the epoch summary is
        # computed. Detaching lets each batch's autograd graph be freed right
        # away instead of staying alive for the whole validation pass.
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch results into plain-float epoch metrics.

        Args:
            outputs: list of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': float, 'val_acc': float}``. Each value is the
            unweighted mean over batches.
        """
        epoch_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        epoch_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the validation metrics of one finished epoch."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))


model = MnistModel()

In [None]:
def evaluate(model, val_loader):
    """Score ``model`` on every batch of ``val_loader`` and summarise the epoch.

    Args:
        model: object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch results.
    """
    # Validation never calls backward(), so recording autograd graphs for
    # every batch would only cost memory and time.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes and collect validation results.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to ``opt_func``.
        model: object providing ``parameters()``, ``training_step(batch)``,
            ``epoch_end(epoch, result)`` and the hooks used by ``evaluate``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to ``evaluate``.
        opt_func: optimizer factory, called as ``opt_func(params, lr)``.

    Returns:
        A list holding one ``evaluate`` result per epoch, in epoch order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []

    for epoch in range(epochs):
        # Training phase: one optimizer update per batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            # Clear the gradients so they do not carry over into the next batch.
            optimizer.zero_grad()

        # Validation phase: score the model, report, then record.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result)
        history.append(epoch_result)

    return history

In [None]:
# Validation metrics of the freshly created model, as a baseline.
result0 = evaluate(model, val_loader)
result0

{'val_acc': 0.18334652483463287, 'val_loss': 2.2879199981689453}

In [None]:
# Train for 50 epochs with a learning rate of 0.001; history1 keeps the
# validation result of every epoch.
history1 = fit(50, 0.001, model, train_loader, val_loader)

Epoch [0], val_loss: 0.5774, val_acc: 0.8611
Epoch [1], val_loss: 0.5715, val_acc: 0.8620
Epoch [2], val_loss: 0.5658, val_acc: 0.8629
Epoch [3], val_loss: 0.5604, val_acc: 0.8630
Epoch [4], val_loss: 0.5553, val_acc: 0.8638
Epoch [5], val_loss: 0.5504, val_acc: 0.8645
Epoch [6], val_loss: 0.5458, val_acc: 0.8658
Epoch [7], val_loss: 0.5413, val_acc: 0.8663
Epoch [8], val_loss: 0.5370, val_acc: 0.8668
Epoch [9], val_loss: 0.5329, val_acc: 0.8671
Epoch [10], val_loss: 0.5290, val_acc: 0.8672
Epoch [11], val_loss: 0.5252, val_acc: 0.8679
Epoch [12], val_loss: 0.5216, val_acc: 0.8688
Epoch [13], val_loss: 0.5181, val_acc: 0.8703
Epoch [14], val_loss: 0.5148, val_acc: 0.8712
Epoch [15], val_loss: 0.5115, val_acc: 0.8717
Epoch [16], val_loss: 0.5084, val_acc: 0.8723
Epoch [17], val_loss: 0.5054, val_acc: 0.8722
Epoch [18], val_loss: 0.5025, val_acc: 0.8722
Epoch [19], val_loss: 0.4997, val_acc: 0.8729
Epoch [20], val_loss: 0.4970, val_acc: 0.8735
Epoch [21], val_loss: 0.4943, val_acc: 0.873