In [1]:
import torch
import torchvision
import numpy as np

In [2]:
from torchvision.datasets import MNIST

In [3]:
dataset = MNIST("./DATA",train=True, download = True)

In [4]:
test_dataset = MNIST("./DATA",train=False, download = True)

In [5]:
len(dataset)

60000

In [6]:
len(test_dataset)

10000

In [7]:
import torchvision.transforms as transforms

In [8]:
dataset = MNIST(root='data/', 
                train=True,
                transform=transforms.ToTensor())

In [9]:
dataset[0][0].shape

torch.Size([1, 28, 28])

In [10]:
test_dataset = MNIST(root='data/', 
                train=False,
                transform=transforms.ToTensor())

In [11]:
test_dataset[0][0].shape

torch.Size([1, 28, 28])

In [12]:
img_tensor, label = dataset[0]
print(img_tensor.shape, label)

torch.Size([1, 28, 28]) 5


In [13]:
print(img_tensor[:,10:15,10:15])
print(torch.max(img_tensor), torch.min(img_tensor))

tensor([[[0.0039, 0.6039, 0.9922, 0.3529, 0.0000],
         [0.0000, 0.5451, 0.9922, 0.7451, 0.0078],
         [0.0000, 0.0431, 0.7451, 0.9922, 0.2745],
         [0.0000, 0.0000, 0.1373, 0.9451, 0.8824],
         [0.0000, 0.0000, 0.0000, 0.3176, 0.9412]]])
tensor(1.) tensor(0.)


In [14]:
import matplotlib.pyplot as plt

plt.imshow(img_tensor[0,10:15,10:15], cmap='gray')

<matplotlib.image.AxesImage at 0x1a8f11a9908>

In [15]:
from torch.utils.data import DataLoader, random_split

In [16]:
train_ds, val_ds = random_split(dataset, lengths=[50000, 10000])

In [17]:
len(train_ds), len(val_ds)

(50000, 10000)

In [18]:
batch_size = 128

train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)

In [19]:
import torch.nn as nn

input_size = 28*28
num_classes = 10

# Logistic regression model
model = nn.Linear(input_size, num_classes)

In [20]:
print(model.weight.shape)
print(model.bias.shape)

torch.Size([10, 784])
torch.Size([10])


In [21]:
print(list(model.parameters()))

[Parameter containing:
tensor([[-0.0035,  0.0115,  0.0090,  ...,  0.0107, -0.0132, -0.0269],
        [ 0.0261, -0.0333, -0.0086,  ..., -0.0122, -0.0224,  0.0316],
        [-0.0308, -0.0129, -0.0043,  ..., -0.0084,  0.0321, -0.0037],
        ...,
        [-0.0301,  0.0316,  0.0161,  ...,  0.0013,  0.0281, -0.0340],
        [ 0.0239, -0.0131,  0.0100,  ..., -0.0324, -0.0034, -0.0139],
        [-0.0230,  0.0033,  0.0099,  ..., -0.0139, -0.0002,  0.0132]],
       requires_grad=True), Parameter containing:
tensor([ 0.0175,  0.0034, -0.0140, -0.0041, -0.0193,  0.0096, -0.0291,  0.0321,
        -0.0150, -0.0043], requires_grad=True)]


In [22]:
class Mnist_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)
    
    def forward(self, xb):
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        return out
    
model = Mnist_model()

In [23]:
print(model.linear.weight.shape, model.linear.bias.shape)
list(model.parameters())

torch.Size([10, 784]) torch.Size([10])


[Parameter containing:
 tensor([[ 0.0299, -0.0216, -0.0330,  ..., -0.0030,  0.0144, -0.0019],
         [-0.0047,  0.0352, -0.0098,  ..., -0.0258,  0.0150,  0.0294],
         [-0.0022,  0.0276,  0.0116,  ...,  0.0135,  0.0327,  0.0068],
         ...,
         [-0.0018, -0.0093, -0.0017,  ...,  0.0122,  0.0213, -0.0144],
         [-0.0057,  0.0061,  0.0331,  ...,  0.0027, -0.0032, -0.0274],
         [ 0.0223,  0.0065,  0.0101,  ..., -0.0247,  0.0226,  0.0291]],
        requires_grad=True), Parameter containing:
 tensor([-0.0245, -0.0230,  0.0331, -0.0053,  0.0017,  0.0143, -0.0135,  0.0294,
          0.0078, -0.0107], requires_grad=True)]

In [29]:
import torch.nn.functional as F

In [30]:
for images, labels in train_loader:
    outputs = model(images)
    break

print('outputs.shape : ', outputs.shape)
print('Sample outputs :\n', outputs[:2].data)

outputs.shape :  torch.Size([128, 10])
Sample outputs :
 tensor([[ 0.0298, -0.1496,  0.1138,  0.2199, -0.3235, -0.0331,  0.2815, -0.0655,
         -0.0118,  0.1558],
        [ 0.1059, -0.0663, -0.0028, -0.0043, -0.1502,  0.2219,  0.0995, -0.1235,
         -0.0382, -0.1590]])


In [31]:
probs = F.softmax(outputs, dim=1)

print("Sample probabilities:\n", probs[:2].data)

print("Sum: ", torch.sum(probs[0]).item())

Sample probabilities:
 tensor([[0.0994, 0.0831, 0.1081, 0.1202, 0.0698, 0.0933, 0.1278, 0.0903, 0.0953,
         0.1127],
        [0.1117, 0.0940, 0.1002, 0.1000, 0.0865, 0.1254, 0.1110, 0.0888, 0.0967,
         0.0857]])
Sum:  1.0000001192092896


In [27]:
max_probs, preds = torch.max(probs, dim=1)
print(preds)
print(max_probs)

tensor([2, 3, 6, 8, 5, 5, 5, 5, 7, 9, 5, 5, 6, 5, 5, 6, 1, 6, 5, 5, 5, 6, 9, 8,
        5, 3, 5, 6, 5, 5, 2, 4, 6, 6, 5, 0, 0, 8, 4, 6, 8, 6, 9, 0, 6, 5, 6, 8,
        6, 1, 2, 3, 0, 6, 6, 8, 5, 5, 0, 6, 5, 0, 5, 2, 5, 3, 9, 9, 6, 5, 3, 6,
        3, 6, 6, 2, 5, 5, 8, 2, 8, 5, 8, 5, 5, 9, 3, 8, 0, 5, 2, 8, 5, 4, 5, 3,
        9, 6, 9, 5, 5, 2, 6, 9, 4, 8, 6, 3, 6, 5, 8, 6, 5, 9, 6, 5, 5, 2, 0, 5,
        6, 9, 0, 5, 6, 5, 5, 8])
tensor([0.1203, 0.1205, 0.1177, 0.1243, 0.1236, 0.1310, 0.1402, 0.1374, 0.1205,
        0.1229, 0.1137, 0.1285, 0.1406, 0.1469, 0.1557, 0.1218, 0.1179, 0.1131,
        0.1317, 0.1378, 0.1268, 0.1516, 0.1233, 0.1221, 0.1149, 0.1390, 0.1227,
        0.1260, 0.1385, 0.1336, 0.1402, 0.1270, 0.1364, 0.1225, 0.1390, 0.1216,
        0.1285, 0.1318, 0.1105, 0.1271, 0.1201, 0.1371, 0.1212, 0.1228, 0.1211,
        0.1119, 0.1298, 0.1211, 0.1413, 0.1116, 0.1173, 0.1195, 0.1224, 0.1509,
        0.1289, 0.1320, 0.1121, 0.1292, 0.1516, 0.1137, 0.1427, 0.1164, 0.1309,
       

In [40]:
labels

tensor([8, 3, 9, 2, 8, 2, 4, 5, 2, 0, 3, 7, 3, 3, 8, 2, 0, 1, 2, 9, 9, 9, 8, 2,
        9, 7, 8, 4, 2, 0, 9, 4, 9, 9, 4, 5, 9, 8, 5, 8, 2, 7, 1, 8, 1, 7, 6, 7,
        1, 3, 2, 6, 2, 2, 6, 2, 1, 2, 8, 6, 4, 8, 9, 3, 9, 9, 0, 7, 2, 2, 1, 3,
        8, 6, 1, 7, 4, 2, 0, 5, 0, 2, 0, 1, 4, 8, 4, 9, 4, 8, 7, 8, 8, 2, 9, 4,
        4, 7, 4, 7, 3, 7, 8, 6, 8, 2, 5, 3, 8, 0, 9, 6, 7, 4, 3, 7, 3, 8, 3, 9,
        9, 6, 9, 0, 7, 3, 4, 0])

In [41]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [42]:
accuracy(outputs, labels)

tensor(0.0859)

In [43]:
loss_fn = F.cross_entropy

In [44]:
loss = loss_fn(outputs, labels)
print(loss)

tensor(2.3010, grad_fn=<NllLossBackward>)


In [51]:
class MnistModel():
    
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, num_classes)
        
    def forward(self, xb):
        xb = xb.reshape(-1, 784)
        out = self.linear(xb)
        return out
    
    def training_step(self, batch):
        images, labels = batch
        out = self.linear(images)
        loss = F.cross_entropy(out, labels)
        return loss
    
    def validation_step(self, batch):
        images, labels = batch
        out = self.linear(images)
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss, 'val_acc': acc}
    
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print(f"Epoch [{epoch}], val_loss:{result['val_loss']:.4f}, val_acc: {result['val_acc']:.4f}")

model = MnistModel()

In [52]:
def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase 
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [53]:
result0 = evaluate(model, val_loader)
result0

RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 10] at ..\aten\src\TH/generic/THTensorMath.cpp:41