<a href="https://colab.research.google.com/github/rajgupt/dl-notebooks/blob/main/pytorch/3_ann.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import os, torch, numpy as np
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

In [30]:
from torchvision.transforms import ToTensor, Lambda

training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

In [31]:
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

In [32]:
# create a nn

class NeuralNetwork(nn.Module):
  def __init__(self):
    super().__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(28*28, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512,10),
        nn.ReLU(),
    )

  def forward(self, x):
    x = self.flatten(x)
    return self.linear_relu_stack(x)


In [33]:
model = NeuralNetwork()
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [34]:
for m in model.parameters():
  print(m.shape)

torch.Size([512, 784])
torch.Size([512])
torch.Size([512, 512])
torch.Size([512])
torch.Size([10, 512])
torch.Size([10])


In [35]:
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")


Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0312,  0.0065, -0.0286,  ..., -0.0001, -0.0326,  0.0282],
        [ 0.0191, -0.0165,  0.0154,  ...,  0.0212,  0.0077,  0.0296]],
       grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0325,  0.0265], grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0241,  0.0045, -0.0297,  ...,  0.0364, -0.0039,  0.0006],
        [ 0.0160, -0.0060,  0.0305,  ..., -0.0346, -0.0047, -0.0286]],
       grad_fn=<SliceBackward>) 

Layer: linear_relu_

In [36]:
learning_rate = 1e-3
batch_size = 64
epochs = 5

In [37]:
# Initialize the loss function
loss_fn = nn.CrossEntropyLoss()

In [38]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [28]:
def train(dl, model, loss_fn, opt):
  size = len(dl.dataset)
  for batch, (X,y) in enumerate(dl):
    pred = model(X)
    loss = loss_fn(pred, y)

    # backprop
    opt.zero_grad()
    loss.backward()
    opt.step()

    if batch % 100 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


In [39]:
def test(dl, model, loss_fn):
  size = len(dl.dataset)
  num_batches = len(dl)
  test_loss, correct = 0,0

  with torch.no_grad():
    for X,y in dl:
      pred = model(X)
      test_loss += loss_fn(pred,y).item()
      correct += (pred.argmax(1)==y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [41]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.296246  [    0/60000]
loss: 2.289865  [ 6400/60000]
loss: 2.278299  [12800/60000]
loss: 2.274534  [19200/60000]
loss: 2.261336  [25600/60000]
loss: 2.241245  [32000/60000]
loss: 2.253399  [38400/60000]
loss: 2.225457  [44800/60000]
loss: 2.227302  [51200/60000]
loss: 2.206322  [57600/60000]
Test Error: 
 Accuracy: 47.4%, Avg loss: 2.196274 

Epoch 2
-------------------------------
loss: 2.206214  [    0/60000]
loss: 2.200494  [ 6400/60000]
loss: 2.152970  [12800/60000]
loss: 2.169409  [19200/60000]
loss: 2.130964  [25600/60000]
loss: 2.087498  [32000/60000]
loss: 2.134934  [38400/60000]
loss: 2.058026  [44800/60000]
loss: 2.082789  [51200/60000]
loss: 2.049318  [57600/60000]
Test Error: 
 Accuracy: 50.4%, Avg loss: 2.015581 

Epoch 3
-------------------------------
loss: 2.044283  [    0/60000]
loss: 2.026758  [ 6400/60000]
loss: 1.916263  [12800/60000]
loss: 1.974349  [19200/60000]
loss: 1.889786  [25600/60000]
loss: 1.820868  [32000/600