<a href="https://colab.research.google.com/github/sanikamal/deep-learning-atoz/blob/master/pytorch_example/Optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Optimizers

In [0]:
# Fix every random number generator used below so reruns produce matching numbers.
# (PyTorch does not promise identical results across different releases.)
import numpy as np
import torch

# Ask cuDNN for deterministic algorithms and keep its benchmark mode switched off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed PyTorch and NumPy with the same value.
torch.manual_seed(0)
np.random.seed(0)

In [2]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

# Normalisation statistics, one entry per image channel (1-tuples here).
mean = (0.5,)
std = (0.5,)

# Preprocessing pipeline: convert each image to a tensor, then normalise it with the values above.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training split of FMNIST: downloaded if needed, served in shuffled batches of 64.
trainset = datasets.FashionMNIST('~/.pytorch/FMNIST/', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, batch_size=64)

# Test split of FMNIST: same preprocessing, batches of 64 in fixed order.
testset = datasets.FashionMNIST('~/.pytorch/FMNIST/', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, batch_size=64)

  0%|          | 98304/26421880 [00:00<00:27, 943938.05it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


26427392it [00:00, 84222659.69it/s]                             


Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw


32768it [00:00, 569331.72it/s]
 10%|▉         | 425984/4422102 [00:00<00:00, 4236381.00it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz
Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


4423680it [00:00, 31021326.60it/s]                           
8192it [00:00, 199940.29it/s]


Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Processing...
Done!


In [0]:
class FMNIST(nn.Module):
  """Fully connected classifier: 784 inputs -> 128 -> 64 -> 10 log-probabilities."""

  def __init__(self):
    """Create the three linear layers (fc1, fc2, fc3)."""
    super().__init__()
    self.fc1 = nn.Linear(in_features=784, out_features=128)
    self.fc2 = nn.Linear(in_features=128, out_features=64)
    self.fc3 = nn.Linear(in_features=64, out_features=10)

  def forward(self, x):
    """Return log-probabilities of shape (batch, 10) for the batch ``x``.

    ``x`` is first flattened to (batch, -1), so every sample must hold
    784 values for the first linear layer to accept it.
    """
    flat = x.view(x.size(0), -1)

    # Two ReLU-activated hidden layers, then log-softmax over the class dimension.
    hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
    return F.log_softmax(self.fc3(hidden), dim=1)
    
#model = FMNIST()   

In [0]:
# Same 784 -> 128 -> 64 -> 10 network expressed as a Sequential container.
# Layers stay positional so that model[0] keeps referring to the first Linear layer.
model = nn.Sequential(
    nn.Linear(in_features=784, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
    nn.LogSoftmax(dim=1),
)

In [0]:
# Take a single mini-batch from the training loader ...
images, labels = next(iter(trainloader))
# ... and flatten each image into one row so it can be fed to the first Linear layer.
images = images.view(images.size(0), -1)

In [0]:
from torch import optim

# Negative log-likelihood loss, applied to the log-probabilities the model outputs.
criterion = nn.NLLLoss()
# Stochastic gradient descent over all model parameters with learning rate 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)


In [7]:
# One forward and backward pass; no optimizer step has been taken yet.
output = model(images)
loss = criterion(output, labels)
loss.backward()

# Show the first linear layer's weights and the gradient backward() stored for them.
first_layer = model[0]
print('Initial weights : ', first_layer.weight)
print('Initial weights gradient : ', first_layer.weight.grad)
        

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-0.0004, -0.0004, -0.0004,  ..., -0.0007, -0.0006, -0.0004],
        [ 0.0069,  0.0069,  0.0069,  ...,  0.0072,  0.0070,  0.0069],
        [-0.0015, -0.0015, -0.0015,  ..., -0.0016, -0.0015, -0.0015],
        ...,
        [ 0.0018,  0.0018,  0.0018,  ...,  0.0017,  0.0017,  0.0018],
        [ 0.0019,  0.0019,  0.0019,  ...,  0.0019,  0.0019,  0.0019],
        [ 0.0017,  0.0017,  0.0017,  ...,  0.0016,  0.0017,  0.0017]])


In [0]:
# Apply one SGD update to the parameters using the gradients stored by the earlier backward() call.
optimizer.step()

In [9]:
# optimizer.step() has already run, so these are the updated weights, not the initial ones.
# The step changes the weights only; the stored gradient is left as it was.
print('Updated weights : ', model[0].weight)
print('Weights gradient after step : ', model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0151, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0296,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0221, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0285,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-0.0004, -0.0004, -0.0004,  ..., -0.0007, -0.0006, -0.0004],
        [ 0.0069,  0.0069,  0.0069,  ...,  0.0072,  0.0070,  0.0069],
        [-0.0015, -0.0015, -0.0015,  ..., -0.0016, -0.0015, -0.0015],
        ...,
        [ 0.0018,  0.0018,  0.0018,  ...,  0.0017,  0.0017,  0.0018],
        [ 0.0019,  0.0019,  0.0019,  ...,  0.0019,  0.0019,  0.0019],
        [ 0.0017,  0.0017,  0.0017,  ...,  0.0016,  0.0017,  0.0017]])


In [0]:
# Reset the stored gradients so the next backward() call does not add onto the old values.
optimizer.zero_grad()

In [11]:
# zero_grad() resets gradients only; the weights keep the values produced by the last step.
print('Weights after zero_grad : ', model[0].weight)
print('Weights gradient after zero_grad : ', model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0151, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0296,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0221, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0285,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [12]:
# Start over with a freshly initialised FMNIST network, its loss and its own SGD optimizer.
model = FMNIST()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 1

for epoch in range(num_epochs):
    cum_loss, batch_num = 0, 0

    # Batches are numbered from 1 for the progress printout.
    for batch_num, (images, labels) in enumerate(trainloader, start=1):
        # Clear old gradients, run forward + backward, then update the weights.
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and reuse it for the running total and the log line.
        batch_loss = loss.item()
        cum_loss += batch_loss
        print(f"Batch : {batch_num}, Loss : {batch_loss}")

    # Average of the per-batch losses over the whole epoch.
    print(f"Training loss: {cum_loss / len(trainloader)}")

Batch : 1, Loss : 2.3072664737701416
Batch : 2, Loss : 2.288365125656128
Batch : 3, Loss : 2.3049063682556152
Batch : 4, Loss : 2.3015007972717285
Batch : 5, Loss : 2.278282642364502
Batch : 6, Loss : 2.277097702026367
Batch : 7, Loss : 2.2820229530334473
Batch : 8, Loss : 2.302107810974121
Batch : 9, Loss : 2.2785263061523438
Batch : 10, Loss : 2.27801513671875
Batch : 11, Loss : 2.2610418796539307
Batch : 12, Loss : 2.2764644622802734
Batch : 13, Loss : 2.2731387615203857
Batch : 14, Loss : 2.2578248977661133
Batch : 15, Loss : 2.2600483894348145
Batch : 16, Loss : 2.261315107345581
Batch : 17, Loss : 2.2721023559570312
Batch : 18, Loss : 2.24147629737854
Batch : 19, Loss : 2.2405261993408203
Batch : 20, Loss : 2.235086441040039
Batch : 21, Loss : 2.2544260025024414
Batch : 22, Loss : 2.2362046241760254
Batch : 23, Loss : 2.231415271759033
Batch : 24, Loss : 2.2389469146728516
Batch : 25, Loss : 2.2141687870025635
Batch : 26, Loss : 2.233987331390381
Batch : 27, Loss : 2.217863559722