<a href="https://colab.research.google.com/github/SnehhaPadmanabhan/Bertelsmann-AI-Challenge/blob/master/Network_Architecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Preprocessing applied to each MNIST image as it is read from the dataset.
# transforms.Compose chains the listed transforms and applies them in order:
#   1. ToTensor  - turn the image into a torch tensor
#   2. Normalize - shift/scale that tensor using the given mean (0.5) and std (0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Download the MNIST training split and wrap it in a DataLoader that serves
# shuffled mini-batches of 64 samples.
trainset = datasets.MNIST(
    '~/.pytorch/MNIST_data/',
    download=True,
    train=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

0it [00:00, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:01, 9250575.40it/s]                            


Extracting /root/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw


  0%|          | 0/28881 [00:00<?, ?it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 142745.37it/s]           
  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting /root/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2235144.61it/s]                            
0it [00:00, ?it/s]

Extracting /root/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 52833.42it/s]            


Extracting /root/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw
Processing...
Done!


In [2]:
# nn.Sequential is a Module that holds other Modules and runs them one after
# another. Here it stacks three Linear layers (784 -> 128 -> 64 -> 10) with a
# ReLU after each of the first two.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)

# Loss function: CrossEntropyLoss is the usual choice for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Take a single batch from the training loader.
images, labels = next(iter(trainloader))
# Flatten: collapse every image into one row so the batch can be fed to the
# fully connected layers.
images = images.view(images.size(0), -1)

# Forward pass. The outputs are logits, i.e. the raw scores of the last Linear
# layer. (For a single probability p the logit is defined as L = ln(p / (1 - p)).)
logits = model(images)
# Loss computed from the logits and the true labels.
loss = criterion(logits, labels)

print(loss)

tensor(2.3298, grad_fn=<NllLossBackward>)


In [4]:
# Same network as before, but with a LogSoftmax layer appended so the model
# outputs log-probabilities instead of raw scores.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
)

# Loss function.
# CrossEntropyLoss vs. NLLLoss:
#   * CrossEntropyLoss on the raw output layer gives the same result as NLLLoss
#     on a LogSoftmax layer placed after that output layer.
#   * CrossEntropyLoss can be more efficient because it fuses those steps and
#     needs no extra layer in the model.
#   * CrossEntropyLoss implicitly applies a softmax that normalizes the output
#     layer into a probability distribution.
criterion = nn.NLLLoss()

# Take a single batch from the training loader.
images, labels = next(iter(trainloader))
# Flatten every image into one row.
images = images.view(images.size(0), -1)

# Forward pass: the model now returns log-probabilities.
logps = model(images)
# Loss computed from the log-probabilities and the true labels.
loss = criterion(logps, labels)

print(loss)

tensor(2.3005, grad_fn=<NllLossBackward>)


In [5]:
# autograd computes gradients automatically: it records the operations applied
# to tensors and then walks back through them when backward() is called.
# The layer weights are Parameters with requires_grad=True, so they are
# already tracked.

first_layer_weight = model[0].weight

# Until backward() has been called for this model, .grad holds no tensor (None).
print('Before backward pass: \n', first_layer_weight.grad)

loss.backward()

# backward() has filled in the gradient of the loss w.r.t. the first layer's weights.
print('After backward pass: \n', first_layer_weight.grad)


Before backward pass: 
 None
After backward pass: 
 tensor([[-7.1711e-04, -7.1711e-04, -7.1711e-04,  ..., -7.1711e-04,
         -7.1711e-04, -7.1711e-04],
        [ 1.0540e-03,  1.0540e-03,  1.0540e-03,  ...,  1.0540e-03,
          1.0540e-03,  1.0540e-03],
        [ 1.9436e-03,  1.9436e-03,  1.9436e-03,  ...,  1.9436e-03,
          1.9436e-03,  1.9436e-03],
        ...,
        [-1.4674e-03, -1.4674e-03, -1.4674e-03,  ..., -1.4674e-03,
         -1.4674e-03, -1.4674e-03],
        [ 2.8099e-04,  2.8099e-04,  2.8099e-04,  ...,  2.8099e-04,
          2.8099e-04,  2.8099e-04],
        [-2.4669e-06, -2.4669e-06, -2.4669e-06,  ..., -2.4669e-06,
         -2.4669e-06, -2.4669e-06]])


In [0]:
from torch import optim

# An optimizer is constructed from the parameters it should update and a learning rate.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [8]:
# One full optimisation step on a single batch, printing the first layer's
# weights before the step, the gradient that drives it, and the weights after.
print('Initial weights - ', model[0].weight)

# Fetch one batch and flatten each image into a single row. The batch size is
# read from the tensor itself instead of being hard-coded, and view() is used
# rather than the low-level in-place resize_(), so batches of any size are
# handled correctly.
images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)

# Clear the gradients, do this because gradients are accumulated
optimizer.zero_grad()

# Forward pass, then backward pass, then update weights
output = model(images)
loss = criterion(output, labels)
loss.backward()
print('Gradient -', model[0].weight.grad)

# Take an update step and view the new weights
optimizer.step()
print('Updated weights - ', model[0].weight)

Initial weights -  Parameter containing:
tensor([[-1.2932e-02, -3.2581e-02, -1.7747e-02,  ..., -2.0534e-02,
          8.2711e-03,  2.4116e-02],
        [-5.8541e-03,  3.3008e-02, -9.6355e-05,  ...,  3.4666e-02,
         -2.6694e-02, -2.1089e-02],
        [-2.6945e-02,  2.5507e-03, -3.2220e-02,  ..., -8.4227e-03,
         -3.3144e-02,  9.1304e-03],
        ...,
        [ 2.9816e-02,  3.5643e-02,  2.1215e-02,  ...,  2.1243e-02,
          9.8461e-03,  5.3338e-03],
        [-4.8168e-03, -2.7114e-02,  1.5364e-02,  ...,  2.6974e-02,
         -1.0573e-02, -6.9814e-03],
        [-2.2617e-02,  2.3300e-02,  6.2110e-03,  ..., -3.2765e-03,
         -1.5521e-03, -5.6709e-03]], requires_grad=True)
Gradient - tensor([[-0.0005, -0.0005, -0.0005,  ..., -0.0005, -0.0005, -0.0005],
        [-0.0012, -0.0012, -0.0012,  ..., -0.0012, -0.0012, -0.0012],
        [ 0.0005,  0.0005,  0.0005,  ...,  0.0005,  0.0005,  0.0005],
        ...,
        [-0.0016, -0.0016, -0.0016,  ..., -0.0016, -0.0016, -0.0016],
   

In [9]:
# Start from a freshly initialised network that outputs log-probabilities,
# pair it with NLLLoss, and train it with plain SGD.
model = nn.Sequential(
    nn.Linear(784, 128), nn.ReLU(),
    nn.Linear(128, 64), nn.ReLU(),
    nn.Linear(64, 10), nn.LogSoftmax(dim=1),
)

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

epochs = 5
for e in range(epochs):
    # Sum of the per-batch losses seen during this epoch.
    running_loss = 0

    for images, labels in trainloader:
        # Each MNIST image becomes one 784-long row.
        images = images.view(images.size(0), -1)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss once the epoch's batches are exhausted.
    print(f"Training loss: {running_loss/len(trainloader)}")

Training loss: 1.8675068458006072
Training loss: 0.8276712825176304
Training loss: 0.5246770918083343
Training loss: 0.43434125930070877
Training loss: 0.3927920936648525
