In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from matplotlib import pyplot as plt
import torchvision
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import DataLoader,Dataset

    

In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
# Hyperparameters
num_epochs = 4          # number of full passes over the training set
learning_rate = 1e-3    # step size handed to the optimizer
batchsize = 10          # images per mini-batch

# Preprocessing transform (the step that was commented out in the Feed_forward_NN notebook).
# Compose applies the transformations in the order they are listed.
tf = transforms.Compose([
    transforms.ToTensor(),                                             # image -> tensor with values in [0, 1]
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),   # per channel: (x - 0.5) / 0.5 -> [-1, 1]
])



What does this transform do ?

ToTensor: This transformation converts a PIL Image or a NumPy ndarray into a PyTorch tensor.
For images, it changes the shape from (H, W, C) (Height, Width, Channels) to (C, H, W) and scales the pixel values from the range [0, 255] to [0, 1].

Normalize : This transformation normalizes the tensor image with mean and standard deviation. The mean and standard deviation are provided for each channel (RGB) respectively.
Mean: (0.5, 0.5, 0.5) for the R, G, B channels.
Standard Deviation: (0.5, 0.5, 0.5) for the R, G, B channels.
The normalization is performed using the formula:

normalized_pixel = (pixel−mean)/std 

Given the mean and standard deviation of 0.5, this normalization maps the pixel values from [0, 1] to [-1, 1].

why ? 

lower bound = (0 - 0.5)/0.5 = -1   (pixel = 0, mean = 0.5, std = 0.5)
upper bound = (1 - 0.5)/0.5 = 1    (pixel = 1, mean = 0.5, std = 0.5)

what does transformation achieve ? 

This preprocessing step is crucial for training neural networks, as it ensures that the input data is in a consistent format and within a range that is suitable for the network's activation functions.


In [5]:
# Prepare datasets: the CIFAR-10 train and test splits, both passed through the `tf` transform.
# Both splits share the same root directory, so the data is stored once.

train_dataset = torchvision.datasets.CIFAR10(
    root='./data/CIFAR',
    train=True,
    transform=tf,
    download=True,
)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data/CIFAR',
    train=False,
    transform=tf,
    download=True,
)

# Prepare dataloaders.
# Only the training data is shuffled (a fresh sample order every epoch).
# Accuracy does not depend on the order of the test samples, so the test
# loader keeps a fixed order, which makes evaluation runs repeatable.

train_loader = DataLoader(dataset=train_dataset, batch_size=batchsize, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batchsize, shuffle=False)

# Class names for the 10 CIFAR-10 labels.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')






Files already downloaded and verified
Files already downloaded and verified


In [6]:
# set up a convolutional model
# model set up main remember initialisation and forward.

class ConvNet(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images with 10 output classes.

    Spatial size through the network for a 32x32 input (no padding, stride 1):

        conv1 (3x3 kernel):  32 -> 30
        pool  (2x2, stride 2): 30 -> 15
        conv2 (4x4 kernel):  15 -> 12
        pool  (2x2, stride 2): 12 -> 6

    so the classifier head receives 16 * 6 * 6 = 576 features per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)      # input channels, output channels, kernel size
        self.pool = nn.MaxPool2d(2, 2)       # kernel size, stride (the window moves 2 cells at a time)
        self.conv2 = nn.Conv2d(6, 16, 4)
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: image tensor whose last three dimensions are (3, 32, 32).

        Returns:
            Tensor of shape (N, 10) holding one row of logits per image.

        Raises:
            RuntimeError: if the feature map in front of the classifier is not
                16 x 6 x 6, i.e. the input images were not 32 x 32.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Guard the flatten below: with `-1` as the batch dimension, a wrongly
        # sized input whose element count happens to be a multiple of 576 would
        # otherwise be reshaped silently into a different number of rows.
        if tuple(x.shape[-3:]) != (16, 6, 6):
            raise RuntimeError(
                "ConvNet expects 3x32x32 inputs; got a feature map of shape "
                f"{tuple(x.shape[-3:])} before the classifier instead of (16, 6, 6)"
            )

        # view() returns a tensor that shares memory with `x` (no copy is made);
        # it only reinterprets the shape as (N, 576).
        x = x.view(-1, 16 * 6 * 6)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)       # no activation or softmax here: the loss function is applied directly to these logits
        return x
    
    
        
        
               

        



Formula to calculate the output size of a convolution:

((Input-Filtersize+2*padding)/Stride) + 1

here 
input = 32
filter = 3
padding = 0
stride = 1
final size = (32 - 3 + 2*0)/1 + 1 = 30, i.e. a 30*30 feature map

After 2,2 pooling (kernel size 2, stride 2) each 2*2 square is replaced by a single value: the max of its 4 entries.

after pooling = 15*15: each side is halved, so the number of cells decreases by a factor of 4.

after another conv with kernel size 4

15-4+1 = 12

another max pool -> final size = 6*6


In [7]:
# Build the network and move its parameters to the selected device.
model = ConvNet()
model = model.to(device)

# After the model we define the loss function and the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [14]:
# Training loop: one optimizer step per mini-batch, logging the loss every 100 steps.
total_steps = len(train_loader)

model.train()  # make sure the model is in training mode before updating weights

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):

        # The model was moved to `device`, so every batch has to be moved there too;
        # otherwise the forward pass fails whenever the model is on the GPU.
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass: feed the batch through the model and compute the loss.
        output = model(images)
        loss = criterion(output, labels)

        # Backward pass, always these three steps:
        # zero_grad clears old gradients, backward computes new ones, step updates the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print(f'epoch = {epoch+1}/{num_epochs}, step = {i+1}/{total_steps}, loss = {loss.item() : .2f}')
            
        
        
        
        
        
        
        

epoch = 1/4, step = 1/5000, loss =  2.30
epoch = 1/4, step = 101/5000, loss =  2.31
epoch = 1/4, step = 201/5000, loss =  2.30
epoch = 1/4, step = 301/5000, loss =  2.30
epoch = 1/4, step = 401/5000, loss =  2.29
epoch = 1/4, step = 501/5000, loss =  2.30
epoch = 1/4, step = 601/5000, loss =  2.29
epoch = 1/4, step = 701/5000, loss =  2.30
epoch = 1/4, step = 801/5000, loss =  2.29
epoch = 1/4, step = 901/5000, loss =  2.31
epoch = 1/4, step = 1001/5000, loss =  2.29
epoch = 1/4, step = 1101/5000, loss =  2.30
epoch = 1/4, step = 1201/5000, loss =  2.29
epoch = 1/4, step = 1301/5000, loss =  2.31
epoch = 1/4, step = 1401/5000, loss =  2.29
epoch = 1/4, step = 1501/5000, loss =  2.29
epoch = 1/4, step = 1601/5000, loss =  2.30
epoch = 1/4, step = 1701/5000, loss =  2.31
epoch = 1/4, step = 1801/5000, loss =  2.29
epoch = 1/4, step = 1901/5000, loss =  2.30
epoch = 1/4, step = 2001/5000, loss =  2.30
epoch = 1/4, step = 2101/5000, loss =  2.32
epoch = 1/4, step = 2201/5000, loss =  2.30


In [16]:
# What is the model's accuracy on the test set?
# eval() switches layers such as dropout / batch norm to inference behaviour.
# This model has none, so the numbers do not change, but it is the safe default.
model.eval()

with torch.no_grad():  # gradients are not needed for evaluation
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)

        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)

        total += labels.shape[0]
        correct += (predicted == labels).sum()

    # `correct` is a 0-dim tensor, so `acc` is a tensor holding the accuracy in percent.
    acc = (correct / total) * 100
    print(acc)
        
        
          

tensor(35.2100)


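Accuracy is low because the model was trained for only a few epochs (num_epochs = 4). In the logged steps of the first epoch the loss stays around 2.30 ≈ ln(10), which is the loss of a uniform guess over 10 classes, so the network had barely started to learn at that point.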

In [8]:
# Save the model: write only its state dict (the learned parameters) to disk.

torch.save(obj=model.state_dict(), f="Cifar_model.pt")

In [9]:
# Load the model: read the saved state dict and copy it into the existing model.
# map_location=device remaps the stored tensors onto the device in use, so a
# checkpoint saved on a GPU machine also loads on a CPU-only machine.

model.load_state_dict(torch.load("Cifar_model.pt", map_location=device))

<All keys matched successfully>