## Image classification using convolutional neural networks

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import random_split,DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import tarfile
from torchvision.datasets.utils import download_url

In [None]:
# Label for this notebook/project; not referenced by any cell visible here.
project_name="5_cifar10"

### Download and setup dataset

In [None]:

# Fetch the CIFAR-10 archive into ./data with torchvision's download helper.
dataset_url="https://s3.amazonaws.com/fast-ai-imageclas/cifar10.tgz"
download_url(dataset_url,"./data")

In [None]:
# Unpack the downloaded archive into ./data.
with tarfile.open("./data/cifar10.tgz", "r:gz") as tar:
    # The archive comes from the network, so reject members that would be
    # written outside ./data (absolute paths, "..", unsafe links).
    # Extraction filters only exist on interpreters that ship them; older
    # ones fall back to the previous unfiltered extraction.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path="./data", filter="data")
    else:
        tar.extractall(path="./data")

In [None]:
import os

# Peek at the extracted layout: the dataset root first, then its train folder.
for folder in ("./data/cifar10", "./data/cifar10/train"):
    print(os.listdir(folder))

In [None]:
# Number of entries in the "cat" folder of the training set.
print(len(os.listdir("./data/cifar10/train/cat")))

This directory structure (one sub-folder per class) is used by many datasets, so torchvision provides the `ImageFolder` utility to load it.

In [None]:
from torchvision.datasets import ImageFolder
# Load the training folder as a dataset; ToTensor is applied to every image.
dataset=ImageFolder("./data/cifar10/train",transform=transforms.ToTensor())

In [None]:
# Total number of samples, then the first sample
# (unpacked as an image/label pair in a later cell).
print(len(dataset))
dataset[0]

In [None]:
# Class names known to the dataset.
dataset.classes

In [None]:
# Display the first sample.
image,label=dataset[0]
print(image.shape)
# permute(1,2,0) moves the first axis to the end before plotting —
# presumably channels-first to channels-last, as imshow expects.
plt.imshow(image.permute(1,2,0))

Training set - used to train the model: compute the loss and adjust the weights  
Validation set - used to evaluate the model while training, adjust hyperparameters and pick the best version of the model  
Test set - used to compare different models or different modelling approaches and report the final accuracy of the model  

In [None]:
# Create the train and validation split.
# The training size is derived from the dataset length so the two parts always
# add up to len(dataset), and a dedicated seeded generator makes the split
# identical on every run, keeping validation results comparable between runs.
val_size = 10000
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# Build the data loaders; only the training loader shuffles its samples.
loader_options = dict(batch_size=128, num_workers=4, pin_memory=True)
train_dl = DataLoader(train_ds, shuffle=True, **loader_options)
val_dl = DataLoader(val_ds, **loader_options)

In [None]:
# Test images, loaded from the test folder with the same ToTensor transform,
# served in batches of 256 without shuffling.
test_ds=ImageFolder("./data/cifar10/test",transform=transforms.ToTensor())
test_dl=DataLoader(test_ds,batch_size=256,num_workers=4,pin_memory=True)

In [None]:
from torchvision.utils import make_grid
def show_batch(dl):
    """Draw the first batch produced by ``dl`` as one image grid.

    Only the first batch is consumed. The images are tiled 16 per row with
    ``make_grid`` and shown on an axis with the ticks removed. Nothing is
    drawn when ``dl`` yields no batches.
    """
    first_batch = next(iter(dl), None)
    if first_batch is None:
        return
    images, _ = first_batch
    _, axis = plt.subplots(figsize=(12, 6))
    axis.set_xticks([])
    axis.set_yticks([])
    grid = make_grid(images, nrow=16)
    # imshow wants the channel axis last, hence the permute.
    axis.imshow(grid.permute(1, 2, 0))

In [None]:
# Preview one training batch as an image grid.
show_batch(train_dl)

### Defining the Model (convolutional neural network)

<b>Convolution</b>  
-> A kernel is a small matrix of weights. It slides over the 2D input data, performing an elementwise multiplication with the part of the input it is currently on, and then summing the results into a single output pixel  
-> For multichannel images, a different kernel is applied to each channel, and the outputs (feature maps) are added together pixel-wise to give one output map  
-> The output maps are the new channels  
-> Number of kernels == number of new channels  
-> We increase the number of channels by using more kernels  
-> Because the kernel covers edge pixels fewer times than other pixels, we add padding <i>(with a 3x3 kernel and stride 1, padding=1 keeps output dim = input dim)</i> 
 
->RESOURCE  
1. "Intuitively Understanding Convolutions for Deep Learning" by Irhum Shafkat  
2. "Convolutions in Depth" by Sylvain Gugger  

<b>Pooling</b>  
We want an output that is smaller than the input, so we reduce the spatial dimensions as the number of channels increases. One way to accomplish this is a pooling layer (e.g. taking the average/max of every 2x2 grid to halve each spatial dimension); another way is to use a stride  
-> A stride moves the kernel more than one position at a time 

Advantages of CNN  
-fewer parameters  
-Sparsity of connections: in each layer, each output element depends only on a small number of input elements, which makes the forward and backward passes more efficient  
-Parameter sharing and spatial invariance: the features learned by a kernel in one part of the image can be used to detect a similar pattern in a different part of another image

In [None]:
# Demo: push one training batch through a single conv layer and a max-pool
# layer, printing the tensor shape after each stage.
conv=nn.Conv2d(3,8,kernel_size=3,stride=1,padding=1) #3 input channels -> 8 output channels (one per kernel)
pool=nn.MaxPool2d(2,2) #2x2 window moved with stride 2
for images,labels in train_dl:
    print(images.shape)
    out=conv(images)
    print(out.shape)
    out=pool(out)
    print(out.shape)
    break #one batch is enough for the shape check

In [None]:
conv.weight #kernel weights of the conv layer (a different set of weights for each channel)

The Conv2d layer transforms a 3-channel image into an n-channel feature map, and the MaxPool2d layer halves the height and width. The feature map gets smaller as we add more layers until we are finally left with a small feature map, which can be flattened into a vector. We can then add some fully connected layers at the end to get a vector of size 10 for each image.

In [None]:
#image classification base
class ImageClassificationBase(nn.Module):
    """Shared training/validation helpers for image classifiers.

    Subclasses provide ``forward``; the methods here compute the per-batch
    loss and accuracy, average them over an epoch, and print a summary line.
    """

    def training_step(self, batch):
        """Return the cross-entropy loss of the model on one batch."""
        inputs, targets = batch
        return F.cross_entropy(self(inputs), targets)

    def validation_step(self, batch):
        """Return the loss and accuracy of one validation batch as a dict."""
        inputs, targets = batch
        logits = self(inputs)
        return {
            'val_loss': F.cross_entropy(logits, targets),
            'val_acc': accuracy(logits, targets),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch results into plain Python floats."""

        def epoch_mean(key):
            # Stack the per-batch scalars and reduce them to one float.
            return torch.stack([entry[key] for entry in outputs]).mean().item()

        mean_acc = epoch_mean("val_acc")
        mean_loss = epoch_mean("val_loss")
        return {"val_loss": mean_loss, "val_acc": mean_acc}

    def epoch_end(self, epoch, result):
        """Print the training loss, validation loss and accuracy of an epoch."""
        template = "Epoch [{}], train_loss:{:.4f} val_loss: {:.4f}, val_acc: {:.4f}"
        print(template.format(
            epoch,
            result["train_loss"],
            result['val_loss'],
            result['val_acc'],
        ))


def accuracy(outputs, labels):
    """Return the fraction of correct predictions as a scalar tensor.

    The prediction for each row of ``outputs`` is the index of its largest
    value along dim 1. The result is built from a Python float, so it is a
    fresh tensor that is not connected to ``outputs``.
    """
    predicted = torch.max(outputs, dim=1).indices
    hits = (predicted == labels).sum().item()
    return torch.tensor(hits / len(predicted))


In [None]:
class Cifar10cnnModel(ImageClassificationBase):
    """Convolutional classifier for 3x32x32 images with 10 output scores.

    Three stages of (conv -> ReLU -> conv -> ReLU -> 2x2 max-pool) shrink the
    feature map from 32x32 to 4x4 while widening it to 256 channels; the
    result is flattened and passed through three fully connected layers.
    """

    def __init__(self):
        super().__init__()

        def conv_relu(in_channels, out_channels):
            # 3x3 convolution (stride 1, padding 1 keeps height/width) + ReLU.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
            ]

        # Layers are created in the same order as they are applied, so the
        # Sequential indices match the order listed here.
        layers = [
            *conv_relu(3, 32),       # 3x32x32  -> 32x32x32
            *conv_relu(32, 64),      # 32x32x32 -> 64x32x32
            nn.MaxPool2d(2, 2),      # -> 64x16x16

            *conv_relu(64, 128),
            *conv_relu(128, 128),
            nn.MaxPool2d(2, 2),      # -> 128x8x8

            *conv_relu(128, 256),
            *conv_relu(256, 256),
            nn.MaxPool2d(2, 2),      # -> 256x4x4

            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, xb):
        """Run a batch of images through the network and return the scores."""
        return self.network(xb)
   

In [None]:
# Instantiate the network; the bare name on the last line displays it in the notebook.
model=Cifar10cnnModel()
model

### Using GPU


In [None]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU."""
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(device_name)

In [None]:
# Pick the device once; the bare name on the last line displays it in the notebook.
device=get_default_device()
device

In [None]:
def to_device(data, device):
    """Move ``data`` to ``device``.

    Lists and tuples are processed element by element (recursively) and come
    back as a list; anything else must provide ``.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [None]:
class Device_Data_Loader():
    """Wrap a data loader so that each batch is moved to a device when iterated.

    Args:
        dl: the underlying iterable of batches; it must also support ``len``.
        device: the device every yielded batch is moved to.
    """

    def __init__(self,dl,device):
        self.dl=dl
        self.device=device

    def __iter__(self):
        """Yield the batches of the wrapped loader after moving them to ``self.device``."""
        for batch in self.dl:
            # Use the device given to this wrapper, not a module-level global.
            yield to_device(batch,self.device)

    def __len__(self):
        """Return the number of batches of the wrapped loader."""
        return len(self.dl)
        
        

In [None]:
# Wrap the training and validation loaders so their batches are moved to
# the selected device while iterating. test_dl is not wrapped here.
train_dl=Device_Data_Loader(train_dl,device)
val_dl=Device_Data_Loader(val_dl,device)

### Training

In [None]:
def fit(epochs,lr,model,train_dl,val_dl,opt_fn=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch results.

    Args:
        epochs: number of passes over ``train_dl``.
        lr: learning rate passed to the optimizer.
        model: module providing ``training_step`` and ``epoch_end`` (plus
            whatever ``evaluate`` needs).
        train_dl: iterable of training batches.
        val_dl: iterable of validation batches, passed to ``evaluate``.
        opt_fn: optimizer class, called as ``opt_fn(model.parameters(), lr)``.

    Returns:
        A list with one result dict per epoch: the dict returned by
        ``evaluate`` with an added ``"train_loss"`` entry (mean batch loss).
    """
    optimizer=opt_fn(model.parameters(),lr)
    history=[]

    for epoch in range(epochs):
        # training phase
        model.train()
        train_losses=[]
        for batch in train_dl:
            loss=model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Keep only the value: storing the graph-attached loss would keep
            # every batch's autograd history referenced until the epoch ends.
            train_losses.append(loss.detach())
        # validation phase
        result=evaluate(model,val_dl)
        result["train_loss"]=torch.stack(train_losses).mean().item()
        model.epoch_end(epoch,result)
        history.append(result)
    return history

In [None]:
@torch.no_grad()
def evaluate(model, val_dl):
    """Run the model over ``val_dl`` and return the combined result.

    The model is switched to eval mode, ``validation_step`` is called on
    every batch with gradient tracking disabled, and the collected outputs
    are combined by ``validation_epoch_end``.
    """
    model.eval()
    step_results = []
    for batch in val_dl:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

In [None]:
# Move the model to the selected device (to_device calls model.to(device, non_blocking=True)).
model=to_device(model,device)

In [None]:
# Baseline: validation loss and accuracy of the model before any training.
hist=[evaluate(model,val_dl)]
print(hist)

In [None]:
# Sanity check: print the input and output shapes for a single training batch.
for images,labels in train_dl:
    print(images.shape)
    out=model(images)
    print(out.shape)
    break #one batch is enough

In [None]:
# Training hyperparameters: epoch count, optimizer class and learning rate.
num_epochs=10
opt_func=torch.optim.Adam
lr=0.001

In [None]:
# Train the model; this replaces the earlier hist with the per-epoch results returned by fit.
hist=fit(num_epochs,lr,model,train_dl,val_dl,opt_func)

In [None]:
def plot_Accuracies(history):
    """Plot the validation accuracy of each epoch.

    Args:
        history: list of per-epoch result dicts, each with a ``"val_acc"`` entry.
    """
    accuracies=[x["val_acc"] for x in history]
    # Markers and axis labels mirror plot_losses so both charts read the same way.
    plt.plot(accuracies,"-x")
    plt.xlabel("epoch")
    plt.ylabel("accuracy")
    plt.title("acc vs num_epochs")

In [None]:
# Validation accuracy per epoch for the training run stored in hist.
plot_Accuracies(hist)

In [None]:
def plot_losses(history):
    """Plot the training and validation loss of each epoch on one chart.

    ``history`` is a list of per-epoch dicts holding ``"train_loss"`` and
    ``"val_loss"``; training is drawn in blue, validation in red.
    """
    training_curve = [entry["train_loss"] for entry in history]
    validation_curve = [entry["val_loss"] for entry in history]
    for series, line_format in ((training_curve, "-bx"), (validation_curve, "-rx")):
        plt.plot(series, line_format)
    plt.xlabel("epoch")
    plt.ylabel("loss")
    # Legend entries follow the order in which the curves were drawn.
    plt.legend(["Training", "Validation"])
    plt.title("loss vs No of epochs")

In [None]:
# The training results are stored in hist; no notebook-level name "history" exists.
plot_losses(hist)


Here overfitting occurs. Ways to reduce it:  
- gathering and generating more data  
- regularization  
- early stopping