<a href="https://colab.research.google.com/github/martinpius/DEEP-CNN-TRANSFER/blob/main/Tranfer_Learning_(Fine_turning_the_Neural_network).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)
try:
  COLAB = True
  import torch
  print(f">>>> You are on CoLaB with torch version {torch.__version__}")
except Exception as e:
  print(f">>>> {type(e)}: {e}\n>>>> please correct {type(e)} and reload your drive")
  COLAB = False
def time_fmt(t: float = 123.917)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"hrs: {h} min: {m:>02} sec: {s:05.2f}"
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f">>>> time formating\t.......................\n>>> time elapsed\t{time_fmt()}")

Mounted at /content/drive
>>>> You are on CoLaB with torch version 1.8.1+cu101
>>>> time formating	.......................
>>> time elapsed	hrs: 0 min: 02 sec: 03.00


In [13]:
#In this notebook we are going to perform a transfer learning approach(fine-turning)
#The CNN with an application on the CIFAR10 dataset:

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
import time, sys, datetime
import torchvision

In [7]:
#We first train a simple CNN on CIFAR10 dataset then finally we fine-turning using
#pre-trained models such as inception-v3/vgg16 ect

In [22]:
#Hyperparameters and other features
EPOCHS = 2
batch_size = 128
learning_rate = 1e-3
num_classes = 10
in_channels = 3


In [9]:
class SimpleCNN(nn.Module):
  def __init__(self, in_channels, num_classes):
    super(SimpleCNN, self).__init__()
    self.layer1 = nn.Conv2d(in_channels = in_channels, 
                            out_channels = 8,
                            kernel_size = (3,3),
                            padding = (1,1),
                            stride = (1,1))
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size = (2,2), stride = (2,2))
    self.layer2 = nn.Conv2d(in_channels = 8, 
                            out_channels = 16,
                            kernel_size = (3,3),
                            stride = (1,1),
                            padding = (1,1))
    self.bn = nn.BatchNorm2d(num_features = 8)
    self.fc1 = nn.Linear(in_features = 8*8*16, out_features = 512)
    self.drp = nn.Dropout(p = 0.5)
    self.fc2 = nn.Linear(in_features = 512, out_features = 128)
    self.softmax = nn.Softmax()
    self.outputs = nn.Linear(in_features = 128, out_features = num_classes)
  
  def forward(self, input_tensor):
    x = self.relu(self.layer1(input_tensor))
    x = self.maxpool(x)
    x = self.bn(x)
    x = self.relu(self.layer2(x))
    x = self.maxpool(x)
    x = x.reshape(x.shape[0], -1)
    x = self.relu(self.fc1(x))
    x = self.drp(x)
    x = self.relu(self.fc2(x))
    x = self.softmax(self.outputs(x))
    return x

In [10]:
#Instantiating the model class and testing on a random data.
rnd_data = torch.randn(size = (64, 3, 32, 32)).to(device = device)
model = SimpleCNN(in_channels = in_channels, num_classes = num_classes).to(device = device)
print(f">>>> The desired output shape is: {model(rnd_data).shape}")
print(model)

>>>> The desired output shape is: torch.Size([64, 10])
SimpleCNN(
  (layer1): Conv2d(3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (layer2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=1024, out_features=512, bias=True)
  (drp): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (softmax): Softmax(dim=None)
  (outputs): Linear(in_features=128, out_features=10, bias=True)
)




In [11]:
#Import the data from torch-vission and preproocess using transform and Dataloader methods:

In [12]:
train_dfm = datasets.CIFAR10(root = "train_cifar10/", train = True, transform = transforms.ToTensor(), download = True)
test_dfm = datasets.CIFAR10(root = "test_cifar10/", train = False, transform = transforms.ToTensor(), download = True)
train_loader = DataLoader(dataset = train_dfm, shuffle = True, batch_size = batch_size)
test_loader = DataLoader(dataset = test_dfm, shuffle = False, batch_size = batch_size)
x_train_batch, y_train_batch = next(iter(train_loader))
print(f">>>> x_train_batch_shape: {x_train_batch.shape}\ty_train_batch_shape: {y_train_batch.shape}")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to train_cifar10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting train_cifar10/cifar-10-python.tar.gz to train_cifar10/
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to test_cifar10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting test_cifar10/cifar-10-python.tar.gz to test_cifar10/
>>>> x_train_batch_shape: torch.Size([128, 3, 32, 32])	y_train_batch_shape: torch.Size([128])


In [13]:
#get the loss and optimizer objects
loss_obj = nn.CrossEntropyLoss()
optimizer = optim.Adam(params = model.parameters(), lr = learning_rate)

In [22]:
#Training loop:
tic = time.time()
for epoch in range(EPOCHS):
  losses = []
  print(f"\n>>>> training starts for epoch {epoch + 1}\n>>>> please wait while the model is training....................")
  for idx, (data, target) in enumerate(tqdm(train_loader)):
    data = data.to(device = device)
    target = target.to(device = device)
    preds = model(data)
    train_loss = loss_obj(preds, target)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    losses.append(train_loss.item())
  mean_loss = float(sum(losses)/len(losses))
  print(f"\n>>>> the average training loss: {mean_loss:.4f}")

def __monitor__(loader, model):
  if loader.dataset.train:
    print(f"\n>>>> monitor the performance over the training dataset\n>>>> please wait................")
  else:
    print(f"\n>>>> monitor the performance over the test dataset\n>>>> please wait...................")
  num_correct = 0
  num_examples = 0
  model.eval()
  with torch.no_grad():
    for idx, (x, y) in enumerate(tqdm(test_loader)):
      x = x.to(device = device)
      y = y.to(device = device)
      preds = model(x)
      _, predictions = preds.max(1)
      num_correct+=(predictions == y).sum()
      num_examples+= predictions.size(0)
      acc = float((num_correct/num_examples) * 100)
  return acc
  model.train()

print(f"\n>>>> accuracy over the training data: {__monitor__(train_loader, model)}")
print(f"\n>>>> accuracy over the validation data: {__monitor__(test_loader, model)}")
toc = time.time()
print(f"\n>>>> for the simple CNN time elapsed is: {time_fmt(toc - tic)}")


  1%|          | 4/391 [00:00<00:11, 33.50it/s]


>>>> training starts for epoch 1
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.44it/s]
  1%|          | 4/391 [00:00<00:10, 35.77it/s]


>>>> the average training loss: 2.0766

>>>> training starts for epoch 2
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.61it/s]
  1%|          | 4/391 [00:00<00:12, 32.21it/s]


>>>> the average training loss: 1.9814

>>>> training starts for epoch 3
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.69it/s]
  1%|          | 4/391 [00:00<00:11, 34.77it/s]


>>>> the average training loss: 1.9419

>>>> training starts for epoch 4
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.59it/s]
  1%|          | 4/391 [00:00<00:11, 33.33it/s]


>>>> the average training loss: 1.9132

>>>> training starts for epoch 5
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.67it/s]
  1%|          | 4/391 [00:00<00:11, 34.38it/s]


>>>> the average training loss: 1.8932

>>>> training starts for epoch 6
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.59it/s]
  1%|          | 4/391 [00:00<00:11, 34.66it/s]


>>>> the average training loss: 1.8738

>>>> training starts for epoch 7
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.68it/s]
  1%|          | 4/391 [00:00<00:10, 35.45it/s]


>>>> the average training loss: 1.8587

>>>> training starts for epoch 8
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.77it/s]
  1%|          | 4/391 [00:00<00:11, 35.04it/s]


>>>> the average training loss: 1.8468

>>>> training starts for epoch 9
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.70it/s]
  1%|          | 4/391 [00:00<00:11, 33.22it/s]


>>>> the average training loss: 1.8422

>>>> training starts for epoch 10
>>>> please wait while the model is training....................


100%|██████████| 391/391 [00:11<00:00, 34.67it/s]
  6%|▋         | 5/79 [00:00<00:01, 48.35it/s]


>>>> the average training loss: 1.8312

>>>> monitor the performance over the training dataset
>>>> please wait................


100%|██████████| 79/79 [00:01<00:00, 48.55it/s]
  6%|▋         | 5/79 [00:00<00:01, 48.96it/s]


>>>> accuracy over the training data: 60.0099983215332

>>>> monitor the performance over the test dataset
>>>> please wait...................


100%|██████████| 79/79 [00:01<00:00, 49.74it/s]


>>>> accuracy over the validation data: 60.0099983215332

>>>> for the simple CNN time elapsed is: hrs: 0 min: 01 sec: 56.00





In [23]:
#####Model Turning using tranfer learning technique######

In [14]:
model_turned = torchvision.models.vgg16(pretrained = True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




In [15]:
print(model_turned)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [16]:
#Modify the model to suit our need: We change number of classes from 100 to 10 and we also
#remove the average pool layer:


In [23]:
class AVG_Removal(nn.Module):
  def __init__(self):
    super(AVG_Removal, self).__init__()
  
  def forward(self, input_tensor):
    return input_tensor

In [24]:
model_turned.avgpool = AVG_Removal() #Applying the identity class to remove the average pool layer


In [25]:
#Modify the classifier to have 10 classes to suit our demand
model_turned.classifier = nn.Linear(512, 10)

In [26]:
#print the model to see if the changes were correctly made
model_turned = model_turned.to(device = device)
print(model_turned)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [27]:
#Training loop:
tic = time.time()
for epoch in range(EPOCHS):
  losses = []
  print(f"\n>>>> training starts for epoch {epoch + 1}\n>>>> please wait while the model is training....................")
  for idx, (data, target) in enumerate(tqdm(train_loader)):
    data = data.to(device = device)
    target = target.to(device = device)
    preds = model_turned(data)
    train_loss = loss_obj(preds, target)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    losses.append(train_loss.item())
  mean_loss = float(sum(losses)/len(losses))
  print(f"\n>>>> the average training loss: {mean_loss:.4f}")

def __monitor__(loader, model):
  if loader.dataset.train:
    print(f"\n>>>> monitor the performance over the training dataset\n>>>> please wait................")
  else:
    print(f"\n>>>> monitor the performance over the test dataset\n>>>> please wait...................")
  num_correct = 0
  num_examples = 0
  model.eval()
  with torch.no_grad():
    for idx, (x, y) in enumerate(tqdm(test_loader)):
      x = x.to(device = device)
      y = y.to(device = device)
      preds = model(x)
      _, predictions = preds.max(1)
      num_correct+=(predictions == y).sum()
      num_examples+= predictions.size(0)
      acc = float((num_correct/num_examples) * 100)
  return acc
  model.train()

print(f"\n>>>> accuracy over the training data: {__monitor__(train_loader, model)}")
print(f"\n>>>> accuracy over the validation data: {__monitor__(test_loader, model)}")
toc = time.time()
print(f"\n>>>> for the turned model time elapsed is: {time_fmt(toc - tic)}")

  0%|          | 0/391 [00:00<?, ?it/s]


>>>> training starts for epoch 1
>>>> please wait while the model is training....................


100%|██████████| 391/391 [02:57<00:00,  2.20it/s]
  0%|          | 0/391 [00:00<?, ?it/s]


>>>> the average training loss: 2.4273

>>>> training starts for epoch 2
>>>> please wait while the model is training....................


100%|██████████| 391/391 [02:53<00:00,  2.25it/s]
  8%|▊         | 6/79 [00:00<00:01, 52.97it/s]


>>>> the average training loss: 2.4273

>>>> monitor the performance over the training dataset
>>>> please wait................


100%|██████████| 79/79 [00:01<00:00, 51.12it/s]
  8%|▊         | 6/79 [00:00<00:01, 53.18it/s]


>>>> accuracy over the training data: 10.84999942779541

>>>> monitor the performance over the test dataset
>>>> please wait...................


100%|██████████| 79/79 [00:01<00:00, 50.00it/s]


>>>> accuracy over the validation data: 10.84999942779541

>>>> for the turned model time elapsed is: hrs: 0 min: 05 sec: 54.00



