## Fine-tune a pre-trained GoogLeNet model on the Intel Image Classification dataset



### 0. Steps to download Kaggle datasets in Google Colab
1.   Go to your Kaggle account *Profile > Settings > API*, click **Expire Token** and then **Create New Token**; this downloads a ***kaggle.json*** file to your machine.
2.   Go to your Google Colab notebook and run the following commands:
  *   Install kaggle API client `!pip install -q kaggle`
  *   Upload the kaggle.json file here
  *   Kaggle API client expects the file to be in ~/.kaggle so move it there
  *   `!mkdir -p ~/.kaggle`
  *   `!mv kaggle.json ~/.kaggle`
  *   `!chmod 600 ~/.kaggle/kaggle.json`











### 1. Download the Intel Image Classification dataset from Kaggle



In [None]:
# Install the Kaggle API client (quiet mode)
!pip install -q kaggle

In [None]:
# Upload the kaggle.json file here
# The following command will prompt you to upload the kaggle.json file
from google.colab import files
files.upload()

In [None]:
# Kaggle API client expects the file to be in ~/.kaggle so move it there
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle
# Restrict the credentials file permissions to mode 600
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# List datasets through the Kaggle client (exercises the credentials installed above)
!kaggle datasets list


In [None]:
# Download the puneet6060/intel-image-classification dataset into /content
!kaggle datasets download -d puneet6060/intel-image-classification -p /content --force

In [None]:
from zipfile import ZipFile

# Unpack the downloaded dataset archive into /content
with ZipFile("/content/intel-image-classification.zip", mode="r") as archive:
  archive.extractall(path="/content")

In [None]:
# Create the directory that will hold the three dataset splits
!mkdir -p /content/data

In [None]:
# Move the inner seg_pred folder into /content/data
!mv /content/seg_pred/seg_pred /content/data/

In [None]:
# Move the inner seg_train folder into /content/data
!mv /content/seg_train/seg_train /content/data

In [None]:
# Move the inner seg_test folder into /content/data
!mv /content/seg_test/seg_test /content/data

In [None]:
# Remove the outer seg_test wrapper directory left behind by the move
!rmdir /content/seg_test

In [None]:
# Remove the outer seg_pred wrapper directory left behind by the move
!rmdir /content/seg_pred

In [None]:
# Remove the outer seg_train wrapper directory left behind by the move
!rmdir /content/seg_train

In [None]:
# Delete the downloaded archive now that it has been extracted
!rm -rf /content/intel-image-classification.zip

In [None]:
# Locations of the three dataset folders after they were moved under /content/data
train_dir = "/content/data/seg_train"
test_dir = "/content/data/seg_test"
pred_dir = "/content/data/seg_pred"

### 1. Imports

In [None]:
# Install tqdm, which is imported below for the epoch progress bar
!pip install tqdm

In [None]:
import os
import time

import pandas as pd
import numpy as np

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import torch.nn.functional as F

import matplotlib.pyplot as plt
from PIL import Image

from typing import Tuple, Dict, List
import pathlib

from tqdm.auto import tqdm
from timeit import default_timer as timer

In [None]:
## SETTINGS ##
# Use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyperparameter
BATCH_SIZE = 128       # samples per DataLoader batch
NUM_EPOCHS = 10        # number of passes handed to train()
LEARNING_RATE = 0.001  # learning rate handed to the Adam optimizer

# Architecture
NUM_CLASSES = 6        # output size of the replacement classifier layer
IN_CHANNELS = 3        # NOTE(review): not referenced elsewhere in this notebook

# Other
RANDOM_SEED = 42       # seed passed to torch.manual_seed / torch.cuda.manual_seed

### 2. Exploring dataset for all images

In [None]:
import os
def walkt_through_dir(dir_path):
  """Print a summary of every directory found under `dir_path`.

  For each directory visited by `os.walk` (the root included) two lines are
  printed: the directory path itself, followed by the number of immediate
  sub-directories and files it contains.

  Args:
    dir_path: root directory to walk.
  """
  for dirpath, dirnames, filenames in os.walk(dir_path):
    print(dirpath)
    # The closing quote after {dirpath} was missing in the original message
    print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'")

In [None]:
# Print the directory / file counts for every folder under /content/data
walkt_through_dir("/content/data")

### 3. Create a Custom Dataset to replicate ImageFolder

In [None]:

def find_classes(directory: str) -> Tuple[List[str], Dict[str, int]]:
  """Collect class names from the sub-directories of `directory`.

  Args:
    directory: folder whose immediate sub-directories are named after classes.

  Returns:
    A tuple `(classes, class_to_idx)`: the alphabetically sorted
    sub-directory names, and a dict mapping each name to its position
    in that sorted list.
  """
  # Only directories count as classes; plain files are ignored
  subdir_names = [item.name for item in os.scandir(directory) if item.is_dir()]
  subdir_names.sort()

  index_by_name = dict(zip(subdir_names, range(len(subdir_names))))
  return subdir_names, index_by_name

In [None]:
# Write a custom dataset

# 1. Subclass torch.utils.data.Dataset
class ImageFolder(Dataset):
  """Dataset over images laid out as `targ_dir/<class_name>/<file>.jpg`.

  Each sample is an `(image, class_idx)` pair, where `class_idx` is the
  index of the image's parent directory name in `self.classes`.

  Attributes are documented inline in `__init__`.
  """

  # 2. Initialize our custom dataset
  def __init__(self,
               targ_dir: str,
               transform=None):
    """
    Args:
      targ_dir: root directory containing one sub-directory per class.
      transform: optional callable applied to each loaded PIL image.
    """
    # 3. Create class attributes
    # Get all of the image paths. Sorted so that index -> sample is the same
    # on every run (pathlib's glob makes no ordering guarantee).
    self.paths = sorted(pathlib.Path(targ_dir).glob("*/*.jpg"))

    # Setup transform
    self.transform = transform

    # Create classes and class_to_idx attributes
    self.classes, self.class_to_idx = find_classes(targ_dir)

  # 4. Create a function to load images
  def load_image(self, index: int) -> Image.Image:
    """Open the image stored at position `index` and return it as RGB.

    The file is closed before returning: `convert` produces an in-memory
    copy, so no file handle is left open per sample. Converting to RGB
    guarantees three channels for the downstream transforms.
    """
    image_path = self.paths[index]
    with Image.open(image_path) as img:
      return img.convert("RGB")

  # 5. Overwrite __len__()
  def __len__(self) -> int:
    """Return the number of image files found."""
    return len(self.paths)

  # 6. Overwrite __getitem__() method to return a particular sample
  def __getitem__(self, index: int) -> Tuple[torch.Tensor, int]:
    """Return `(image, class_idx)` for the sample at `index`.

    The image is passed through `self.transform` when one was supplied,
    otherwise the PIL image is returned unchanged.
    """
    img = self.load_image(index)
    # The class is encoded by the name of the directory holding the file
    class_name = self.paths[index].parent.name
    class_idx = self.class_to_idx[class_name]

    # Transform if necessary
    if self.transform is not None:
      return self.transform(img), class_idx
    return img, class_idx

In [None]:
# Display the class names and class -> index mapping found in the training folder
find_classes(train_dir)

In [None]:
# Create the preprocessing pipelines for the train and test splits

# Per-channel normalisation step shared by both pipelines
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: resize, then random crop / flip / rotation before tensor conversion
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    normalize,
])

# Test pipeline: resize and centre crop only, no random operations
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

In [None]:
# Build the train and test datasets with the custom ImageFolder class
train_data = ImageFolder(targ_dir=train_dir, transform=train_transform)
test_data = ImageFolder(targ_dir=test_dir, transform=test_transform)


In [None]:
# Display the number of samples in the train and test datasets
len(train_data), len(test_data)

In [None]:
# Options common to both loaders
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=2)

# Training batches are reshuffled; test batches keep the dataset order
train_loader = DataLoader(train_data, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_data, shuffle=False, **loader_kwargs)

In [None]:
# Print the tensor shapes of the first batch from each loader
for loader in (train_loader, test_loader):
  for img, label in loader:
    print(f"Image shape: {img.shape}")
    print(f"Label shape: {label.shape}")
    break  # one batch per loader is enough

#### 3.1 Show sample images

In [None]:
# Grab one training batch and keep (at most) its first 10 samples
images, labels = next(iter(train_loader))
images_10 = images[:10]
labels_10 = labels[:10]

class_names = train_data.classes

# The training transform normalises each channel with these statistics.
# Undo that before plotting, otherwise imshow receives values outside
# [0, 1] and the colours are clipped and distorted.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Plot up to 10 images in a 2 x 5 grid
plt.figure(figsize=(15, 6))
for i, (image, label) in enumerate(zip(images_10, labels_10)):
  # De-normalise, clamp rounding overshoot, and move channels last for matplotlib
  img = (image * norm_std + norm_mean).clamp(0, 1).permute(1, 2, 0).numpy()
  plt.subplot(2, 5, i + 1)
  plt.imshow(img)
  plt.title(class_names[label.item()])
  plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Display the list of class names taken from the training dataset
class_names

### 4. Transfer learning from pre-trained GoogLeNet

In [None]:
# Load GoogLeNet with its pre-trained ImageNet weights.
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of the
# explicit `weights` argument; IMAGENET1K_V1 is the checkpoint it mapped to.
model = models.googlenet(weights=models.GoogLeNet_Weights.IMAGENET1K_V1)

In [None]:
# Freeze the pre-trained weights
#
# nn.Module.modules() yields the root module as well as its children, so
# freezing every non-Linear module freezes the whole network (the existing
# Linear layer included). That is written here as one explicit call.
# The classifier assigned to model.fc in the next cell is a new layer and
# therefore trainable.
model.requires_grad_(False)

# Show the Linear layers currently present in the network
for layer in model.modules():
  if isinstance(layer, nn.Linear):
    print(layer)

In [None]:
# Replace the classifier with a new Linear layer that outputs NUM_CLASSES logits
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=NUM_CLASSES)

In [None]:
# Display the classifier layer now attached to the model
model.fc

In [None]:
# Move the network to the selected device, then set up loss function and optimizer
model = model.to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Set random seeds (CPU generator first, then CUDA)
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
  seed_fn(RANDOM_SEED)

#### 4.1 Create train and test loop functions

In [None]:
def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device=None):
  """Train `model` for one pass over `dataloader`.

  Args:
    model: network to optimise; it is switched to train mode.
    dataloader: iterable of `(X, y)` batches that also supports `len()`.
    loss_fn: callable computing the loss from `(logits, targets)`.
    optimizer: optimizer updating the model's parameters.
    device: device to run on. When omitted, the device the model's
      parameters already live on is used (CPU for a parameter-less model),
      so the function no longer depends on a module-level `device` global.

  Returns:
    `(train_loss, train_acc)`: the loss and accuracy, each averaged over
    the batches (not over individual samples).
  """
  if device is None:
    # Run wherever the model already is instead of relying on a global
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
  else:
    model.to(device)

  # Put model in train mode
  model.train()

  # Setup train loss and train accuracy values
  train_loss, train_acc = 0.0, 0.0

  # Loop through data loader data batches
  for X, y in dataloader:
    # Send data to target device
    X, y = X.to(device), y.to(device)

    # 1. Forward pass
    y_pred = model(X)

    # 2. Calculate the loss
    loss = loss_fn(y_pred, y)
    train_loss += loss.item()

    # 3. Optimizer zero_grad()
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    # Accumulate accuracy across batches. Softmax is monotonic, so the
    # argmax of the raw logits is already the predicted class.
    y_pred_class = y_pred.argmax(dim=1)
    train_acc += (y_pred_class == y).sum().item() / len(y_pred)

  # Adjust metrics to get average loss and accuracy per batch
  train_loss = train_loss / len(dataloader)
  train_acc = train_acc / len(dataloader)
  return train_loss, train_acc


In [None]:
def test_step(
    model: torch.nn.Module,
    dataloader: torch.utils.data.DataLoader,
    loss_fn: torch.nn.Module,
    device=None):
  """Evaluate `model` on one pass over `dataloader` without gradient tracking.

  Args:
    model: network to evaluate; it is switched to eval mode.
    dataloader: iterable of `(X, y)` batches that also supports `len()`.
    loss_fn: callable computing the loss from `(logits, targets)`.
    device: device to run on. When omitted, the device the model's
      parameters already live on is used (CPU for a parameter-less model),
      so the function no longer depends on a module-level `device` global.

  Returns:
    `(test_loss, test_acc)`: the loss and accuracy, each averaged over
    the batches (not over individual samples).
  """
  if device is None:
    # Run wherever the model already is instead of relying on a global
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
  else:
    model.to(device)

  # Put model in eval mode
  model.eval()

  # Set up test loss and test accuracy values
  test_loss, test_acc = 0.0, 0.0

  # Turn on inference context manager
  with torch.inference_mode():
    # Loop through DataLoader batches
    for X, y in dataloader:
      # Send data to target device
      X, y = X.to(device), y.to(device)

      # 1. Forward pass
      test_pred_logits = model(X)

      # 2. Calculate the loss
      loss = loss_fn(test_pred_logits, y)
      test_loss += loss.item()

      # 3. Calculate the accuracy. Softmax is monotonic, so the argmax of
      # the raw logits is already the predicted class.
      test_pred_labels = test_pred_logits.argmax(dim=1)
      test_acc += (test_pred_labels == y).sum().item() / len(test_pred_labels)

  # Adjust metrics to get average loss and accuracy per batch
  test_loss = test_loss / len(dataloader)
  test_acc = test_acc / len(dataloader)

  return test_loss, test_acc

In [None]:
# Create a train() by combining train_step() and test_step()
def train(
    model: torch.nn.Module,
    train_dataloader: torch.utils.data.DataLoader,
    test_dataloader: torch.utils.data.DataLoader,
    optimizer: torch.optim.Optimizer,
    loss_fn: torch.nn.Module,
    epochs: int):
  """Train and evaluate `model` for `epochs` epochs.

  Each epoch runs `train_step` on `train_dataloader` followed by
  `test_step` on `test_dataloader`, prints the four metrics and records
  them.

  Args:
    model: network to train.
    train_dataloader: batches used for optimisation.
    test_dataloader: batches used for evaluation after every epoch.
    optimizer: optimizer passed to `train_step`.
    loss_fn: loss passed to both steps.
    epochs: number of epochs to run.

  Returns:
    Dict with keys "train_loss", "train_acc", "test_loss" and "test_acc",
    each holding one float per epoch.
  """

  # 1. Create empty results dictionary
  results = {
      "train_loss": [],
      "train_acc": [],
      "test_loss": [],
      "test_acc": []
  }

  # 2. Loop through training and testing steps for a number of epochs
  for epoch in tqdm(range(epochs)):
    train_loss, train_acc = train_step(model=model,
                                       dataloader=train_dataloader,
                                       loss_fn=loss_fn,
                                       optimizer=optimizer)
    test_loss, test_acc = test_step(model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn)
    # 3. Print out what's happening
    print(f"Epoch: {epoch+1} | "
    f"train_loss: {train_loss:.4f} | "
    f"train_acc: {train_acc:.4f} | "
    f"test_loss: {test_loss:.4f} | "
    f"test_acc: {test_acc:.4f}")

    # 4. Update results dictionary
    # float() accepts both Python numbers and one-element tensors, so every
    # stored value is a plain float. The original appended test_loss twice
    # and never recorded test_acc.
    results["train_loss"].append(float(train_loss))
    results["train_acc"].append(float(train_acc))
    results["test_loss"].append(float(test_loss))
    results["test_acc"].append(float(test_acc))

  # 5. Return the filled results once all epochs have run
  return results


In [None]:
# Record the wall-clock start of training
start_time = timer()

# Run the full train / evaluate loop and keep the per-epoch metrics
model_results = train(model, train_loader, test_loader, optimizer, loss_fn, NUM_EPOCHS)

# Stop the clock and report the elapsed time
end_time = timer()
elapsed_seconds = end_time - start_time
print(f"Total training time: {elapsed_seconds:.3f} seconds.")

In [None]:
# Mount Google Drive at /content/drive (the nbconvert step below reads the notebook from there)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
jupyter nbconvert --ClearMetadataPreprocessor.enabled=True --ClearOutputPreprocessor.enabled=True --to notebook --inplace /content/drive/MyDrive/Colab Notebooks/GoogleNet_FineTune_Pre_Trained_Model.ipynb