# Intel Image Classification with Transfer Learning

In this notebook, two different models using transfer learning are used to perform image classification on the Intel Image Classification dataset.

In [1]:
import numpy as np
import os
import pandas as pd
from PIL import Image
import torch
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from torch import nn
from torchvision import datasets, transforms, models
from torchvision.io import read_image
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset, DataLoader, random_split

### Part 1: Datasets and Dataloaders

In [2]:
# Define dataset class
class IntelImageDataset(Dataset):
    """
    Custom class to wrap around Intel Image dataset.
    """
    def __init__(self, root_dir, split="seg_train", transform=None):
        """
        Init function for the class
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform

        # Define class labels
        self.classes = ["buildings", "forest", "glacier", "mountain", "sea", "street"]
        self.class_indices =  {cls: idx for idx, cls in enumerate(self.classes)}

        # Init images and labels
        self.images = []
        self.labels = []

        self.data_dir = os.path.join(root_dir, split)
        for class_name in self.classes:
            class_dir = os.path.join(self.data_dir, class_name)
            
            # Handle non existant paths
            if not os.path.exists(class_dir):
                continue
            
            # Parse paths
            for image_path in os.listdir(class_dir):                
                self.images.append(os.path.join(class_dir, image_path))
                self.labels.append(self.class_indices[class_name])

    def __len__(self):
        """
        Len member function
        """
        return len(self.images)


    def __getitem__(self, idx):
        """
        Get member function
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Load image
        image_path = self.images[idx]
        image = Image.open(image_path).convert("RGB")
        label = self.labels[idx]

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        return image, label

In [3]:
# Transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization
]) 

In [4]:
# Initialize Datasets
train_data = IntelImageDataset("./data/seg_train/", "seg_train/", transform)
test_data = IntelImageDataset("./data/seg_test/", "seg_test/", transform)

In [5]:
# Create split for train and validation
dataset_size = len(train_data)
train_size = int(0.85 * dataset_size)
val_size = dataset_size - train_size
train_data, val_data = random_split(train_data, [train_size, val_size])

# Initialize Dataloaders
train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

### Part 2: Train Functions

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer, device="cpu"):
    """
    Model training loop
    """
    model.train()
    size = len(dataloader.dataset)
    running_loss = 0

    for batch, (inputs, labels) in enumerate(dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        running_loss += loss.item()

        if (batch+1) % 100 == 0:
            print(f"Step [{batch+1}/{len(dataloader)}], Loss: {running_loss/100:.4f}")
            running_loss = 0.0

In [7]:
def test_loop(dataloader, model, loss_fn, device):
    """
    Model test loop
    """
    model.eval()
    size = len(dataloader)
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            pred = outputs
            _, pred = torch.max(outputs, dim=1)
            
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    
    correct /= total
    print(f"Accuracy: {100*correct:>1f}%")

### Part 3: Transfer Learning

In [8]:
# First try with ResNet-50 as a fixed feature extractor
model_fixed = models.resnet50(pretrained=True)

# Freeze model parameters
for param in model_fixed.parameters():
    param.requires_grad = False

# Set outputlayer to be compatible with Intel Image 
num_ftrs = model_fixed.fc.in_features
num_labels = len(test_data.classes)
model_fixed.fc = nn.Linear(num_ftrs, num_labels)



In [9]:
# Initialize loss function and optimizer
criterion_fixed = nn.CrossEntropyLoss()
optimizer_fixed = torch.optim.SGD(model_fixed.fc.parameters(), lr=0.0001, momentum=0.9)

In [10]:
# Check for CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_fixed.to(device)
print(f"CUDA Available: {torch.cuda.is_available()}")

CUDA Available: True


In [11]:
# Run training/test loops
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch: [{epoch+1}/{num_epochs}]")
    train_loop(train_dataloader, model_fixed, criterion_fixed, optimizer_fixed, device)
    test_loop(test_dataloader, model_fixed, criterion_fixed, device)

# Save out model
torch.save(model_fixed.state_dict(), "model_fixed.pth")

Epoch: [1/10]
Step [100/373], Loss: 1.8117
Step [200/373], Loss: 1.6536
Step [300/373], Loss: 1.5342
Accuracy: 69.566667%
Epoch: [2/10]
Step [100/373], Loss: 1.3511
Step [200/373], Loss: 1.2717
Step [300/373], Loss: 1.1923
Accuracy: 82.366667%
Epoch: [3/10]
Step [100/373], Loss: 1.0900
Step [200/373], Loss: 1.0096
Step [300/373], Loss: 0.9597
Accuracy: 85.533333%
Epoch: [4/10]
Step [100/373], Loss: 0.8883
Step [200/373], Loss: 0.8425
Step [300/373], Loss: 0.8178
Accuracy: 86.900000%
Epoch: [5/10]
Step [100/373], Loss: 0.7648
Step [200/373], Loss: 0.7353
Step [300/373], Loss: 0.7134
Accuracy: 86.966667%
Epoch: [6/10]
Step [100/373], Loss: 0.6614
Step [200/373], Loss: 0.6454
Step [300/373], Loss: 0.6471
Accuracy: 87.800000%
Epoch: [7/10]
Step [100/373], Loss: 0.6107
Step [200/373], Loss: 0.5853
Step [300/373], Loss: 0.5832
Accuracy: 87.733333%
Epoch: [8/10]
Step [100/373], Loss: 0.5592
Step [200/373], Loss: 0.5414
Step [300/373], Loss: 0.5452
Accuracy: 88.366667%
Epoch: [9/10]
Step [100/

In [12]:
# Cuda management
torch.cuda.init()
torch.cuda.synchronize()

# Finetuning ResNet50
model_finetuning = models.resnet50(pretrained=True)

# Set outputlayer to be compatible with Intel Image 
num_ftrs = model_finetuning.fc.in_features
num_labels = len(test_data.classes)
model_finetuning.fc = nn.Linear(num_ftrs, num_labels)

# Initialize loss function and optimizer
criterion_finetuning = nn.CrossEntropyLoss()
optimizer_finetuning = torch.optim.SGD(model_finetuning.parameters(), lr=0.0001, momentum=0.9)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()
model_finetuning.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [13]:
# Run training/test loop
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch: [{epoch+1}/{num_epochs}]")
    train_loop(train_dataloader, model_finetuning, criterion_finetuning, optimizer_finetuning, device)
    test_loop(test_dataloader, model_finetuning, criterion_finetuning, device)

# Save out fine-tuned model
torch.save(model_finetuning.state_dict(), "model_finetuning.pth")

Epoch: [1/10]
Step [100/373], Loss: 1.7574
Step [200/373], Loss: 1.6242
Step [300/373], Loss: 1.5069
Accuracy: 72.433333%
Epoch: [2/10]
Step [100/373], Loss: 1.3254
Step [200/373], Loss: 1.2341
Step [300/373], Loss: 1.1498
Accuracy: 83.366667%
Epoch: [3/10]
Step [100/373], Loss: 1.0298
Step [200/373], Loss: 0.9591
Step [300/373], Loss: 0.9081
Accuracy: 86.066667%
Epoch: [4/10]
Step [100/373], Loss: 0.8097
Step [200/373], Loss: 0.7682
Step [300/373], Loss: 0.7178
Accuracy: 88.333333%
Epoch: [5/10]
Step [100/373], Loss: 0.6596
Step [200/373], Loss: 0.6282
Step [300/373], Loss: 0.5877
Accuracy: 89.266667%
Epoch: [6/10]
Step [100/373], Loss: 0.5451
Step [200/373], Loss: 0.5184
Step [300/373], Loss: 0.5008
Accuracy: 89.666667%
Epoch: [7/10]
Step [100/373], Loss: 0.4708
Step [200/373], Loss: 0.4634
Step [300/373], Loss: 0.4334
Accuracy: 89.733333%
Epoch: [8/10]
Step [100/373], Loss: 0.4181
Step [200/373], Loss: 0.4004
Step [300/373], Loss: 0.4055
Accuracy: 90.533333%
Epoch: [9/10]
Step [100/

In [14]:
# Validation Comparison

# Fixed Validation Accuracy
print(f"Fixed Feature Extractor")
fixed_val_accuracy = test_loop(val_dataloader, model_fixed, criterion_fixed, device)

# Fintuned Validation Accuracy
print(f"Finetuned ResNet Model")
finetuned_val_accuracy = test_loop(val_dataloader, model_finetuning, criterion_finetuning, device)

"""
Fixed Model Accuracy: 89.60%
Finetuned Model Accuracy: 92.07%

As expected, the finetuning the entire ResNet model resulted in an overall higher final accuracy.
However, the fixed feature extractor performed quite well and finetuning only resulted in an
increase in approximately 3%. This demonstrates the significant potential for transfer learning.
"""


Fixed Feature Extractor
Accuracy: 88.461538%
Finetuned ResNet Model
Accuracy: 91.358025%


'\nFixed Model Accuracy: 89.60%\nFinetuned Model Accuracy: 92.07%\n\nAs expected, the finetuning the entire ResNet model resulted in an overall higher final accuracy.\nHowever, the fixed feature extractor performed quite well and finetuning only resulted in an\nincrease in approximately 3%. This demonstrates the significant potential for transfer learning.\n'

Both transfer learning models performed significantly well, achieving 89.60% and 92.07% accuracy respectively. Future work would include training the models over more epochs and comparing the perfromance of a non pretrained model.