In [1]:
import torch
import os
import numpy as np
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
from torchvision import datasets, transforms, models
from torchvision.transforms import ToTensor
from matplotlib import pyplot as plt

In [2]:
from torchvision.io import read_image
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import nn
import torchvision.transforms as transforms

### Part 1: Datasets and Dataloaders

In [3]:
# Define dataset class
class IntelImageDataset(Dataset):
    """
    Custom class to wrap around Intel Image dataset.
    """
    def __init__(self, root_dir, split="seg_train", transform=None):
        """
        Init function for the class
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform

        # Define class labels
        self.classes = ["buildings", "forest", "glacier", "mountain", "sea", "street"]
        self.class_indices =  {cls: idx for idx, cls in enumerate(self.classes)}

        # Init images and labels
        self.images = []
        self.labels = []

        self.data_dir = os.path.join(root_dir, split)
        for class_name in self.classes:
            class_dir = os.path.join(self.data_dir, class_name)
            
            # Handle non existant paths
            if not os.path.exists(class_dir):
                continue
            
            # Parse paths
            for image_path in os.listdir(class_dir):
                # if image_path.endswith(".jpg", ".jpeg", "png"):
                
                self.images.append(os.path.join(class_dir, image_path))
                self.labels.append(self.class_indices[class_name])

    def __len__(self):
        """
        Len member function
        """
        return len(self.images)


    def __getitem__(self, idx):
        """
        Get member function
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Load image
        image_path = self.images[idx]
        image = Image.open(image_path).convert("RGB")
        label = self.labels[idx]

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        return image, label

In [4]:
# TODO: Implement transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization (ImageNet)
]) 

In [5]:
# Initialize Datasets
train_data = IntelImageDataset("./data/seg_train/", "seg_train/", transform)
test_data = IntelImageDataset("./data/seg_test/", "seg_test/", transform)

In [None]:
from torch.utils.data import random_split
# Initialize Dataloaders

dataset_size = len(train_data)
train_size = int(0.85 * dataset_size)
val_size = dataset_size - train_size

train_data, val_data = random_split(train_data, [train_size, val_size])

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

### Part 2: Train Functions

In [7]:
def train_loop(dataloader, model, loss_fn, optimizer, device="cpu"):
    """
    Model training loop
    """
    model.train()
    size = len(dataloader.dataset)
    running_loss = 0

    for batch, (inputs, labels) in enumerate(dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        running_loss += loss.item()

        if (batch+1) % 100 == 0:
            print(f"Step [{batch+1}/{len(dataloader)}], Loss: {running_loss/100:.4f}")
            # print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch+1}/{len(dataloader)}], "
            #       f"Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

In [8]:
def test_loop(dataloader, model, loss_fn, device):
    """
    Model test loop
    """
    model.eval()
    size = len(dataloader)
    # test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            pred = outputs
            _, pred = torch.max(outputs, dim=1) # TODO: check functionality
            

            # test_loss += loss_fn(pred, labels).item()
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    
    # test_loss /= size
    correct /= total
    print(f"Test Error: \n Accuracy: {100*correct:>1f}%")

### Part 3: Transfer Learning

In [12]:
# First try with ResNet-50 as a fixed feature extractor
model_fixed = models.resnet50(pretrained=True)

for param in model_fixed.parameters():
    param.requires_grad = False

num_ftrs = model_fixed.fc.in_features
num_labels = 6  # TODO: make this dynamic
model_fixed.fc = nn.Linear(num_ftrs, num_labels)



In [None]:
criterion_fixed = nn.CrossEntropyLoss()
optimizer_fixed = torch.optim.SGD(model_fixed.fc.parameters(), lr=0.0001, momentum=0.9)

In [15]:
# Check for CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_fixed.to(device)
print(f"CUDA Available: {torch.cuda.is_available()}")

CUDA Available: True


In [None]:
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch: [{epoch+1}/{num_epochs}]")
    train_loop(train_dataloader, model_fixed, criterion_fixed, optimizer_fixed, device)
    test_loop(test_dataloader, model_fixed, criterion_fixed, device)

Epoch: [1/10]


KeyboardInterrupt: 

In [18]:
# Validation Accuracy
test_loop(val_dataloader, model, criterion, device)

Test Error: 
 Accuracy: 85.137702%


In [9]:
# Finetuning ResNet50
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
num_labels = 6  # TODO: make this dynamic
model.fc = nn.Linear(num_ftrs, num_labels)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()
model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch: [{epoch+1}/{num_epochs}]")
    train_loop(train_dataloader, model, criterion, optimizer, device)
    test_loop(test_dataloader, model, criterion, device)

Epoch: [1/10]
Step [100/373], Loss: 1.8326
Step [200/373], Loss: 1.6776
Step [300/373], Loss: 1.5676
Test Error: 
 Accuracy: 65.933333%
Epoch: [2/10]
Step [100/373], Loss: 1.3759
Step [200/373], Loss: 1.2891
Step [300/373], Loss: 1.2031
Test Error: 
 Accuracy: 81.500000%
Epoch: [3/10]
Step [100/373], Loss: 1.0665
Step [200/373], Loss: 0.9967
Step [300/373], Loss: 0.9286
Test Error: 
 Accuracy: 85.433333%
Epoch: [4/10]
Step [100/373], Loss: 0.8279
Step [200/373], Loss: 0.7947
Step [300/373], Loss: 0.7431
Test Error: 
 Accuracy: 86.933333%
Epoch: [5/10]
Step [100/373], Loss: 0.6845
Step [200/373], Loss: 0.6412
Step [300/373], Loss: 0.6141
Test Error: 
 Accuracy: 88.033333%
Epoch: [6/10]
Step [100/373], Loss: 0.5592
Step [200/373], Loss: 0.5614
Step [300/373], Loss: 0.5167
Test Error: 
 Accuracy: 89.400000%
Epoch: [7/10]
Step [100/373], Loss: 0.4738
Step [200/373], Loss: 0.4568
Step [300/373], Loss: 0.4542
Test Error: 
 Accuracy: 89.266667%
Epoch: [8/10]
Step [100/373], Loss: 0.4213
Step 

In [11]:
# Validation Accuracy
test_loop(val_dataloader, model, criterion, device)

Test Error: 
 Accuracy: 89.981007%
