In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from PIL import Image
from torch.utils.data import Dataset
from torchvision import datasets, transforms, models
from torchvision.transforms import ToTensor

In [2]:
from torchvision.io import read_image
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import nn
import torchvision.transforms as transforms

### Part 1: Datasets and Dataloaders

In [3]:
# Define dataset class
class IntelImageDataset(Dataset):
    """
    Custom class to wrap around Intel Image dataset.
    """
    def __init__(self, root_dir, split="seg_train", transform=None):
        """
        Init function for the class
        """
        self.root_dir = root_dir
        self.split = split
        self.transform = transform

        # Define class labels
        self.classes = ["buildings", "forest", "glacier", "mountain", "sea", "street"]
        self.class_indices =  {cls: idx for idx, cls in enumerate(self.classes)}

        # Init images and labels
        self.images = []
        self.labels = []

        self.data_dir = os.path.join(root_dir, split)
        for class_name in self.classes:
            class_dir = os.path.join(self.data_dir, class_name)
            
            # Handle non existant paths
            if not os.path.exists(class_dir):
                continue
            
            # Parse paths
            for image_path in os.listdir(class_dir):
                # if image_path.endswith(".jpg", ".jpeg", "png"):
                
                self.images.append(os.path.join(class_dir, image_path))
                self.labels.append(self.class_indices[class_name])

    def __len__(self):
        """
        Len member function
        """
        return len(self.images)


    def __getitem__(self, idx):
        """
        Get member function
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Load image
        image_path = self.images[idx]
        image = Image.open(image_path).convert("RGB")
        label = self.labels[idx]

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        return image, label

In [4]:
# TODO: Implement transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization (ImageNet)
]) 

In [13]:
# Initialize Datasets
train_data = IntelImageDataset("./data/seg_train/", "seg_train/", transform)
test_data = IntelImageDataset("./data/seg_test/", "seg_test/", transform)

In [14]:
from torch.utils.data import random_split
# Initialize Dataloaders
# indices = list(range(len(train_data.labels)))
# np.random.shuffle(indices)



dataset_size = len(train_data)
train_size = int(0.85 * dataset_size)
val_size = dataset_size - train_size
# train_indices = indices[:split]
# val_indices = indices[split:]

train_data, val_data = random_split(train_data, [train_size, val_size])

# train_sampler = SubsetRandomSampler(train_indices)
# val_sampler = SubsetRandomSampler(val_indices)

train_dataloader = DataLoader(train_data, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=32, shuffle=False)

### Part 2: Training and Evaluation Functions

In [15]:
def train_loop(dataloader, model, loss_fn, optimizer, device="cpu"):
    """
    Model training loop
    """
    model.train()
    size = len(dataloader.dataset)
    running_loss = 0

    for batch, (inputs, labels) in enumerate(dataloader):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        running_loss += loss.item()

        if (batch+1) % 100 == 0:
            print(f"Step [{batch+1}/{len(dataloader)}], Loss: {running_loss/100:.4f}")
            # print(f"Epoch [{epoch+1}/{num_epochs}], Step [{batch+1}/{len(dataloader)}], "
            #       f"Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

In [16]:
def test_loop(dataloader, model, loss_fn, device):
    """
    Model test loop
    """
    model.eval()
    size = len(dataloader)
    # test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)

            pred = outputs
            _, pred = torch.max(outputs, dim=1) # TODO: check functionality
            

            # test_loss += loss_fn(pred, labels).item()
            total += labels.size(0)
            correct += (pred == labels).sum().item()
    
    # test_loss /= size
    correct /= total
    print(f"Test Error: \n Accuracy: {100*correct:>1f}%")

### Part 3: Transfer Learning

In [17]:
# First try with ResNet-50
model = models.resnet50(pretrained=True)

num_ftrs = model.fc.in_features
num_labels = 6  # TODO: make this dynamic
model.fc = nn.Linear(num_ftrs, num_labels)

In [18]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

In [19]:
# Check for CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"CUDA Available: {torch.cuda.is_available()}")

CUDA Available: True


In [20]:
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch: [{epoch+1}/{num_epochs}]")
    train_loop(train_dataloader, model, criterion, optimizer, device)
    test_loop(test_dataloader, model, criterion, device)

Epoch: [1/10]


Step [100/373], Loss: 1.7907
Step [200/373], Loss: 1.6560
Step [300/373], Loss: 1.5360
Test Error: 
 Accuracy: 65.800000%
Epoch: [2/10]
Step [100/373], Loss: 1.3472
Step [200/373], Loss: 1.2633
Step [300/373], Loss: 1.1733
Test Error: 
 Accuracy: 81.433333%
Epoch: [3/10]
Step [100/373], Loss: 1.0609
Step [200/373], Loss: 0.9892
Step [300/373], Loss: 0.9336
Test Error: 
 Accuracy: 86.066667%
Epoch: [4/10]
Step [100/373], Loss: 0.8447
Step [200/373], Loss: 0.8105
Step [300/373], Loss: 0.7685
Test Error: 
 Accuracy: 86.933333%
Epoch: [5/10]
Step [100/373], Loss: 0.7102
Step [200/373], Loss: 0.6618
Step [300/373], Loss: 0.6393
Test Error: 
 Accuracy: 88.400000%
Epoch: [6/10]
Step [100/373], Loss: 0.5907
Step [200/373], Loss: 0.5727
Step [300/373], Loss: 0.5544
Test Error: 
 Accuracy: 89.033333%
Epoch: [7/10]
Step [100/373], Loss: 0.5129
Step [200/373], Loss: 0.4978
Step [300/373], Loss: 0.4775
Test Error: 
 Accuracy: 89.466667%
Epoch: [8/10]
Step [100/373], Loss: 0.4565
Step [200/373], Los

In [21]:
# Validation Accuracy
test_loop(val_dataloader, model, criterion, device)

Test Error: 
 Accuracy: 91.737892%
