In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import torchvision.models as models
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import os
from collections import defaultdict
import random
from sklearn.preprocessing import StandardScaler

In [2]:
from LoadData import SceneDataset
from Models import ShallowCNN, ImprovedCNN, AlexNetTransfer, AlexNetFeatureExtractor, DAG_SVM
from ModelUtilities import train_model, evaluate_model, extract_features
from Utilities import plot_training_history, plot_confusion_matrix

In [3]:
# One seed shared by every random number generator the notebook uses, so it
# can be changed in a single place.
SEED = 0

torch.manual_seed(SEED)  # PyTorch global generator
np.random.seed(SEED)     # NumPy legacy global generator
random.seed(SEED)        # Python standard-library generator

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')


def _image_to_float_chw(img):
    """Turn an image into a channels-first float32 tensor without rescaling.

    The image is converted to a NumPy array and its last axis is moved to the
    front, so the array must have exactly three axes (presumably H x W x C
    from a PIL image -- confirm against SceneDataset).
    """
    return torch.from_numpy(np.array(img)).permute(2, 0, 1).float()


# Shared final step of both 64x64 pipelines. The Normalize(mean=0.5, std=0.5)
# step is currently disabled, so tensors keep the raw pixel scale.
_to_float_chw = transforms.Lambda(_image_to_float_chw)

# Data transforms
# Task 1: anisotropic rescaling to 64x64 (aspect ratio is not preserved).
basic_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    _to_float_chw,
])

# Task 2: same rescaling plus a random left-right reflection.
augmented_transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(p=0.5),
    _to_float_chw,
])

# 224x224 inputs scaled to [0, 1] by ToTensor, then normalised per channel
# with the mean/std values given below.
imagenet_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

Using device: cpu


In [None]:
# Task 1 datasets: both splits go through the plain 64x64 resize pipeline.
train_dataset, test_dataset = (
    SceneDataset('data/train', transform=basic_transform),
    SceneDataset('data/test', transform=basic_transform),
)

In [None]:
# Hold out 15% of the training images for validation; the validation size is
# the remainder so the two sizes always add up to len(train_dataset).
train_size = int(0.85 * len(train_dataset))
val_size = len(train_dataset) - train_size

# A dedicated, explicitly seeded generator makes the split identical every
# time this cell runs, regardless of which cells used the global RNG before.
train_subset, val_subset = random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(0),
)

# Create data loaders: only the training loader reshuffles each epoch.
train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Task 1: train the shallow CNN on the 64x64 data and plot its history.
shallow_model = ShallowCNN(num_classes=15)
history = train_model(
    shallow_model, train_loader, val_loader,
    num_epochs=12, learning_rate=1e-2,
    # Use the device selected at the top of the notebook instead of a
    # hard-coded 'cpu', so training and evaluate_model() agree on placement.
    device=device,
    patience=10, lr_min=5e-3,
)
plot_training_history(history)

In [None]:
# Evaluate the shallow CNN on the test loader; keep every returned piece.
(
    accuracy_shallow,
    cm_shallow,
    all_predictions_shallow,
    all_labels_shallow,
) = evaluate_model(shallow_model, test_loader, device)

In [None]:
# Display the shallow CNN's test accuracy as the cell output.
accuracy_shallow

In [None]:
# The second argument is the sorted set of distinct labels seen in the test data.
class_ids_shallow = np.unique(np.array(all_labels_shallow))
plot_confusion_matrix(cm_shallow, class_ids_shallow)

In [None]:
# Task 2 datasets. Only the training data is augmented: the test set uses the
# deterministic basic_transform so the reported test accuracy does not change
# from run to run because of random horizontal flips.
train_dataset_improved = SceneDataset('data/train', transform=augmented_transform)
test_dataset_improved = SceneDataset('data/test', transform=basic_transform)

In [None]:
# Hold out 15% of the augmented training dataset for validation; the
# validation size is the remainder so both sizes sum to the dataset length.
train_size_improved = int(0.85 * len(train_dataset_improved))
val_size_improved = len(train_dataset_improved) - train_size_improved

# A dedicated, explicitly seeded generator makes the split identical every
# time this cell runs, regardless of which cells used the global RNG before.
# NOTE(review): both subsets wrap train_dataset_improved, so validation images
# also pass through augmented_transform (random flip) -- confirm this is wanted.
train_subset_improved, val_subset_improved = random_split(
    train_dataset_improved,
    [train_size_improved, val_size_improved],
    generator=torch.Generator().manual_seed(0),
)

# Create data loaders: only the training loader reshuffles each epoch.
train_loader_improved = DataLoader(train_subset_improved, batch_size=32, shuffle=True)
val_loader_improved = DataLoader(val_subset_improved, batch_size=32, shuffle=False)
test_loader_improved = DataLoader(test_dataset_improved, batch_size=32, shuffle=False)

In [None]:
# Task 2: train the improved CNN on the augmented data and plot its history.
improved_model = ImprovedCNN(num_classes=15)
history = train_model(
    improved_model, train_loader_improved, val_loader_improved,
    num_epochs=12, learning_rate=5e-3,
    # Use the device selected at the top of the notebook instead of a
    # hard-coded 'cpu', so training and evaluate_model() agree on placement.
    device=device,
    patience=10, lr_min=5e-4,
)
plot_training_history(history)

In [None]:
# Evaluate the improved CNN on its test loader; keep every returned piece.
(
    accuracy_improved,
    cm_improved,
    all_predictions_improved,
    all_labels_improved,
) = evaluate_model(improved_model, test_loader_improved, device)

In [None]:
# Display the improved CNN's test accuracy as the cell output.
accuracy_improved

In [None]:
# The second argument is the sorted set of distinct labels seen in the test data.
class_ids_improved = np.unique(np.array(all_labels_improved))
plot_confusion_matrix(cm_improved, class_ids_improved)

In [5]:
# Datasets for the AlexNet-based models: 224x224 inputs with the
# normalisation defined in imagenet_transform.
train_dataset_imn, test_dataset_imn = (
    SceneDataset('data/train', transform=imagenet_transform),
    SceneDataset('data/test', transform=imagenet_transform),
)

In [6]:
# Hold out 15% of the training images for validation; the validation size is
# the remainder so both sizes sum to len(train_dataset_imn).
train_size_imn = int(0.85 * len(train_dataset_imn))
val_size_imn = len(train_dataset_imn) - train_size_imn

# A dedicated, explicitly seeded generator makes the split identical every
# time this cell runs, regardless of which cells used the global RNG before
# (this notebook was last executed with several earlier cells skipped).
train_subset_imn, val_subset_imn = random_split(
    train_dataset_imn,
    [train_size_imn, val_size_imn],
    generator=torch.Generator().manual_seed(0),
)

# Create data loaders: only the training loader reshuffles each epoch.
train_loader_imn = DataLoader(train_subset_imn, batch_size=32, shuffle=True)
val_loader_imn = DataLoader(val_subset_imn, batch_size=32, shuffle=False)
test_loader_imn = DataLoader(test_dataset_imn, batch_size=32, shuffle=False)

In [None]:
# Initialize model
model_imn = AlexNetTransfer(num_classes=15, freeze_features=True)
model_imn = model_imn.to(device)

In [None]:
# Train the AlexNet transfer model and plot its history.
history = train_model(
    model_imn, train_loader_imn, val_loader_imn,
    num_epochs=12, learning_rate=5e-3,
    # model_imn was already moved to `device`; train on that same device
    # instead of a hard-coded 'cpu' so the placements cannot disagree.
    device=device,
    patience=10, lr_min=5e-4,
)
plot_training_history(history)

In [None]:
# Evaluate the AlexNet transfer model on its test loader; keep every returned piece.
(
    accuracy_imn,
    cm_imn,
    all_predictions_imn,
    all_labels_imn,
) = evaluate_model(model_imn, test_loader_imn, device)

In [None]:
# Display the AlexNet transfer model's test accuracy as the cell output.
accuracy_imn

In [None]:
# The second argument is the sorted set of distinct labels seen in the test data.
class_ids_imn = np.unique(np.array(all_labels_imn))
plot_confusion_matrix(cm_imn, class_ids_imn)

In [7]:
# AlexNet feature extractor reading from the fc2 layer (4096 features),
# created and moved to the selected device in one step.
feature_extractor = AlexNetFeatureExtractor(layer_name='fc2').to(device)



In [8]:
# Run the feature extractor over the three loaders in order (train, val,
# test) and unpack each (features, labels) pair.
(
    (train_features, train_labels),
    (val_features, val_labels),
    (test_features, test_labels),
) = (
    extract_features(feature_extractor, loader, device)
    for loader in (train_loader_imn, val_loader_imn, test_loader_imn)
)


Extracting features from 40 batches...
Processed batch 0/40
Extracting features from 8 batches...
Processed batch 0/8
Extracting features from 94 batches...
Processed batch 0/94
Processed batch 50/94


In [9]:
# Fit the scaler on the training features only, then apply that same
# transformation to every split.
# NOTE(review): val_features_scaled is not used by any later cell visible here.
scaler = StandardScaler().fit(train_features)

train_features_scaled = scaler.transform(train_features)
val_features_scaled = scaler.transform(val_features)
test_features_scaled = scaler.transform(test_features)

In [10]:
# Multi-class SVM built from pairwise binary classifiers: the captured output
# reports 105 of them, i.e. one per pair of the 15 classes. Uses an RBF kernel.
dag_svm = DAG_SVM(kernel='rbf')
# Train on the standardised training features.
dag_svm.fit(train_features_scaled, train_labels)

Training 105 binary SVM classifiers...
Training classifier 1/105: Class 0 vs Class 1
Training classifier 2/105: Class 0 vs Class 2
Training classifier 3/105: Class 0 vs Class 3
Training classifier 4/105: Class 0 vs Class 4
Training classifier 5/105: Class 0 vs Class 5
Training classifier 6/105: Class 0 vs Class 6
Training classifier 7/105: Class 0 vs Class 7
Training classifier 8/105: Class 0 vs Class 8
Training classifier 9/105: Class 0 vs Class 9
Training classifier 10/105: Class 0 vs Class 10
Training classifier 11/105: Class 0 vs Class 11
Training classifier 12/105: Class 0 vs Class 12
Training classifier 13/105: Class 0 vs Class 13
Training classifier 14/105: Class 0 vs Class 14
Training classifier 15/105: Class 1 vs Class 2
Training classifier 16/105: Class 1 vs Class 3
Training classifier 17/105: Class 1 vs Class 4
Training classifier 18/105: Class 1 vs Class 5
Training classifier 19/105: Class 1 vs Class 6
Training classifier 20/105: Class 1 vs Class 7
Training classifier 21/10

In [12]:
# Predict the test split with the fitted DAG-SVM, then compute the fraction
# of predictions that match the test labels.
dag_test_predictions = dag_svm.predict(test_features_scaled)
dag_test_acc = accuracy_score(y_true=test_labels, y_pred=dag_test_predictions)


In [13]:
# Display the DAG-SVM test accuracy as the cell output.
dag_test_acc

0.8036850921273032