In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import os
import copy
import numpy as np

In [2]:
# ================= CONFIGURATION =================
# Dataset roots. Each directory is expected to exist relative to the working
# directory of the kernel.
CLEAN_DATA_DIR = "./ImageNet_images"
LOW_LIGHT_DIR = "./Distorted_Images/Low_Light"
WATER_OCCLUSION_DIR = "./Distorted_Images/Water_Occlusion"

# File the trained model's state_dict is written to.
MODEL_SAVE_PATH = "resnet50_clean_baseline2.pth"

BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
NUM_CLASSES = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the global RNGs once, up front, so that a fresh "Restart & Run All"
# starts from the same random state (e.g. initialisation of the new classifier
# head and DataLoader shuffling). GPU kernels may still introduce small
# run-to-run differences.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
# =================================================

In [3]:
def get_transforms():
    """Build the image preprocessing pipelines.

    Returns:
        dict with two ``transforms.Compose`` pipelines:
          * ``'train'`` - random resized crop to 224, random horizontal flip,
            tensor conversion, normalisation.
          * ``'val'``   - resize to 256, centre crop to 224, tensor conversion,
            normalisation.
    """
    # Standard ImageNet channel statistics.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    # Tail shared by both pipelines: PIL image -> tensor -> normalised tensor.
    shared_tail = [
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std),
    ]

    train_pipeline = transforms.Compose(
        [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), *shared_tail]
    )
    val_pipeline = transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(224), *shared_tail]
    )

    return {'train': train_pipeline, 'val': val_pipeline}

In [4]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` and return it loaded with its best-validating weights.

    Every epoch performs one optimisation pass over ``train_loader`` followed
    by a gradient-free pass over ``val_loader``. The weights of the first epoch
    that reaches the highest validation accuracy are restored before the model
    is returned.

    Args:
        model: ``nn.Module`` producing per-class scores with classes on dim 1.
        train_loader: iterable of ``(inputs, labels)`` batches to optimise on.
        val_loader: iterable of ``(inputs, labels)`` batches used to pick the
            best epoch.
        criterion: loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping ``model``'s parameters.
        num_epochs: number of train + validate rounds to run.

    Returns:
        The same ``model`` object, holding the best weights seen (or its
        initial weights if no epoch scored above 0 accuracy).

    Raises:
        ValueError: if a loader yields no samples during an epoch.
    """
    print(f"\n[Step 1] Starting Training on CLEAN data for {num_epochs} epochs...")

    # Batches must live where the model lives; only a parameter-less model
    # falls back to the notebook-wide DEVICE.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        # Tensor counter on the model's device: avoids a host sync per batch
        # and stays well-defined even if the loader yields nothing.
        train_correct = torch.zeros((), dtype=torch.long, device=device)
        train_seen = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_correct += (outputs.argmax(dim=1) == labels).sum()
            train_seen += inputs.size(0)

        if train_seen == 0:
            raise ValueError("train_loader yielded no samples")
        # Divide by the samples actually seen so the figure stays correct even
        # when the loader does not yield the whole dataset.
        epoch_acc = train_correct.item() / train_seen

        # Validation Phase (Check against Clean Baseline)
        model.eval()
        val_correct = torch.zeros((), dtype=torch.long, device=device)
        val_seen = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                val_correct += (outputs.argmax(dim=1) == labels).sum()
                val_seen += inputs.size(0)

        if val_seen == 0:
            raise ValueError("val_loader yielded no samples")
        val_acc = val_correct.item() / val_seen

        print(f'Epoch {epoch+1}/{num_epochs} | Train Acc: {epoch_acc:.4f} | Val Acc (Clean): {val_acc:.4f}')

        # Strict '>' keeps the earliest epoch among ties.
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    print(f"Training Complete. Best Clean Accuracy: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model

In [5]:
def evaluate_model(model, dataloader, description):
    """Measure top-1 accuracy of ``model`` over ``dataloader``.

    Args:
        model: ``nn.Module`` producing per-class scores with classes on dim 1.
        dataloader: iterable of ``(inputs, labels)`` batches.
        description: label printed in the progress line.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.

    Raises:
        ValueError: if ``dataloader`` yields no samples, since accuracy is
            undefined in that case.
    """
    model.eval()

    # Batches must live where the model lives; only a parameter-less model
    # falls back to the notebook-wide DEVICE.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    # Tensor counter: avoids a host sync per batch and is well-defined even
    # when the loop body never runs.
    correct_count = torch.zeros((), dtype=torch.long, device=device)
    total = 0

    print(f"--> Evaluating on {description}...")

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            correct_count += (outputs.argmax(dim=1) == labels).sum()
            total += inputs.size(0)

    if total == 0:
        raise ValueError(f"No samples to evaluate for {description}")

    corrects = correct_count.item()
    acc = corrects / total
    print(f"    Accuracy: {acc:.4f} ({corrects}/{total})")
    return acc

## MAIN EXECUTION 

In [6]:
# --- 1. PREPARE DATA ---
data_transforms = get_transforms()

print("--- DATA PREPARATION ---")

# A. CLEAN DATA (Train & Val)
# Fail fast: nothing below can run without the clean images.
if not os.path.exists(CLEAN_DATA_DIR):
    raise FileNotFoundError(f"Clean data directory not found at {CLEAN_DATA_DIR}")

# Two views over the same folder: identical files, but the training view applies
# the 'train' transforms and the validation view applies the 'val' transforms.
full_clean_dataset_train = datasets.ImageFolder(
    CLEAN_DATA_DIR, transform=data_transforms['train']
)
full_clean_dataset_val = datasets.ImageFolder(
    CLEAN_DATA_DIR, transform=data_transforms['val']
)

# Stratified 80% / 20% split over sample indices, so the same index arrays can
# be applied to either view.
targets = full_clean_dataset_train.targets
train_idx, val_idx = train_test_split(
    np.arange(len(targets)),
    test_size=0.2,
    random_state=42,
    stratify=targets,
)

train_subset = Subset(full_clean_dataset_train, train_idx)
clean_val_subset = Subset(full_clean_dataset_val, val_idx)

# Only the training loader is shuffled.
train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True)
clean_val_loader = DataLoader(clean_val_subset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Clean Images Found: {len(full_clean_dataset_train)}")
print(f"--> Training set: {len(train_subset)} images")
print(f"--> Clean Validation set (Baseline R): {len(clean_val_subset)} images")

--- DATA PREPARATION ---
Clean Images Found: 610
--> Training set: 488 images
--> Clean Validation set (Baseline R): 122 images


In [7]:
# B. LOW LIGHT DATA
# Stays None when the directory is missing so the evaluation step can skip it.
low_light_loader = None
if os.path.exists(LOW_LIGHT_DIR):
    low_light_dataset = datasets.ImageFolder(LOW_LIGHT_DIR, transform=data_transforms['val'])

    # ImageFolder assigns label indices from the sorted sub-folder names. If this
    # folder's class layout differs from the training folder's, the same integer
    # label would mean different classes and the reported accuracy would be
    # meaningless - so refuse to continue instead of reporting a wrong number.
    if low_light_dataset.class_to_idx != full_clean_dataset_train.class_to_idx:
        raise ValueError(
            f"Class folders in {LOW_LIGHT_DIR} do not match {CLEAN_DATA_DIR}: "
            f"{low_light_dataset.class_to_idx} vs {full_clean_dataset_train.class_to_idx}"
        )

    low_light_loader = DataLoader(low_light_dataset, batch_size=BATCH_SIZE, shuffle=False)
    print(f"--> Low Light Test set: {len(low_light_dataset)} images")
else:
    print(f"WARNING: Low Light directory {LOW_LIGHT_DIR} not found. Skipping Low Light eval.")

--> Low Light Test set: 684 images


In [8]:
# C. WATER OCCLUSION DATA
# Stays None when the directory is missing so the evaluation step can skip it.
water_loader = None
if os.path.exists(WATER_OCCLUSION_DIR):
    water_dataset = datasets.ImageFolder(WATER_OCCLUSION_DIR, transform=data_transforms['val'])

    # ImageFolder assigns label indices from the sorted sub-folder names. If this
    # folder's class layout differs from the training folder's, the same integer
    # label would mean different classes and the reported accuracy would be
    # meaningless - so refuse to continue instead of reporting a wrong number.
    if water_dataset.class_to_idx != full_clean_dataset_train.class_to_idx:
        raise ValueError(
            f"Class folders in {WATER_OCCLUSION_DIR} do not match {CLEAN_DATA_DIR}: "
            f"{water_dataset.class_to_idx} vs {full_clean_dataset_train.class_to_idx}"
        )

    water_loader = DataLoader(water_dataset, batch_size=BATCH_SIZE, shuffle=False)
    print(f"--> Water Occlusion Test set: {len(water_dataset)} images")
else:
    print(f"WARNING: Water Occlusion directory {WATER_OCCLUSION_DIR} not found. Skipping Water eval.")

--> Water Occlusion Test set: 530 images


In [9]:
# --- 2. INITIALIZE & TRAIN MODEL ---
print("\n--- MODEL TRAINING ---")

# The classifier head is sized from NUM_CLASSES, so make sure that constant
# agrees with the class folders actually found on disk before training.
num_dataset_classes = len(full_clean_dataset_train.classes)
if num_dataset_classes != NUM_CLASSES:
    raise ValueError(
        f"NUM_CLASSES is {NUM_CLASSES} but {CLEAN_DATA_DIR} contains "
        f"{num_dataset_classes} class folders"
    )

print("Initializing ResNet-50...")
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace final layer: keep the pretrained backbone, swap the classifier head
# for a fresh linear layer with one output per target class.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
model = model.to(DEVICE)

# Define Loss and Optimizer (all parameters are optimised, not just the head)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

# Train; the returned model carries the weights of its best validation epoch
model = train_model(model, train_loader, clean_val_loader, criterion, optimizer, NUM_EPOCHS)

# Save Model (state_dict only)
torch.save(model.state_dict(), MODEL_SAVE_PATH)
print(f"Model saved to {MODEL_SAVE_PATH}")


--- MODEL TRAINING ---
Initializing ResNet-50...

[Step 1] Starting Training on CLEAN data for 10 epochs...
Epoch 1/10 | Train Acc: 0.5984 | Val Acc (Clean): 0.9426
Epoch 2/10 | Train Acc: 0.8996 | Val Acc (Clean): 0.9590
Epoch 3/10 | Train Acc: 0.9221 | Val Acc (Clean): 0.9590
Epoch 4/10 | Train Acc: 0.9549 | Val Acc (Clean): 0.9590
Epoch 5/10 | Train Acc: 0.9365 | Val Acc (Clean): 0.9672
Epoch 6/10 | Train Acc: 0.9549 | Val Acc (Clean): 0.9754
Epoch 7/10 | Train Acc: 0.9344 | Val Acc (Clean): 0.9918
Epoch 8/10 | Train Acc: 0.9693 | Val Acc (Clean): 0.9918
Epoch 9/10 | Train Acc: 0.9508 | Val Acc (Clean): 0.9836
Epoch 10/10 | Train Acc: 0.9734 | Val Acc (Clean): 0.9918
Training Complete. Best Clean Accuracy: 0.9918
Model saved to resnet50_clean_baseline2.pth


In [10]:
# --- 3. COMPREHENSIVE EVALUATION ---
# Each degraded set is scored with the same model and compared against the
# clean validation accuracy. The optional loaders are None when their directory
# was not found, so test for None explicitly: plain truthiness would call
# len() on the DataLoader, which is not the question being asked here.
print("\n" + "="*60)
print("FINAL RESULTS: COMPREHENSIVE DEGRADATION ANALYSIS")
print("="*60)

# 1. Baseline Accuracy (R)
# NOTE(review): this is the same split train_model used to pick the best
# epoch, so the baseline is likely optimistic - confirm whether a separate
# held-out clean set is wanted.
acc_clean = evaluate_model(model, clean_val_loader, "Clean Validation Set (Baseline R)")

# 2. Low Light Accuracy (R'1) & Drop
if low_light_loader is not None:
    print("-" * 30)
    acc_low_light = evaluate_model(model, low_light_loader, "Low Light Set (Degraded R')")
    drop_low = acc_clean - acc_low_light
    print(f"PERFORMANCE DROP (Low Light): {drop_low*100:.2f}%")

# 3. Water Occlusion Accuracy (R'2) & Drop
if water_loader is not None:
    print("-" * 30)
    acc_water = evaluate_model(model, water_loader, "Water Occlusion Set (Degraded R'')")
    drop_water = acc_clean - acc_water
    print(f"PERFORMANCE DROP (Water):     {drop_water*100:.2f}%")

# Summary table; drops are absolute differences in accuracy.
print("="*60)
print(f"Baseline Clean Accuracy:   {acc_clean*100:.2f}%")
if low_light_loader is not None:
    print(f"Low Light Accuracy:        {acc_low_light*100:.2f}%  (Drop: {drop_low*100:.2f}%)")
if water_loader is not None:
    print(f"Water Occlusion Accuracy:  {acc_water*100:.2f}%  (Drop: {drop_water*100:.2f}%)")
print("="*60)


FINAL RESULTS: COMPREHENSIVE DEGRADATION ANALYSIS
--> Evaluating on Clean Validation Set (Baseline R)...
    Accuracy: 0.9918 (121/122)
------------------------------
--> Evaluating on Low Light Set (Degraded R')...
    Accuracy: 0.8450 (578/684)
PERFORMANCE DROP (Low Light): 14.68%
------------------------------
--> Evaluating on Water Occlusion Set (Degraded R'')...
    Accuracy: 0.4943 (262/530)
PERFORMANCE DROP (Water):     49.75%
Baseline Clean Accuracy:   99.18%
Low Light Accuracy:        84.50%  (Drop: 14.68%)
Water Occlusion Accuracy:  49.43%  (Drop: 49.75%)
