In [2]:
# prompt: mount drive

# Mount Google Drive at /content/drive so the CSV and checkpoint paths used
# below (all under /content/drive/MyDrive/AML-PROJECT) can be resolved.
# Requires the Colab runtime, which provides the `google.colab` package.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Pre-train CNN (ResNet18) on the Degrade dataset

In [3]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datetime import datetime
import os

In [4]:
# 1. Load CSV
df = pd.read_csv('/content/drive/MyDrive/AML-PROJECT/iris_degrade.csv')

# 2. Extract ID: the first run of digits found in each 'Label' string
df['ID'] = df['Label'].str.extract(r'(\d+)', expand=False)

# Count the number of occurrences of each ID (i.e. the number of samples per person)
id_counts = df['ID'].value_counts()

# Select the IDs with occurrences >= 10, and take the first 100.
# value_counts() sorts by count (descending), so these are the 100 most
# frequent IDs among those that pass the threshold.
valid_ids = id_counts[id_counts >= 10].head(100).index

# Filter data for these IDs
df = df[df['ID'].isin(valid_ids)]

# Stratified 80/20 train/validation split (per-ID proportions are preserved)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['ID'], random_state=42)

# Take explicit copies before adding a column: the split frames are derived
# from `df`, and assigning into them directly triggers pandas'
# SettingWithCopyWarning.
train_df = train_df.copy()
val_df = val_df.copy()

# 3. Encode labels
# NOTE(review): the split is stratified on 'ID' but classes are encoded from
# 'Label'. If one ID can map to several Label values, a Label could end up only
# in the validation split and le.transform() would raise on it - confirm.
le = LabelEncoder()
train_df['encoded_label'] = le.fit_transform(train_df['Label'])
val_df['encoded_label'] = le.transform(val_df['Label'])

# 4. Dataset class
class IrisDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs described by a DataFrame.

    The frame must provide a ``'Path'`` column (image file location) and an
    ``'encoded_label'`` column (class index). Rows are addressed by position
    (``iloc``), so the frame's index does not need to be contiguous.
    """

    def __init__(self, df, transform=None):
        """Store the metadata frame and an optional per-image transform.

        Args:
            df: DataFrame with ``'Path'`` and ``'encoded_label'`` columns.
            transform: Optional callable applied to the loaded RGB image.
        """
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the frame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the sample at position ``idx``.

        Returns:
            Tuple ``(image, label)``: the RGB image (after ``transform`` when
            one was given) and the row's ``'encoded_label'`` value.
        """
        # Fetch the row once instead of performing two separate iloc lookups.
        row = self.df.iloc[idx]
        # Close the file handle deterministically; convert() returns a new,
        # fully loaded image that stays valid after the file is closed.
        with Image.open(row['Path']) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, row['encoded_label']

# 5. Define transformations and loaders
# Per-channel mean / std handed to Normalize (the standard ImageNet statistics).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 32

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Both splits share the same deterministic pipeline (no augmentation).
train_dataset = IrisDataset(train_df, transform=transform)
val_dataset = IrisDataset(val_df, transform=transform)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)


# 6. Load ResNet18 and modify final layer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One output unit per distinct label known to the encoder.
num_classes = len(le.classes_)

# Pretrained backbone whose classification head is replaced by a fresh linear
# layer sized for this task.
# NOTE: train_resnet18() builds its own model for every run, so this instance
# is not the one trained there; `num_classes` and `device` are what it reuses.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model = model.to(device)


# 7.Loss function
# Define Focal Loss
class FocalLoss(nn.Module):
    """Multi-class focal loss built on per-sample cross-entropy.

    Each sample's cross-entropy ``CE`` is scaled by ``(1 - p_t) ** gamma``,
    where ``p_t = exp(-CE)`` is the probability assigned to the true class,
    and the scaled losses are averaged over the batch.
    """

    def __init__(self, gamma=2.0):
        """
        Args:
            gamma: Focusing exponent; ``0`` reduces to plain mean cross-entropy.
        """
        super().__init__()
        self.gamma = gamma
        # Per-sample (unreduced) losses are needed so each can be re-weighted.
        self.ce = nn.CrossEntropyLoss(reduction='none')

    def forward(self, input, target):
        """Return the batch-mean focal loss for logits ``input`` and class indices ``target``."""
        per_sample_ce = self.ce(input, target)
        true_class_prob = torch.exp(per_sample_ce.neg())
        modulating_factor = torch.pow(1 - true_class_prob, self.gamma)
        return torch.mean(modulating_factor * per_sample_ce)

# Define Label Smoothing Loss
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is shared equally by the other ``classes - 1`` classes.
    """

    def __init__(self, classes, smoothing=0.1):
        """
        Args:
            classes: Total number of classes (size of the last logits dimension).
            smoothing: Probability mass moved away from the true class.
        """
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes

    def forward(self, pred, target):
        """Return the batch-mean smoothed loss for logits ``pred`` and class indices ``target``."""
        log_probs = pred.log_softmax(dim=-1)
        # The target distribution is a constant; build it outside the autograd graph.
        with torch.no_grad():
            off_target_mass = self.smoothing / (self.cls - 1)
            smoothed = torch.full_like(log_probs, off_target_mass)
            smoothed.scatter_(1, target.unsqueeze(1), self.confidence)
        per_sample = -(smoothed * log_probs).sum(dim=-1)
        return per_sample.mean()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]


In [5]:
# 8. Evaluation Function
def evaluate(model, loader, device):
    """Score ``model`` on every batch of ``loader``.

    Puts the model in eval mode, predicts the arg-max class for each sample
    without tracking gradients, and compares against the true labels.

    Returns:
        dict with 'accuracy' plus support-weighted 'f1', 'precision' and
        'recall' (undefined per-class scores count as 0).
    """
    model.eval()
    targets, predictions = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            targets.extend(labels.cpu().numpy())
            predictions.extend(logits.argmax(dim=1).cpu().numpy())

    report = {'accuracy': accuracy_score(targets, predictions)}
    weighted_metrics = (
        ('f1', f1_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    for key, metric_fn in weighted_metrics:
        report[key] = metric_fn(targets, predictions, average='weighted', zero_division=0)
    return report

#9. Training Function
def train_resnet18(loss_fn, optimizer_cls, epochs=5, lr=1e-3, optimizer_kwargs=None):
    """Fine-tune a pretrained ResNet18 on ``train_loader`` and save its weights.

    Relies on the notebook globals ``num_classes``, ``device``,
    ``train_loader`` and ``val_loader``. A fresh model is built on every call,
    so runs do not share weights.

    Args:
        loss_fn: Callable ``loss_fn(logits, targets)`` returning a scalar tensor.
        optimizer_cls: Optimizer factory, called as
            ``optimizer_cls(params, lr=lr, **optimizer_kwargs)`` (e.g. ``optim.Adam``).
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer. The default keeps the
            previously hard-coded value of 1e-3.
        optimizer_kwargs: Optional dict of extra optimizer arguments, e.g.
            ``{'momentum': 0.9}`` for SGD.

    Returns:
        dict with keys 'loss_fn', 'optimizer', 'accuracy', 'f1', 'precision',
        'recall' (validation metrics of the final weights) and 'timestamp'.
    """
    # Resolve display names before training. The checkpoint filename used to
    # read `optimizer_cls.__name__` unguarded, so a factory without that
    # attribute (e.g. functools.partial) crashed only after training finished.
    optimizer_name = getattr(optimizer_cls, '__name__', None)
    if optimizer_name is None:
        wrapped = getattr(optimizer_cls, 'func', None)  # functools.partial target
        optimizer_name = getattr(wrapped, '__name__', None) or type(optimizer_cls).__name__
    loss_name = loss_fn.__class__.__name__

    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model = model.to(device)

    optimizer = optimizer_cls(model.parameters(), lr=lr, **(optimizer_kwargs or {}))

    metrics = None
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        metrics = evaluate(model, val_loader, device)
        print(f"Epoch {epoch+1} | Loss: {total_loss/len(train_loader):.4f} | "
              f"Acc: {metrics['accuracy']:.4f} | F1: {metrics['f1']:.4f}")

    # The weights do not change after the last epoch's evaluation, so reuse it;
    # a separate pass is only needed when no epoch ran at all.
    final_metrics = metrics if metrics is not None else evaluate(model, val_loader, device)

    # One timestamp for both the checkpoint name and the result record, so the
    # two can be matched exactly.
    finished_at = datetime.now()
    timestamp = finished_at.strftime('%Y%m%d-%H%M%S')
    model_name = f"resnet18__{loss_name}__{optimizer_name}__{timestamp}.pth"

    save_dir = "/content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade"
    os.makedirs(save_dir, exist_ok=True)
    model_path = os.path.join(save_dir, model_name)

    torch.save(model.state_dict(), model_path)
    print(f" Saved model to: {model_path}")

    return {
        'loss_fn': loss_name,
        'optimizer': optimizer_name,
        'accuracy': final_metrics['accuracy'],
        'f1': final_metrics['f1'],
        'precision': final_metrics['precision'],
        'recall': final_metrics['recall'],
        'timestamp': finished_at.isoformat()
    }

In [6]:
#10. Run Experiments
# Every loss is paired with every optimizer (3 x 2 = 6 training runs).
# NOTE: train_resnet18 applies a learning rate of 1e-3 regardless of the
# optimizer; in the recorded run the SGD configurations stayed below 3%
# validation accuracy after 5 epochs, so the Adam-vs-SGD comparison reflects
# an untuned SGD setup.
loss_fns = [
    nn.CrossEntropyLoss(),
    LabelSmoothingLoss(classes=num_classes),
    FocalLoss(gamma=2),
]
optimizers = [optim.Adam, optim.SGD]

results = []

experiment_grid = [(criterion, factory) for criterion in loss_fns for factory in optimizers]
for loss_fn, opt in experiment_grid:
    # Best-effort sweep: a failing configuration is reported and skipped so the
    # remaining combinations still run.
    try:
        print(f"\nRunning: {loss_fn.__class__.__name__} + {opt.__name__}")
        result = train_resnet18(loss_fn, opt, epochs=5)
        results.append(result)
    except Exception as e:
        print(f"[ERROR] Skipped: {loss_fn}, {opt} — {e}")


Running: CrossEntropyLoss + Adam


Epoch 1/5: 100%|██████████| 50/50 [09:11<00:00, 11.03s/it]


Epoch 1 | Loss: 4.7159 | Acc: 0.2325 | F1: 0.1949


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 2 | Loss: 1.7428 | Acc: 0.7475 | F1: 0.7253


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 3 | Loss: 0.3406 | Acc: 0.9575 | F1: 0.9540


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 4 | Loss: 0.0534 | Acc: 0.9900 | F1: 0.9882


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 5 | Loss: 0.0139 | Acc: 0.9950 | F1: 0.9948




 Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade/resnet18__CrossEntropyLoss__Adam__20250705-163214.pth

Running: CrossEntropyLoss + SGD


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 1 | Loss: 5.4472 | Acc: 0.0025 | F1: 0.0010


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 2 | Loss: 5.3449 | Acc: 0.0000 | F1: 0.0000


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 3 | Loss: 5.2451 | Acc: 0.0150 | F1: 0.0108


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.27it/s]


Epoch 4 | Loss: 5.1513 | Acc: 0.0075 | F1: 0.0058


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.27it/s]


Epoch 5 | Loss: 5.0623 | Acc: 0.0225 | F1: 0.0162




 Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade/resnet18__CrossEntropyLoss__SGD__20250705-163352.pth

Running: LabelSmoothingLoss + Adam


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.25it/s]


Epoch 1 | Loss: 4.5854 | Acc: 0.3500 | F1: 0.2719


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.26it/s]


Epoch 2 | Loss: 2.0273 | Acc: 0.8300 | F1: 0.8157


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s]


Epoch 3 | Loss: 1.1614 | Acc: 0.9475 | F1: 0.9429


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 4 | Loss: 1.0284 | Acc: 0.9725 | F1: 0.9680


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 5 | Loss: 0.9772 | Acc: 0.9725 | F1: 0.9709
 Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade/resnet18__LabelSmoothingLoss__Adam__20250705-163531.pth

Running: LabelSmoothingLoss + SGD


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s]


Epoch 1 | Loss: 5.4834 | Acc: 0.0000 | F1: 0.0000


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.26it/s]


Epoch 2 | Loss: 5.3895 | Acc: 0.0025 | F1: 0.0002


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s]


Epoch 3 | Loss: 5.3116 | Acc: 0.0050 | F1: 0.0013


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 4 | Loss: 5.2320 | Acc: 0.0100 | F1: 0.0019


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 5 | Loss: 5.1661 | Acc: 0.0100 | F1: 0.0040
 Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade/resnet18__LabelSmoothingLoss__SGD__20250705-163709.pth

Running: FocalLoss + Adam


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.23it/s]


Epoch 1 | Loss: 4.6155 | Acc: 0.3350 | F1: 0.2692


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.27it/s]


Epoch 2 | Loss: 1.2349 | Acc: 0.8025 | F1: 0.7760


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 3 | Loss: 0.1600 | Acc: 0.9375 | F1: 0.9330


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s]


Epoch 4 | Loss: 0.0359 | Acc: 0.9625 | F1: 0.9570


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 5 | Loss: 0.0250 | Acc: 0.9125 | F1: 0.9035
 Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade/resnet18__FocalLoss__Adam__20250705-163847.pth

Running: FocalLoss + SGD


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.24it/s]


Epoch 1 | Loss: 5.3939 | Acc: 0.0000 | F1: 0.0000


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.27it/s]


Epoch 2 | Loss: 5.2783 | Acc: 0.0000 | F1: 0.0000


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 3 | Loss: 5.1778 | Acc: 0.0025 | F1: 0.0013


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s]


Epoch 4 | Loss: 5.0735 | Acc: 0.0025 | F1: 0.0007


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s]


Epoch 5 | Loss: 4.9784 | Acc: 0.0175 | F1: 0.0131
 Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade/resnet18__FocalLoss__SGD__20250705-164026.pth


In [7]:
# 11. Summary Table
# One row per completed experiment, ranked from best to worst validation accuracy.
df_results = pd.DataFrame(results).sort_values(by="accuracy", ascending=False)

print("\n=== SUMMARY RESULTS ===")
print(df_results)


=== SUMMARY RESULTS ===
              loss_fn optimizer  accuracy        f1  precision  recall  \
0    CrossEntropyLoss      Adam    0.9950  0.994762   0.996458  0.9950   
2  LabelSmoothingLoss      Adam    0.9725  0.970857   0.980542  0.9725   
4           FocalLoss      Adam    0.9125  0.903534   0.918492  0.9125   
1    CrossEntropyLoss       SGD    0.0225  0.016198   0.016089  0.0225   
5           FocalLoss       SGD    0.0175  0.013088   0.016722  0.0175   
3  LabelSmoothingLoss       SGD    0.0100  0.003964   0.002502  0.0100   

                    timestamp  
0  2025-07-05T16:32:14.571924  
2  2025-07-05T16:35:31.108715  
4  2025-07-05T16:38:47.677001  
1  2025-07-05T16:33:52.877393  
5  2025-07-05T16:40:26.150366  
3  2025-07-05T16:37:09.179707  


### ResNet18 features with SVM & logistic regression on the Degrade dataset

In [8]:
# 1. Setup
import os
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from torchvision import models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import joblib

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Output directory to save models
OUTDIR = "/content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade"
os.makedirs(OUTDIR, exist_ok=True)

# 2. Load Pretrained ResNet18
# Swapping the classification head for Identity makes the network return the
# pooled backbone features instead of class logits.
# NOTE: this loads the stock pretrained weights; none of the checkpoints saved
# by train_resnet18() is loaded here.
resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()
# Inference only: move to the device and switch to eval mode.
resnet = resnet.to(device).eval()

# 3. Feature Extraction
def extract_features(loader):
    """Run every batch of ``loader`` through the global ``resnet`` feature extractor.

    Gradients are disabled. Images are moved to the global ``device``; outputs
    are brought back to the CPU as NumPy arrays.

    Returns:
        Tuple ``(features, labels)``: per-batch outputs stacked row-wise and
        the matching labels concatenated into one 1-D array, in loader order.
    """
    feature_batches = []
    label_batches = []
    with torch.no_grad():
        for imgs, lbls in tqdm(loader, desc="Extracting Features"):
            batch_feats = resnet(imgs.to(device))  # Output shape: [B, 512]
            feature_batches.append(batch_feats.cpu().numpy())
            label_batches.append(lbls.numpy())
    stacked_features = np.vstack(feature_batches)
    stacked_labels = np.hstack(label_batches)
    return stacked_features, stacked_labels

# Extract features (assumes train_loader & val_loader already defined)
# NOTE: train_loader shuffles, so the row order of X_train / y_train differs
# between runs; features and labels are collected batch by batch inside
# extract_features and therefore stay aligned with each other.
X_train, y_train = extract_features(train_loader)
X_val, y_val = extract_features(val_loader)

Extracting Features: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s]
Extracting Features: 100%|██████████| 13/13 [00:03<00:00,  3.53it/s]


In [9]:
# 4. Standardize Features
# Fit the scaler on the training features only and reuse its statistics for
# validation. The scaled arrays get their own names so the raw X_train / X_val
# stay intact and re-running this cell always starts from unscaled features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Persist the scaler next to the classifiers: they are fitted on standardized
# features and need the exact same scaling at inference time.
joblib.dump(scaler, os.path.join(OUTDIR, "feature_scaler.joblib"))


# 5. Define Classifiers
# Named `classifiers` rather than `models` so the torchvision `models` module
# used by the earlier cells (e.g. train_resnet18) is not shadowed.
classifiers = {
    'Logistic_L2': LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000),
    'SVM_RBF': SVC(kernel='rbf', probability=True),
}

# 6. Train, Save, Evaluate
results = []

for name, clf in classifiers.items():
    print(f"\n Training {name} ...")
    clf.fit(X_train_scaled, y_train)

    # Save trained model
    model_path = os.path.join(OUTDIR, f"{name}.joblib")
    joblib.dump(clf, model_path)

    # Predict
    y_pred = clf.predict(X_val_scaled)

    # Evaluate (support-weighted averages; undefined per-class scores count as 0)
    results.append({
        "variant": name,
        "accuracy": accuracy_score(y_val, y_pred),
        "f1": f1_score(y_val, y_pred, average="weighted", zero_division=0),
        "precision": precision_score(y_val, y_pred, average="weighted", zero_division=0),
        "recall": recall_score(y_val, y_pred, average="weighted", zero_division=0)
    })

# 7. Summary Table
print("\n=== SUMMARY RESULTS (ResNet18 Features + ML Classifiers) ===")
print(f"{'variant':20s} {'accuracy':>8s} {'f1':>8s} {'precision':>10s} {'recall':>8s}")
for r in results:
    print(f"{r['variant']:20s} {r['accuracy']:8.4f} {r['f1']:8.4f} {r['precision']:10.4f} {r['recall']:8.4f}")


 Training Logistic_L2 ...

 Training SVM_RBF ...

=== SUMMARY RESULTS (ResNet18 Features + ML Classifiers) ===
variant              accuracy       f1  precision   recall
Logistic_L2            0.9100   0.9051     0.9327   0.9100
SVM_RBF                0.8275   0.8223     0.8579   0.8275
