In [None]:
# Mount Google Drive at /content/drive (Colab only). The dataset CSV and the
# model output directories used by later cells live under this mount point.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Pre-train CNN (ResNet18) on the Degrade dataset

In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datetime import datetime
import os

In [None]:
# 1. Load CSV
df = pd.read_csv('/content/drive/MyDrive/AML-PROJECT/iris_degrade.csv')

# 2. Extract the first run of digits in each label as the subject ID
df['ID'] = df['Label'].str.extract(r'(\d+)', expand=False)

# Number of samples available for each ID
id_counts = df['ID'].value_counts()

# Keep IDs with at least 10 samples, then take the first 100 of them
# (value_counts() orders by frequency, so these are the most frequent IDs).
valid_ids = id_counts[id_counts >= 10].head(100).index

# Restrict the data to those IDs. The explicit .copy() makes the filtered frame
# independent of the raw one, so the column assignment below cannot trigger
# pandas' SettingWithCopyWarning.
df = df[df['ID'].isin(valid_ids)].copy()

# 3. Encode labels
# The encoder is fitted on the whole filtered set *before* splitting. Fitting it
# on the training split only makes le.transform() raise ValueError whenever a
# label ends up exclusively in the validation split. When every label is present
# in the training split the resulting codes are identical to the old behaviour,
# because LabelEncoder sorts its classes.
le = LabelEncoder()
df['encoded_label'] = le.fit_transform(df['Label'])

# Stratified 80/20 train/validation split with a fixed seed.
# NOTE(review): stratification is by 'ID' while the classification target is
# 'Label'; if one ID maps to several labels, per-label balance between the two
# splits is not guaranteed — confirm this is intended.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['ID'], random_state=42)

# 4. Dataset class
class IrisDataset(Dataset):
    """Map-style dataset yielding ``(image, encoded_label)`` pairs.

    Args:
        df: DataFrame with a ``'Path'`` column (image file location) and an
            ``'encoded_label'`` column (class id of that image).
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the ``idx``-th image (positional index) and return it with its label."""
        # Fetch the row once instead of doing one positional lookup per column.
        row = self.df.iloc[idx]
        # The context manager closes the file deterministically; convert()
        # returns a separate image object, so it remains usable afterwards.
        with Image.open(row['Path']) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, row['encoded_label']

# 5. Preprocessing pipeline shared by the training and validation sets:
#    resize to 224x224, convert to a tensor, then normalise each channel.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# Wrap both splits in the dataset class with the same preprocessing.
train_dataset, val_dataset = (
    IrisDataset(split, transform=transform) for split in (train_df, val_df)
)

# Only the training loader shuffles; validation keeps the DataFrame order.
loader_kwargs = {'batch_size': 32}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)


# 6. Pretrained ResNet18 whose final layer is replaced by a fresh linear head
#    with one output per encoded label.
model = models.resnet18(pretrained=True)
num_classes = len(le.classes_)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


# 7.Loss function
# Define Focal Loss
class FocalLoss(nn.Module):
    """Focal loss built on top of per-sample cross-entropy.

    Each sample's cross-entropy is scaled by ``(1 - p_t) ** gamma``, where
    ``p_t = exp(-cross_entropy)``, and the scaled values are averaged.

    Args:
        gamma: Exponent of the ``(1 - p_t)`` modulating factor.
    """

    def __init__(self, gamma=2.0):
        super().__init__()
        # reduction='none' keeps one loss value per sample so each can be re-weighted.
        self.ce = nn.CrossEntropyLoss(reduction='none')
        self.gamma = gamma

    def forward(self, input, target):
        """Return the mean focal loss for logits ``input`` and class indices ``target``."""
        per_sample_ce = self.ce(input, target)
        prob_true_class = (-per_sample_ce).exp()
        modulating_factor = (1 - prob_true_class).pow(self.gamma)
        return (modulating_factor * per_sample_ce).mean()

# Define Label Smoothing Loss
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is divided equally among the other ``classes - 1``
    classes.

    Args:
        classes: Total number of classes.
        smoothing: Probability mass moved away from the true class.
    """

    def __init__(self, classes, smoothing=0.1):
        super().__init__()
        self.cls = classes
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, pred, target):
        """Return the batch-mean loss for logits ``pred`` and class indices ``target``."""
        log_probs = torch.log_softmax(pred, dim=-1)
        off_target_mass = self.smoothing / (self.cls - 1)
        # The target distribution is a constant; keep it out of the autograd graph.
        with torch.no_grad():
            soft_targets = torch.full_like(log_probs, off_target_mass)
            soft_targets.scatter_(1, target.unsqueeze(1), self.confidence)
        per_sample_loss = -(soft_targets * log_probs).sum(dim=-1)
        return per_sample_loss.mean()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 163MB/s]


In [None]:
# 8. Evaluation Function
def evaluate(model, loader, device):
    """Score ``model`` on every batch of ``loader``.

    The model is switched to eval mode and run without gradient tracking; the
    predicted class of a sample is the argmax over its outputs.

    Args:
        model: Network to evaluate.
        loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        dict with 'accuracy', 'f1', 'precision' and 'recall'; the last three
        are weighted averages computed with ``zero_division=0``.
    """
    model.eval()
    targets, predictions = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            logits = model(inputs.to(device))
            predictions.extend(logits.argmax(dim=1).cpu().numpy())
            targets.extend(labels.cpu().numpy())

    weighted = dict(average='weighted', zero_division=0)
    return {
        'accuracy': accuracy_score(targets, predictions),
        'f1': f1_score(targets, predictions, **weighted),
        'precision': precision_score(targets, predictions, **weighted),
        'recall': recall_score(targets, predictions, **weighted),
    }

#9. Training Function
def _optimizer_label(optimizer_cls):
    """Return a short name for an optimizer factory, usable inside a file name."""
    label = getattr(optimizer_cls, '__name__', None)
    if label is None:
        # e.g. functools.partial(optim.SGD, momentum=0.9) has no __name__ of its
        # own; fall back to the wrapped callable, then to the wrapper's type name.
        wrapped = getattr(optimizer_cls, 'func', None)
        label = getattr(wrapped, '__name__', None) or type(optimizer_cls).__name__
    return label


def train_resnet18(loss_fn, optimizer_cls, epochs=5, lr=1e-3):
    """Fine-tune a pretrained ResNet18, save its weights and return validation metrics.

    Relies on the notebook-level ``num_classes``, ``device``, ``train_loader``
    and ``val_loader``.

    Args:
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        optimizer_cls: Optimizer factory called as
            ``optimizer_cls(model.parameters(), lr=lr)``.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer (default keeps the previous
            hard-coded value of 1e-3).

    Returns:
        dict with the loss and optimizer names, the validation 'accuracy',
        'f1', 'precision' and 'recall' of the trained model, and an ISO
        'timestamp'.
    """
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
    # of `weights=` — confirm the installed version before switching.
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model = model.to(device)

    optimizer = optimizer_cls(model.parameters(), lr=lr)

    # Resolve both names up front so that a factory without __name__ cannot
    # raise only after training has finished and before the model is saved.
    loss_name = loss_fn.__class__.__name__
    optimizer_name = _optimizer_label(optimizer_cls)

    metrics = None
    for epoch in range(epochs):
        model.train()  # evaluate() leaves the model in eval mode
        total_loss = 0
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / max(len(train_loader), 1)  # guard against an empty loader
        metrics = evaluate(model, val_loader, device)
        print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f} | "
              f"Acc: {metrics['accuracy']:.4f} | F1: {metrics['f1']:.4f}")

    # The model is unchanged since the last epoch's evaluation, so reuse those
    # metrics; evaluate from scratch only when no epoch ran.
    final_metrics = metrics if metrics is not None else evaluate(model, val_loader, device)

    # A single clock reading keeps the file name and the result record in sync.
    finished_at = datetime.now()
    timestamp = finished_at.strftime('%Y%m%d-%H%M%S')
    model_name = f"resnet18__{loss_name}__{optimizer_name}__{timestamp}.pth"

    save_dir = "/content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/Resnet18_Degrade"
    os.makedirs(save_dir, exist_ok=True)
    model_path = os.path.join(save_dir, model_name)

    torch.save(model.state_dict(), model_path)
    print(f" Saved model to: {model_path}")

    return {
        'loss_fn': loss_name,
        'optimizer': optimizer_name,
        'accuracy': final_metrics['accuracy'],
        'f1': final_metrics['f1'],
        'precision': final_metrics['precision'],
        'recall': final_metrics['recall'],
        'timestamp': finished_at.isoformat()
    }

In [None]:
#10. Run Experiments
# Loss functions under comparison.
loss_fns = [
    nn.CrossEntropyLoss(),
    LabelSmoothingLoss(classes=num_classes),
    FocalLoss(gamma=2),
]

# Optimizer classes under comparison; train_resnet18 instantiates them.
optimizers = [optim.Adam, optim.SGD]

# Every (loss, optimizer) pairing: losses in the outer position, optimizers inner.
experiment_grid = [
    (criterion, factory) for criterion in loss_fns for factory in optimizers
]

results = []
for loss_fn, opt in experiment_grid:
    # A failing combination is reported and skipped so the remaining ones still run.
    try:
        print(f"\nRunning: {loss_fn.__class__.__name__} + {opt.__name__}")
        result = train_resnet18(loss_fn, opt, epochs=5)
        results.append(result)
    except Exception as e:
        print(f"[ERROR] Skipped: {loss_fn}, {opt} — {e}")


Running: CrossEntropyLoss + Adam


Epoch 1/5: 100%|██████████| 50/50 [08:54<00:00, 10.69s/it]


Epoch 1 | Loss: 4.6320 | Acc: 0.2700 | F1: 0.2256


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 2 | Loss: 1.6057 | Acc: 0.7150 | F1: 0.6795


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.19it/s]


Epoch 3 | Loss: 0.3421 | Acc: 0.9475 | F1: 0.9404


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.20it/s]


Epoch 4 | Loss: 0.0637 | Acc: 0.9900 | F1: 0.9879


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 5 | Loss: 0.0144 | Acc: 0.9950 | F1: 0.9950
✅ Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/resnet18__CrossEntropyLoss__Adam__20250704-190140.pth

Running: CrossEntropyLoss + SGD


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 1 | Loss: 5.4508 | Acc: 0.0075 | F1: 0.0021


Epoch 2/5: 100%|██████████| 50/50 [00:16<00:00,  3.10it/s]


Epoch 2 | Loss: 5.3400 | Acc: 0.0000 | F1: 0.0000


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.17it/s]


Epoch 3 | Loss: 5.2382 | Acc: 0.0100 | F1: 0.0062


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 4 | Loss: 5.1464 | Acc: 0.0100 | F1: 0.0054


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.15it/s]


Epoch 5 | Loss: 5.0523 | Acc: 0.0100 | F1: 0.0072




✅ Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/resnet18__CrossEntropyLoss__SGD__20250704-190322.pth

Running: LabelSmoothingLoss + Adam


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.16it/s]


Epoch 1 | Loss: 4.7535 | Acc: 0.3275 | F1: 0.2837


Epoch 2/5: 100%|██████████| 50/50 [00:16<00:00,  3.12it/s]


Epoch 2 | Loss: 2.1114 | Acc: 0.7525 | F1: 0.7327


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.17it/s]


Epoch 3 | Loss: 1.1609 | Acc: 0.9625 | F1: 0.9602


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 4 | Loss: 1.0304 | Acc: 0.9700 | F1: 0.9669


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.20it/s]


Epoch 5 | Loss: 0.9941 | Acc: 0.9775 | F1: 0.9779




✅ Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/resnet18__LabelSmoothingLoss__Adam__20250704-190504.pth

Running: LabelSmoothingLoss + SGD


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.21it/s]


Epoch 1 | Loss: 5.4457 | Acc: 0.0050 | F1: 0.0029


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.16it/s]


Epoch 2 | Loss: 5.3817 | Acc: 0.0050 | F1: 0.0016


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.19it/s]


Epoch 3 | Loss: 5.3280 | Acc: 0.0125 | F1: 0.0053


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 4 | Loss: 5.2415 | Acc: 0.0125 | F1: 0.0062


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.15it/s]


Epoch 5 | Loss: 5.1671 | Acc: 0.0150 | F1: 0.0117




✅ Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/resnet18__LabelSmoothingLoss__SGD__20250704-190646.pth

Running: FocalLoss + Adam


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.14it/s]


Epoch 1 | Loss: 4.3156 | Acc: 0.5050 | F1: 0.4373


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.18it/s]


Epoch 2 | Loss: 0.8315 | Acc: 0.7875 | F1: 0.7830


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.22it/s]


Epoch 3 | Loss: 0.0886 | Acc: 0.9475 | F1: 0.9426


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.17it/s]


Epoch 4 | Loss: 0.0179 | Acc: 0.9875 | F1: 0.9873


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.17it/s]


Epoch 5 | Loss: 0.0039 | Acc: 0.9925 | F1: 0.9923




✅ Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/resnet18__FocalLoss__Adam__20250704-190827.pth

Running: FocalLoss + SGD


Epoch 1/5: 100%|██████████| 50/50 [00:15<00:00,  3.21it/s]


Epoch 1 | Loss: 5.3955 | Acc: 0.0025 | F1: 0.0001


Epoch 2/5: 100%|██████████| 50/50 [00:15<00:00,  3.20it/s]


Epoch 2 | Loss: 5.2762 | Acc: 0.0075 | F1: 0.0015


Epoch 3/5: 100%|██████████| 50/50 [00:15<00:00,  3.24it/s]


Epoch 3 | Loss: 5.1746 | Acc: 0.0050 | F1: 0.0014


Epoch 4/5: 100%|██████████| 50/50 [00:15<00:00,  3.21it/s]


Epoch 4 | Loss: 5.0751 | Acc: 0.0025 | F1: 0.0002


Epoch 5/5: 100%|██████████| 50/50 [00:15<00:00,  3.23it/s]


Epoch 5 | Loss: 4.9797 | Acc: 0.0075 | F1: 0.0040
✅ Saved model to: /content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/resnet18__FocalLoss__SGD__20250704-191007.pth


In [None]:
# 11. Summary Table
# One row per finished experiment, best validation accuracy first.
df_results = pd.DataFrame(results).sort_values(by="accuracy", ascending=False)

print("\n=== SUMMARY RESULTS ===", df_results, sep="\n")


=== SUMMARY RESULTS ===
              loss_fn optimizer  accuracy        f1  precision  recall  \
0    CrossEntropyLoss      Adam    0.9950  0.995000   0.997083  0.9950   
4           FocalLoss      Adam    0.9925  0.992333   0.994792  0.9925   
2  LabelSmoothingLoss      Adam    0.9775  0.977929   0.984375  0.9775   
3  LabelSmoothingLoss       SGD    0.0150  0.011690   0.012792  0.0150   
1    CrossEntropyLoss       SGD    0.0100  0.007154   0.006375  0.0100   
5           FocalLoss       SGD    0.0075  0.003956   0.003399  0.0075   

                    timestamp  
0  2025-07-04T19:01:40.878237  
4  2025-07-04T19:08:27.682323  
2  2025-07-04T19:05:04.715614  
3  2025-07-04T19:06:46.216139  
1  2025-07-04T19:03:22.860660  
5  2025-07-04T19:10:07.814259  


### ResNet18 features + SVM & logistic regression on the Degrade dataset

In [None]:
# 1. Setup
import os
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
from torchvision import models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import joblib

# Use the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Directory on Drive that receives the fitted classifiers
OUTDIR = "/content/drive/MyDrive/AML-PROJECT/pretrained_cnn_Ly/model1/"
os.makedirs(OUTDIR, exist_ok=True)

# 2. Pretrained ResNet18 as a feature extractor: the classification head is
#    replaced by an identity so the forward pass returns the features that
#    would otherwise feed the final linear layer. Note that this loads the
#    stock pretrained weights; no checkpoint saved by the training section
#    above is loaded here.
resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()
resnet = resnet.to(device).eval()

# 3. Feature Extraction
def extract_features(loader):
    """Run every batch of ``loader`` through the notebook-level ``resnet``.

    Args:
        loader: Iterable of ``(images, labels)`` batches; labels are expected
            to be CPU tensors, since they are converted with ``.numpy()``.

    Returns:
        ``(features, labels)`` as NumPy arrays: the per-batch network outputs
        stacked row-wise, and the per-batch labels joined into one array.
    """
    feature_batches = []
    label_batches = []
    with torch.no_grad():
        for images, targets in tqdm(loader, desc="Extracting Features"):
            embeddings = resnet(images.to(device))  # one row of features per image
            feature_batches.append(embeddings.cpu().numpy())
            label_batches.append(targets.numpy())
    return np.vstack(feature_batches), np.hstack(label_batches)

# Extract features (assumes train_loader & val_loader already defined).
# train_loader was created with shuffle=True, so the row order of X_train
# differs between runs; features and labels are appended batch by batch
# together, so X_train[i] and y_train[i] always belong to the same sample.
X_train, y_train = extract_features(train_loader)
X_val, y_val = extract_features(val_loader)

Extracting Features: 100%|██████████| 50/50 [00:14<00:00,  3.41it/s]
Extracting Features: 100%|██████████| 13/13 [00:03<00:00,  3.58it/s]


In [None]:
# 4. Standardize Features
# Statistics are estimated on the training features only and then applied to
# the validation features.
# NOTE: X_train / X_val are overwritten in place. Re-running this cell without
# re-running the feature-extraction cell would refit the scaler on already
# standardized data (and save that refit scaler below).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# The classifiers below are trained on standardized features, so the saved
# models are only usable together with this fitted scaler — persist it too.
joblib.dump(scaler, os.path.join(OUTDIR, "feature_scaler.joblib"))


# 5. Define Classifiers
# Deliberately NOT named `models`: that name is bound to `torchvision.models`,
# and rebinding it to a dict breaks every later `models.resnet18(...)` call
# (including train_resnet18) until the import cell is run again.
# NOTE(review): probability=True makes SVC fitting slower; only predict() is
# used in this cell — keep it only if predict_proba is needed on the saved models.
classifiers = {
    'Logistic_L2': LogisticRegression(penalty='l2', solver='lbfgs', max_iter=1000),
    'SVM_Linear': SVC(kernel='linear', probability=True),
    'SVM_RBF': SVC(kernel='rbf', probability=True),
    'SVM_Poly': SVC(kernel='poly', degree=3, probability=True)
}

# 6. Train, Save, Evaluate
results = []

for name, clf in classifiers.items():
    print(f"\n Training {name} ...")
    clf.fit(X_train, y_train)

    # Save trained model
    model_path = os.path.join(OUTDIR, f"{name}.joblib")
    joblib.dump(clf, model_path)

    # Predict on the held-out validation features
    y_pred = clf.predict(X_val)

    # Evaluate (weighted averages; zero_division=0 scores classes that were
    # never predicted as 0 instead of warning)
    results.append({
        "variant": name,
        "accuracy": accuracy_score(y_val, y_pred),
        "f1": f1_score(y_val, y_pred, average="weighted", zero_division=0),
        "precision": precision_score(y_val, y_pred, average="weighted", zero_division=0),
        "recall": recall_score(y_val, y_pred, average="weighted", zero_division=0)
    })

# 7. Summary Table
print("\n=== SUMMARY RESULTS (ResNet18 Features + ML Classifiers) ===")
print(f"{'variant':20s} {'accuracy':>8s} {'f1':>8s} {'precision':>10s} {'recall':>8s}")
for r in results:
    print(f"{r['variant']:20s} {r['accuracy']:8.4f} {r['f1']:8.4f} {r['precision']:10.4f} {r['recall']:8.4f}")


 Training Logistic_L2 ...

 Training SVM_Linear ...

 Training SVM_RBF ...

 Training SVM_Poly ...

=== SUMMARY RESULTS (ResNet18 Features + ML Classifiers) ===
variant              accuracy       f1  precision   recall
Logistic_L2            0.9100   0.9051     0.9327   0.9100
SVM_Linear             0.8900   0.8878     0.9190   0.8900
SVM_RBF                0.8275   0.8223     0.8579   0.8275
SVM_Poly               0.3200   0.3725     0.4888   0.3200
