In [1]:
# CNN Degarded
# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

# Imports
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
from tqdm import tqdm

# Load & Prepare Data
df = pd.read_csv('/content/drive/MyDrive/AML-PROJECT/iris_degrade.csv')
df['ID'] = df['Label'].str.extract(r'(\d+)', expand=False)
valid_ids = df['ID'].value_counts()[lambda x: x >= 10].head(100).index
df = df[df['ID'].isin(valid_ids)]
df = df[df['Label'].notna() & df['Path'].notna()]
df['Path'] = df['Path'].apply(lambda x: x if isinstance(x, str) and os.path.exists(x) else None)
df = df[df['Path'].notna()]
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['ID'], random_state=42)
le = LabelEncoder()
train_df['encoded_label'] = le.fit_transform(train_df['Label'])
val_df['encoded_label'] = le.transform(val_df['Label'])

#Dataset Class
class IrisDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        img = Image.open(self.df.loc[idx, 'Path']).convert('RGB')
        if self.transform: img = self.transform(img)
        label = self.df.loc[idx, 'encoded_label']
        return img, label

#Transforms & DataLoaders
basic_tf = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])
aug_tf = transforms.Compose([
    transforms.Resize((64, 64)), transforms.RandomHorizontalFlip(), transforms.ColorJitter(),
    transforms.RandomRotation(15), transforms.ToTensor()
])
train_loader = DataLoader(IrisDataset(train_df, basic_tf), batch_size=32, shuffle=True)
val_loader = DataLoader(IrisDataset(val_df, basic_tf), batch_size=32)

#Custom CNNs
class CNNPlain(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(32 * 32 * 32, 128), nn.ReLU(),
            nn.Linear(128, num_classes)
        )
    def forward(self, x): return self.model(x)

class CNN3(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.fc = nn.Linear(128 * 8 * 8, num_classes)
    def forward(self, x): return self.fc(self.conv(x).view(x.size(0), -1))

class ResidualBlock(nn.Module):
    def __init__(self, ch):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(ch, ch, 3, padding=1), nn.ReLU(),
            nn.Conv2d(ch, ch, 3, padding=1), nn.ReLU()
        )
    def forward(self, x): return x + self.block(x)

class CNN9(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.start = nn.Sequential(nn.Conv2d(3, 64, 5, padding=2), nn.ReLU(), nn.MaxPool2d(2))
        self.res = nn.Sequential(
            ResidualBlock(64), ResidualBlock(64),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            ResidualBlock(128), ResidualBlock(128)
        )
        self.fc = nn.Sequential(nn.AdaptiveAvgPool2d((1,1)), nn.Flatten(),
                                nn.Linear(128, 1024), nn.ReLU(), nn.Linear(1024, num_classes))
    def forward(self, x): return self.fc(self.res(self.start(x)))

# Training Function
def train_eval(model, train_loader=train_loader, val_loader=val_loader, name="Model", epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    for ep in range(epochs):
        model.train()
        for imgs, labels in tqdm(train_loader, desc=f"{name} Epoch {ep+1}/{epochs}"):
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = loss_fn(model(imgs), labels)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    preds, truths = [], []
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            out = model(imgs)
            preds.extend(torch.argmax(out, dim=1).cpu().numpy())
            truths.extend(labels.numpy())

    acc = accuracy_score(truths, preds) * 100
    prec = precision_score(truths, preds, average='macro') * 100
    rec = recall_score(truths, preds, average='macro') * 100
    f1 = f1_score(truths, preds, average='macro') * 100
    return acc, prec, rec, f1

#  Run All 8 Models
results = []

# Custom models
custom_models = [
    ("CNN-Plain-Iris", CNNPlain(len(le.classes_))),
    ("CNN-3Layer", CNN3(len(le.classes_))),
    ("CNN-9Layer", CNN9(len(le.classes_)))
]
for name, model in custom_models:
    acc, prec, rec, f1 = train_eval(model, name=name)
    results.append([name, acc, prec, rec, f1])

# Pretrained models
pretrained_cfgs = [
    ("AlexNet", models.alexnet(pretrained=True)),
    ("VGG16", models.vgg16(pretrained=True)),
    ("ResNet18", models.resnet18(pretrained=True)),
    ("ResNet18-Aug", models.resnet18(pretrained=True))
]
for name, model in pretrained_cfgs:
    for p in model.parameters(): p.requires_grad = False
    if 'resnet' in name.lower():
        model.fc = nn.Linear(model.fc.in_features, len(le.classes_))
    else:
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, len(le.classes_))
    loader = DataLoader(IrisDataset(train_df, aug_tf if "Aug" in name else basic_tf), batch_size=32, shuffle=True)
    acc, prec, rec, f1 = train_eval(model, train_loader=loader, name=name)
    results.append([name, acc, prec, rec, f1])

# XGBoost-style on ResNet features
resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()
for p in resnet.parameters(): p.requires_grad = False
fe_train, y_train = [], []
for xb, yb in DataLoader(IrisDataset(train_df, basic_tf), batch_size=32):
    with torch.no_grad(): fe_train.extend(resnet(xb).numpy()); y_train.extend(yb.numpy())
fe_val, y_val = [], []
for xb, yb in val_loader:
    with torch.no_grad(): fe_val.extend(resnet(xb).numpy()); y_val.extend(yb.numpy())
xgb = GradientBoostingClassifier().fit(fe_train, y_train)
xgb_preds = xgb.predict(fe_val)
acc = accuracy_score(y_val, xgb_preds) * 100
prec = precision_score(y_val, xgb_preds, average='macro') * 100
rec = recall_score(y_val, xgb_preds, average='macro') * 100
f1 = f1_score(y_val, xgb_preds, average='macro') * 100
results.append(["XGBoost-style", acc, prec, rec, f1])

# Display Final Sorted Results
df_results = pd.DataFrame(results, columns=["Model", "Accuracy (%)", "Precision (%)", "Recall (%)", "F1 Score (%)"])
df_results = df_results.sort_values(by="Accuracy (%)", ascending=False).reset_index(drop=True)
print(df_results)

# Save models and results as Pickle in Google Drive
import pickle

# Save final results DataFrame
results_path = '/content/drive/MyDrive/AML-PROJECT/final_results.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(df_results, f)

# Save all models in a dictionary
saved_models = {}

# Save state_dict of custom models
for name, model in custom_models:
    saved_models[name] = model.cpu().state_dict()

# Save state_dict of pretrained models
for name, model in pretrained_cfgs:
    saved_models[name] = model.cpu().state_dict()

# Save full XGBoost model directly
saved_models['XGBoost-style'] = xgb

# Save the combined model dictionary
model_path = '/content/drive/MyDrive/AML-PROJECT/all_models.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(saved_models, f)

print(f"Pickle files saved:\n→ Models: {model_path}\n→ Results: {results_path}")


Mounted at /content/drive


CNN-Plain-Iris Epoch 1/10: 100%|██████████| 50/50 [25:07<00:00, 30.14s/it]
CNN-Plain-Iris Epoch 2/10: 100%|██████████| 50/50 [00:12<00:00,  4.06it/s]
CNN-Plain-Iris Epoch 3/10: 100%|██████████| 50/50 [00:12<00:00,  4.09it/s]
CNN-Plain-Iris Epoch 4/10: 100%|██████████| 50/50 [00:12<00:00,  4.11it/s]
CNN-Plain-Iris Epoch 5/10: 100%|██████████| 50/50 [00:12<00:00,  4.09it/s]
CNN-Plain-Iris Epoch 6/10: 100%|██████████| 50/50 [00:12<00:00,  4.15it/s]
CNN-Plain-Iris Epoch 7/10: 100%|██████████| 50/50 [00:12<00:00,  4.13it/s]
CNN-Plain-Iris Epoch 8/10: 100%|██████████| 50/50 [00:12<00:00,  4.15it/s]
CNN-Plain-Iris Epoch 9/10: 100%|██████████| 50/50 [00:12<00:00,  4.13it/s]
CNN-Plain-Iris Epoch 10/10: 100%|██████████| 50/50 [00:12<00:00,  4.12it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
CNN-3Layer Epoch 1/10: 100%|██████████| 50/50 [00:12<00:00,  4.06it/s]
CNN-3Layer Epoch 2/10: 100%|████

            Model  Accuracy (%)  Precision (%)  Recall (%)  F1 Score (%)
0        ResNet18         65.75      65.795262   67.766497     63.045685
1      CNN-3Layer         56.75      56.625982   57.281145     52.922989
2           VGG16         52.00      52.275407   52.848639     48.148106
3         AlexNet         40.25      36.158952   39.424704     34.105193
4    ResNet18-Aug         16.50      13.471056   15.993266     12.401119
5  CNN-Plain-Iris          9.00       6.494805    9.090909      6.005516
6   XGBoost-style          7.75       7.451145    8.458961      6.955013
7      CNN-9Layer          1.00       0.266272    1.282051      0.371000
Pickle files saved:
→ Models: /content/drive/MyDrive/AML-PROJECT/all_models.pkl
→ Results: /content/drive/MyDrive/AML-PROJECT/final_results.pkl


In [9]:
# CNN Noise
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Imports
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
from tqdm import tqdm

#  Load CSV and construct image paths
csv_path = '/content/drive/MyDrive/AML-PROJECT/623final_all.csv'
image_dir = '/content/drive/MyDrive/AML-PROJECT/623final_all'
df = pd.read_csv(csv_path)
df['Path'] = df['image_path'].apply(lambda x: os.path.join(image_dir, x))
df = df[df['label'].notna() & df['Path'].apply(os.path.exists)]
label_counts = df['label'].value_counts()
valid_labels = label_counts[label_counts >= 2].index
df = df[df['label'].isin(valid_labels)]
le = LabelEncoder()
df['encoded_label'] = le.fit_transform(df['label'])

# Train/Validation Split
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['encoded_label'], random_state=42)

# Dataset Class
class IrisDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        img = Image.open(self.df.loc[idx, 'Path']).convert('RGB')
        if self.transform: img = self.transform(img)
        label = self.df.loc[idx, 'encoded_label']
        return img, label

# Transforms and Loaders
basic_tf = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])
aug_tf = transforms.Compose([
    transforms.Resize((64, 64)), transforms.RandomHorizontalFlip(), transforms.ColorJitter(),
    transforms.RandomRotation(15), transforms.ToTensor()
])
train_loader = DataLoader(IrisDataset(train_df, basic_tf), batch_size=32, shuffle=True)
val_loader = DataLoader(IrisDataset(val_df, basic_tf), batch_size=32)

# Custom CNN Models
class CNNPlain(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(32 * 32 * 32, 128), nn.ReLU(),
            nn.Linear(128, num_classes)
        )
    def forward(self, x): return self.model(x)

class CNN3(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.fc = nn.Linear(128 * 8 * 8, num_classes)
    def forward(self, x): return self.fc(self.conv(x).view(x.size(0), -1))

class ResidualBlock(nn.Module):
    def __init__(self, ch):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(ch, ch, 3, padding=1), nn.ReLU(),
            nn.Conv2d(ch, ch, 3, padding=1), nn.ReLU()
        )
    def forward(self, x): return x + self.block(x)

class CNN9(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.start = nn.Sequential(nn.Conv2d(3, 64, 5, padding=2), nn.ReLU(), nn.MaxPool2d(2))
        self.res = nn.Sequential(
            ResidualBlock(64), ResidualBlock(64),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            ResidualBlock(128), ResidualBlock(128)
        )
        self.fc = nn.Sequential(nn.AdaptiveAvgPool2d((1,1)), nn.Flatten(),
                                nn.Linear(128, 1024), nn.ReLU(), nn.Linear(1024, num_classes))
    def forward(self, x): return self.fc(self.res(self.start(x)))

#  Train and Evaluate Function
def train_eval(model, train_loader, val_loader, name="Model", epochs=10):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    for ep in range(epochs):
        model.train()
        for imgs, labels in tqdm(train_loader, desc=f"{name} Epoch {ep+1}/{epochs}"):
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = loss_fn(model(imgs), labels)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    preds, truths = [], []
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            out = model(imgs)
            preds.extend(torch.argmax(out, dim=1).cpu().numpy())
            truths.extend(labels.numpy())

    acc = accuracy_score(truths, preds) * 100
    prec = precision_score(truths, preds, average='macro') * 100
    rec = recall_score(truths, preds, average='macro') * 100
    f1 = f1_score(truths, preds, average='macro') * 100
    return acc, prec, rec, f1

# Run All Models
results = []
num_classes = len(le.classes_)

# Custom models
custom_models = [
    ("CNN-Plain-Iris", CNNPlain(num_classes)),
    ("CNN-3Layer", CNN3(num_classes)),
    ("CNN-9Layer", CNN9(num_classes))
]
for name, model in custom_models:
    acc, prec, rec, f1 = train_eval(model, train_loader, val_loader, name)
    results.append([name, acc, prec, rec, f1])

# Pretrained models
pretrained_cfgs = [
    ("AlexNet", models.alexnet(pretrained=True)),
    ("VGG16", models.vgg16(pretrained=True)),
    ("ResNet18", models.resnet18(pretrained=True)),
    ("ResNet18-Aug", models.resnet18(pretrained=True))
]
for name, model in pretrained_cfgs:
    for p in model.parameters(): p.requires_grad = False
    if 'resnet' in name.lower():
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    else:
        model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes)
    tf = aug_tf if "Aug" in name else basic_tf
    loader = DataLoader(IrisDataset(train_df, tf), batch_size=32, shuffle=True)
    acc, prec, rec, f1 = train_eval(model, loader, val_loader, name)
    results.append([name, acc, prec, rec, f1])

# XGBoost-style
resnet = models.resnet18(pretrained=True)
resnet.fc = nn.Identity()
for p in resnet.parameters(): p.requires_grad = False
fe_train, y_train = [], []
for xb, yb in DataLoader(IrisDataset(train_df, basic_tf), batch_size=32):
    with torch.no_grad(): fe_train.extend(resnet(xb).numpy()); y_train.extend(yb.numpy())
fe_val, y_val = [], []
for xb, yb in val_loader:
    with torch.no_grad(): fe_val.extend(resnet(xb).numpy()); y_val.extend(yb.numpy())
xgb = GradientBoostingClassifier().fit(fe_train, y_train)
xgb_preds = xgb.predict(fe_val)
acc = accuracy_score(y_val, xgb_preds) * 100
prec = precision_score(y_val, xgb_preds, average='macro') * 100
rec = recall_score(y_val, xgb_preds, average='macro') * 100
f1 = f1_score(y_val, xgb_preds, average='macro') * 100
results.append(["XGBoost-style", acc, prec, rec, f1])

# Final Results
df_results = pd.DataFrame(results, columns=["Model", "Accuracy (%)", "Precision (%)", "Recall (%)", "F1 Score (%)"])
df_results = df_results.sort_values(by="Accuracy (%)", ascending=False).reset_index(drop=True)
print(df_results)

# Save Pickle Files (Named for Noise Dataset)
import pickle

# Save results DataFrame
results_path = '/content/drive/MyDrive/AML-PROJECT/noise_final_results.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(df_results, f)

# Save all models in a dictionary
saved_models = {}

# Save state_dict of custom models
for name, model in custom_models:
    saved_models[name] = model.cpu().state_dict()

# Save state_dict of pretrained models
for name, model in pretrained_cfgs:
    saved_models[name] = model.cpu().state_dict()

# Save full XGBoost model directly
saved_models['XGBoost-style'] = xgb

# Save the combined model dictionary
model_path = '/content/drive/MyDrive/AML-PROJECT/noise_final_models.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(saved_models, f)

print(f"\n Pickle files saved for noise dataset:\n→ Models: {model_path}\n→ Results: {results_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


CNN-Plain-Iris Epoch 1/10: 100%|██████████| 50/50 [19:31<00:00, 23.43s/it]
CNN-Plain-Iris Epoch 2/10: 100%|██████████| 50/50 [00:04<00:00, 11.68it/s]
CNN-Plain-Iris Epoch 3/10: 100%|██████████| 50/50 [00:04<00:00, 12.30it/s]
CNN-Plain-Iris Epoch 4/10: 100%|██████████| 50/50 [00:04<00:00, 11.85it/s]
CNN-Plain-Iris Epoch 5/10: 100%|██████████| 50/50 [00:03<00:00, 12.50it/s]
CNN-Plain-Iris Epoch 6/10: 100%|██████████| 50/50 [00:03<00:00, 12.54it/s]
CNN-Plain-Iris Epoch 7/10: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s]
CNN-Plain-Iris Epoch 8/10: 100%|██████████| 50/50 [00:04<00:00, 11.99it/s]
CNN-Plain-Iris Epoch 9/10: 100%|██████████| 50/50 [00:04<00:00, 12.50it/s]
CNN-Plain-Iris Epoch 10/10: 100%|██████████| 50/50 [00:04<00:00, 11.88it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
CNN-3Layer Epoch 1/10: 100%|██████████| 50/50 [00:04<00:00, 11.17it/s]
CNN-3Layer Epoch 2/10: 100%|██████████| 50/50 [00:04<00:00, 11.49it/s]
CNN-3Layer Epoch 3/10: 100%|██████

            Model  Accuracy (%)  Precision (%)  Recall (%)  F1 Score (%)
0           VGG16         24.50      24.255357       24.50     22.198413
1        ResNet18         21.00      20.159524       21.00     19.071032
2         AlexNet         16.75      14.015807       16.75     13.586952
3    ResNet18-Aug          9.50       6.172619        9.50      6.883333
4  CNN-Plain-Iris          3.00       0.380579        3.00      0.591425
5   XGBoost-style          1.25       1.475000        1.25      1.226190
6      CNN-9Layer          0.50       0.002538        0.50      0.005051
7      CNN-3Layer          0.50       0.002500        0.50      0.004975

 Pickle files saved for noise dataset:
→ Models: /content/drive/MyDrive/AML-PROJECT/noise_final_models.pkl
→ Results: /content/drive/MyDrive/AML-PROJECT/noise_final_results.pkl
