In [None]:
# =========================
# 4 Models Script
# ResNet18 / ResNet34 / DenseNet121 / ConvNeXt-Tiny (NO pretrained weights)
# + Oversampling + Best F1 model (argmax for selection)
# + Choose BEST threshold on VAL (maximize F1)
# + Produce 4 Kaggle submissions (one per model)
# NOTE: the best submission used LR = 5e-4, 50 epochs, weight_decay = 1e-4, and val_pos / val_neg = int(0.15 * count)
# (the Config section below currently sets LR = 2e-4).
# =========================

!pip -q install scikit-learn pandas tqdm matplotlib seaborn

import os, re, random, shutil, gc
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler, Subset
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score, precision_score, recall_score

import matplotlib.pyplot as plt
import seaborn as sns

# -------------------------
# Mount Drive (safe)
# -------------------------
# Colab-only import: provides the helper used to mount Google Drive.
from google.colab import drive
# Mount only when the mount point is absent, so re-running the cell skips the mount call.
if not os.path.exists("/content/drive"):
    drive.mount("/content/drive")

# -------------------------
# Config
# -------------------------
# Seed shared by seed_everything() and by the train/val split generator.
SEED = 42
# Mini-batch size for the train, validation and test DataLoaders.
BATCH_SIZE = 64
# Number of training epochs run for every model.
EPOCHS = 50
# Learning rate and weight decay passed to the AdamW optimizer.
LR = 2e-4
WEIGHT_DECAY = 1e-4
# Worker processes used by each DataLoader.
NUM_WORKERS = 2
# Side length (in pixels) of the square images produced by the transforms.
IMG_SIZE = 224

# Fraction of each class (label 1 / label 0) held out for validation.
VAL_FRAC_POS = 0.15
VAL_FRAC_NEG = 0.15

# Dataset root on the mounted Google Drive.
ROOT = "/content/drive/MyDrive/Colab Notebooks/Course/Skin_Cancer_Classification"

# expected structure:
# ROOT/train/0 , ROOT/train/1 , ROOT/test
TRAIN_ROOT = os.path.join(ROOT, "train")
TEST_DIR   = os.path.join(ROOT, "test")

# Every per-model output (checkpoint, submission CSV) is written below this directory.
MASTER_OUT_DIR = os.path.join(ROOT, "models_multi_backbones_scratch")
os.makedirs(MASTER_OUT_DIR, exist_ok=True)

# -------------------------
# Reproducibility
# -------------------------
def seed_everything(seed=SEED):
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) RNGs.

    The cuDNN ``deterministic`` flag is switched off and ``benchmark`` is
    switched on, so GPU runs are seeded but presumably not bit-for-bit
    repeatable (speed is favoured over determinism).

    Args:
        seed: Integer seed applied to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = False
    cudnn.benchmark = True

# Seed all RNGs once, before the dataset split and any model construction below.
seed_everything()

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# -------------------------
# Step 1: Ensure correct folder structure (move ROOT/0 and ROOT/1 into ROOT/train/0, ROOT/train/1 if needed)
# -------------------------
os.makedirs(TRAIN_ROOT, exist_ok=True)

# For each class folder: leave it alone if it already sits under train/,
# otherwise relocate it from ROOT; when it is in neither place only warn here
# (the checks at the end turn that into a hard failure).
for cls in ("0", "1"):
    src_root, dst_train = os.path.join(ROOT, cls), os.path.join(TRAIN_ROOT, cls)

    if os.path.exists(dst_train):
        print(f"[OK] {dst_train} exists")
    elif os.path.exists(src_root):
        print(f"[MOVE] {src_root}  -->  {dst_train}")
        shutil.move(src_root, dst_train)
    else:
        print(f"[WARN] Missing: {src_root} (and {dst_train} not found)")

# Final sanity checks on the layout the rest of the script relies on.
for required_dir, failure_message in (
    (os.path.join(TRAIN_ROOT, "0"), "train/0 not found"),
    (os.path.join(TRAIN_ROOT, "1"), "train/1 not found"),
    (TEST_DIR, "test folder not found"),
):
    assert os.path.isdir(required_dir), failure_message

# -------------------------
# Transforms
# -------------------------
# Per-channel normalisation statistics shared by the train and eval pipelines
# (these look like the usual ImageNet RGB mean/std -- confirm if it matters).
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: light geometric and colour augmentation, then tensor + normalise.
train_tfms = transforms.Compose(
    [
        transforms.RandomResizedCrop(IMG_SIZE, scale=(0.85, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.ColorJitter(
            brightness=0.15,
            contrast=0.15,
            saturation=0.10,
            hue=0.02,
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)

# Evaluation pipeline (validation and test): fixed resize only, no augmentation.
val_tfms = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)

# -------------------------
# Step 2: Dataset + stratified train/val split (SAME split shared by all models)
# -------------------------
# Training view of the labelled images (augmenting transforms attached).
full_ds_train = datasets.ImageFolder(root=TRAIN_ROOT, transform=train_tfms)
print("Classes:", full_ds_train.classes, "class_to_idx:", full_ds_train.class_to_idx)

# Integer label of every sample, in dataset order, plus the matching index array.
targets = np.array([y for _, y in full_ds_train.samples])
idx_all = np.arange(len(full_ds_train))

# Split indices per class so both classes are held out at their own fraction.
# NOTE(review): assumes folder "1" maps to label index 1 -- check the class_to_idx printed above.
pos_idx = idx_all[targets == 1]
neg_idx = idx_all[targets == 0]

# Dedicated seeded generator: the split depends only on SEED and on the order
# of the shuffle calls below, so do not reorder them.
rng = np.random.default_rng(SEED)
rng.shuffle(pos_idx)
rng.shuffle(neg_idx)

# Validation sizes per class; int() truncates towards zero.
val_pos = int(VAL_FRAC_POS * len(pos_idx))
val_neg = int(VAL_FRAC_NEG * len(neg_idx))

# The first val_* shuffled indices of each class go to validation, the rest to training.
val_idx = np.concatenate([pos_idx[:val_pos], neg_idx[:val_neg]])
train_idx = np.concatenate([pos_idx[val_pos:], neg_idx[val_neg:]])
rng.shuffle(train_idx)
rng.shuffle(val_idx)

train_ds = Subset(full_ds_train, train_idx)

# Second ImageFolder over the same root so validation images use val_tfms (no augmentation).
# NOTE(review): reusing val_idx here presumes both scans list the files in the same order.
full_ds_val = datasets.ImageFolder(root=TRAIN_ROOT, transform=val_tfms)
val_ds = Subset(full_ds_val, val_idx)

# Class counts of the training subset; max(..., 1) guards the ratio against division by zero.
train_targets = targets[train_idx]
n0 = int((train_targets == 0).sum())
n1 = int((train_targets == 1).sum())
print(f"Train count: 0={n0}, 1={n1} | ratio0/1={n0/max(n1,1):.2f}")

# Same report for the validation subset.
val_targets = targets[val_idx]
vn0 = int((val_targets == 0).sum())
vn1 = int((val_targets == 1).sum())
print(f"VAL count:   0={vn0}, 1={vn1} | ratio0/1={vn0/max(vn1,1):.2f}")

# -------------------------
# Test dataset helper
# -------------------------
# Keep only entries of the test folder whose extension (case-insensitive) is .jpg, .jpeg or .png.
test_files = [f for f in os.listdir(TEST_DIR) if f.lower().endswith((".jpg", ".jpeg", ".png"))]
# Stop early when the folder holds no matching image.
assert len(test_files) > 0, "No images found in test folder"

def extract_number(fn: str) -> int:
    """Return the last run of decimal digits in *fn* as an int, or -1 if there is none."""
    digit_runs = re.findall(r"\d+", fn)
    if not digit_runs:
        return -1
    return int(digit_runs[-1])

# Order the test files numerically by the last number in each name
# (names without digits get key -1 and therefore sort first).
test_files = sorted(test_files, key=extract_number)

def filename_to_kaggle_id(fn: str) -> str:
    """Build the submission ID for a test file name.

    The ID is ``test/<number>.jpg`` with the number taken from
    ``extract_number(fn)`` and zero-padded to six digits. The ``.jpg`` suffix
    is fixed; the file's real extension is not used.
    """
    num = extract_number(fn)
    return f"test/{num:06d}.jpg"

class TestDataset(torch.utils.data.Dataset):
    """Unlabelled image dataset over an explicit list of file names.

    Indexing returns ``(image, file_name)``: the image is opened from
    ``folder/file_name``, converted to RGB and passed through ``transform``.

    Args:
        folder: Directory containing the image files.
        files: Sequence of file names (relative to ``folder``); its order
            defines the dataset order.
        transform: Callable applied to the RGB PIL image. ``None`` returns the
            PIL image unchanged.
    """

    def __init__(self, folder, files, transform):
        self.folder = folder
        self.files = files
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        # Imported lazily so that building the dataset does not require Pillow.
        from PIL import Image

        fn = self.files[idx]
        path = os.path.join(self.folder, fn)
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle deterministically. convert() returns a new,
        # fully loaded image that stays valid after the source is closed.
        with Image.open(path) as source:
            img = source.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, fn

# Test images go through the same deterministic preprocessing as validation images.
test_ds = TestDataset(TEST_DIR, test_files, val_tfms)
# shuffle=False keeps the predictions in the numerically sorted file order.
test_loader = DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=True
)

# -------------------------
# Model factory (all from scratch)
# -------------------------
def build_model(name: str) -> nn.Module:
    """Create a randomly initialised backbone with a 2-logit classification head.

    Args:
        name: Backbone identifier, matched case-insensitively after stripping
            whitespace. Accepted values: ``resnet18``, ``resnet34``,
            ``densenet121`` and ``convnext_tiny`` (also spelled
            ``convnext-tiny`` or ``convnexttiny``).

    Returns:
        The torchvision model built with ``weights=None`` whose final linear
        layer has been replaced by a fresh ``nn.Linear(in_features, 2)``.

    Raises:
        ValueError: If ``name`` is not one of the accepted identifiers.
    """
    name = name.lower().strip()

    # Both ResNets expose their head as `.fc`.
    if name in ("resnet18", "resnet34"):
        constructor = models.resnet18 if name == "resnet18" else models.resnet34
        net = constructor(weights=None)
        net.fc = nn.Linear(net.fc.in_features, 2)
        return net

    # DenseNet's head is a single Linear stored in `.classifier`.
    if name == "densenet121":
        net = models.densenet121(weights=None)
        net.classifier = nn.Linear(net.classifier.in_features, 2)
        return net

    # ConvNeXt's `.classifier` is a Sequential; only its last layer is swapped.
    if name in ("convnext_tiny", "convnext-tiny", "convnexttiny"):
        net = models.convnext_tiny(weights=None)
        head = net.classifier
        last_linear = head[-1]
        assert isinstance(last_linear, nn.Linear)
        head[-1] = nn.Linear(last_linear.in_features, 2)
        return net

    raise ValueError(f"Unknown model name: {name}")

# -------------------------
# Metrics helpers
# -------------------------
@torch.no_grad()
def eval_f1_argmax(model, loader):
    """Return the F1 score (positive class = 1) of argmax predictions over *loader*.

    The model is put in eval mode. The forward pass runs under CUDA autocast
    when the global ``device`` is a CUDA device.

    Args:
        model: Network returning per-class logits of shape (batch, classes).
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        The F1 score as a Python float.
    """
    model.eval()
    use_amp = device.type == "cuda"

    label_chunks, pred_chunks = [], []
    for inputs, labels in loader:
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            scores = model(inputs)
        hard_pred = scores.argmax(dim=1)
        label_chunks.append(labels.detach().cpu().numpy())
        pred_chunks.append(hard_pred.detach().cpu().numpy())

    y_true = np.concatenate(label_chunks)
    y_pred = np.concatenate(pred_chunks)
    return float(f1_score(y_true, y_pred, pos_label=1))

@torch.no_grad()
def collect_val_probs(model, loader):
    """Collect the class-1 softmax probability and the label of every sample in *loader*.

    The model is put in eval mode. The forward pass and the softmax run under
    CUDA autocast when the global ``device`` is a CUDA device.

    Args:
        model: Network returning per-class logits of shape (batch, classes).
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(probs, labels)`` of 1-D NumPy arrays in loader order.
    """
    model.eval()
    use_amp = device.type == "cuda"

    prob_chunks, label_chunks = [], []
    for inputs, labels in loader:
        inputs = inputs.to(device, non_blocking=True)
        with torch.cuda.amp.autocast(enabled=use_amp):
            scores = model(inputs)
            positive_prob = scores.softmax(dim=1)[:, 1]
        prob_chunks.append(positive_prob.detach().cpu().numpy())
        label_chunks.append(labels.detach().cpu().numpy())

    all_probs = np.concatenate(prob_chunks)
    all_labels = np.concatenate(label_chunks)
    return all_probs, all_labels

def best_threshold_max_f1(p_val, y_val):
    """Grid-search the probability threshold that maximises F1 for class 1.

    Thresholds 0.05, 0.06, ..., 0.99 are tried in increasing order; a sample is
    predicted positive when its probability is strictly greater than the
    threshold. On ties the lowest threshold wins, because only a strictly
    better F1 replaces the current best.

    Args:
        p_val: Array-like of class-1 probabilities.
        y_val: Array-like of ground-truth labels (0/1), same length as ``p_val``.

    Returns:
        dict with keys ``"th"`` (chosen threshold), ``"f1"``, ``"prec"`` and
        ``"rec"`` (metrics at that threshold), all Python floats.
    """
    # Accept plain lists as well as arrays; the comparison below needs an ndarray.
    p_val = np.asarray(p_val)
    y_val = np.asarray(y_val)

    ths = np.linspace(0.05, 0.99, 95)
    best = {"th": None, "f1": -1.0, "prec": None, "rec": None}
    for th in ths:
        pred = (p_val > th).astype(int)
        # zero_division=0 matches the precision/recall calls below and keeps the
        # score at 0.0 without a warning when a threshold yields no positives.
        f1 = f1_score(y_val, pred, pos_label=1, zero_division=0)
        if f1 > best["f1"]:
            best["f1"] = float(f1)
            best["th"] = float(th)
            best["prec"] = float(precision_score(y_val, pred, pos_label=1, zero_division=0))
            best["rec"] = float(recall_score(y_val, pred, pos_label=1, zero_division=0))
    return best

# -------------------------
# Train + submit one model
# -------------------------
def run_one_model(model_name: str):
    """Train one backbone from scratch, tune its threshold on VAL and write a submission.

    Steps:
      1. Build a class-balanced training loader (``WeightedRandomSampler`` with
         inverse class-frequency weights) and a plain validation loader.
      2. Train for ``EPOCHS`` epochs with AdamW and cross-entropy under CUDA
         AMP, saving a checkpoint whenever the argmax validation F1 improves.
      3. Reload the best checkpoint, choose the probability threshold that
         maximises validation F1 and apply it to the test set.
      4. Write the submission CSV and release the model's GPU memory.

    Args:
        model_name: Backbone identifier accepted by ``build_model``; also used
            for the output sub-directory and the output file names.

    Returns:
        dict with the model name, best argmax validation F1 and its epoch, the
        chosen threshold with its validation F1 / precision / recall, the
        submission and checkpoint paths, and the per-epoch histories
        (``train_losses_history``, ``train_accuracies_history``,
        ``val_f1s_history``) covering every epoch that was run.
    """
    print("\n" + "="*80)
    print(f"RUN MODEL: {model_name}")
    print("="*80)

    out_dir = os.path.join(MASTER_OUT_DIR, model_name)
    os.makedirs(out_dir, exist_ok=True)

    best_model_path = os.path.join(out_dir, f"best_{model_name}_f1.pt")
    sub_path = os.path.join(out_dir, f"submission_{model_name}_bestValThreshold.csv")

    # Oversampling via WeightedRandomSampler on TRAIN only:
    # each sample is weighted by the inverse of its class count, so both
    # classes are drawn with roughly equal probability.
    class_counts = np.bincount(train_targets, minlength=2).astype(np.float64)
    class_weights = 1.0 / np.maximum(class_counts, 1.0)
    sample_weights = class_weights[train_targets]

    sampler = WeightedRandomSampler(
        weights=torch.from_numpy(sample_weights).double(),
        num_samples=len(train_targets),
        replacement=True
    )

    train_loader = DataLoader(
        train_ds, batch_size=BATCH_SIZE, sampler=sampler,
        num_workers=NUM_WORKERS, pin_memory=True
    )
    val_loader = DataLoader(
        val_ds, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=NUM_WORKERS, pin_memory=True
    )

    # Build model
    model = build_model(model_name).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    # Gradient scaling is active only on CUDA; on CPU the scaler is a pass-through.
    scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

    # Train (save best by val F1 using argmax for selection)
    best_f1 = -1.0
    best_epoch = -1

    # Per-epoch histories for the whole run (one entry per completed epoch).
    epoch_train_losses = []
    epoch_train_accuracies = []
    epoch_val_f1s = []

    for epoch in range(1, EPOCHS + 1):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        pbar = tqdm(train_loader, desc=f"{model_name} | Epoch {epoch}/{EPOCHS}", leave=False)
        for x, y in pbar:
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                logits = model(x)
                loss = criterion(logits, y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Weight the batch-mean loss by the batch size so the epoch average is per sample.
            running_loss += loss.item() * x.size(0)
            pbar.set_postfix(loss=float(loss.item()))

            _, predicted = torch.max(logits.data, 1)
            total_samples += y.size(0)
            correct_predictions += (predicted == y).sum().item()

        # The sampler draws len(train_targets) samples per epoch, i.e. len(train_ds).
        avg_loss = running_loss / len(train_ds)
        train_accuracy = correct_predictions / total_samples
        val_f1 = eval_f1_argmax(model, val_loader)

        epoch_train_losses.append(avg_loss)
        epoch_train_accuracies.append(train_accuracy)
        epoch_val_f1s.append(val_f1)

        print(f"{model_name} | Epoch {epoch:02d} | train_loss={avg_loss:.4f} | train_acc={train_accuracy:.4f} | val_f1(argmax)={val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_epoch = epoch
            # The histories stored here are a snapshot up to and including this epoch.
            torch.save(
                {"model_state": model.state_dict(), "best_f1": best_f1, "epoch": best_epoch,
                 "train_losses_history": epoch_train_losses, "train_accuracies_history": epoch_train_accuracies, "val_f1s_history": epoch_val_f1s},
                best_model_path
            )
            print(f"  -> saved BEST model (val_f1={best_f1:.4f})")

    print(f"{model_name} | Training done. Best val F1(argmax)={best_f1:.4f} at epoch {best_epoch}")
    print("Best model saved to:", best_model_path)

    # Restore the weights of the best epoch for threshold tuning and test inference.
    ckpt = torch.load(best_model_path, map_location=device)
    model.load_state_dict(ckpt["model_state"])
    model.eval()

    # Choose best threshold on VAL
    p_val, y_val = collect_val_probs(model, val_loader)
    best = best_threshold_max_f1(p_val, y_val)
    P_THRESH = best["th"]

    print(f"{model_name} | Best threshold on VAL (maximize F1): {best}")
    print(f"{model_name} | Using threshold: {P_THRESH}")

    # Predict test + save submission
    all_ids, all_p = [], []
    with torch.no_grad():
        for x, fns in tqdm(test_loader, desc=f"{model_name} | Predict test"):
            x = x.to(device, non_blocking=True)
            with torch.cuda.amp.autocast(enabled=(device.type == "cuda")):
                logits = model(x)
                p1 = torch.softmax(logits, dim=1)[:, 1]
            p1 = p1.detach().cpu().numpy()

            for fn, prob1 in zip(fns, p1):
                all_ids.append(filename_to_kaggle_id(fn))
                all_p.append(float(prob1))

    # Same strict ">" rule that was used when the threshold was tuned on VAL.
    labels = (np.array(all_p) > P_THRESH).astype(int)
    sub = pd.DataFrame({"ID": all_ids, "label": labels})
    sub.to_csv(sub_path, index=False)

    print(f"{model_name} | Saved submission: {sub_path}")
    print(sub.head())
    print(f"{model_name} | Predicted positives: {int(sub['label'].sum())} / {len(sub)}")

    # Cleanup
    del model, optimizer, scaler, train_loader, val_loader
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return {
        "model": model_name,
        "best_val_f1_argmax": float(ckpt.get("best_f1", -1.0)),
        "best_epoch": int(ckpt.get("epoch", -1)),
        "best_thresh": float(P_THRESH),
        "val_thresh_f1": float(best["f1"]),
        "val_thresh_prec": float(best["prec"]),
        "val_thresh_rec": float(best["rec"]),
        "submission_path": sub_path,
        "best_model_path": best_model_path,
        # Return the in-memory histories: the copies inside the checkpoint stop
        # at the best epoch, which would truncate any per-epoch curve.
        "train_losses_history": epoch_train_losses,
        "train_accuracies_history": epoch_train_accuracies,
        "val_f1s_history": epoch_val_f1s,
    }

# -------------------------
# Run 4 models
# -------------------------
# Backbones to train; every name must be one that build_model() accepts.
MODELS_TO_RUN = ["resnet18", "resnet34", "densenet121", "convnext_tiny"]

# Train the models one after another; each call returns a summary dict.
# An explicit loop keeps the results gathered so far if a later model fails.
results = []
for mname in MODELS_TO_RUN:
    results.append(run_one_model(mname))

# Summary table
# The model with the highest threshold-tuned validation F1 comes first.
res_df = pd.DataFrame(results).sort_values(by="val_thresh_f1", ascending=False)
print("\n" + "="*80)
print("SUMMARY (sorted by VAL F1 using best threshold)")
print("="*80)
# `display` is not imported in this file; presumably the notebook (IPython) rich-output helper.
display(res_df)

# Flatten the per-model histories into long-format rows (one row per model and epoch).
plot_data_acc = [
    {'model': res['model'], 'epoch': epoch, 'metric': 'Training Accuracy', 'value': acc}
    for res in results
    for epoch, acc in enumerate(res['train_accuracies_history'], start=1)
]
plot_data_f1 = [
    {'model': res['model'], 'epoch': epoch, 'metric': 'Validation F1', 'value': f1}
    for res in results
    for epoch, f1 in enumerate(res['val_f1s_history'], start=1)
]

df_plot_acc = pd.DataFrame(plot_data_acc)
df_plot_f1 = pd.DataFrame(plot_data_f1)

# One figure per metric, one line per model: training accuracy first, then validation F1.
for frame, title, y_label in (
    (df_plot_acc, 'Training Accuracy vs. Epoch for All Models', 'Training Accuracy'),
    (df_plot_f1, 'Validation F1 Score vs. Epoch for All Models', 'Validation F1 Score (argmax)'),
):
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=frame, x='epoch', y='value', hue='model', marker='o')
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.grid(True)
    plt.legend(title='Model')
    plt.show()