In [1]:
# Check GPU
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
# Basic deps for the split script + training
!pip -q install pandas scikit-learn tqdm


In [3]:
#1) Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [4]:
#2) Define paths to your files in root of My Drive and copy to fast local SSD
# Paths on Drive (root)
ZIP1 = "/content/drive/MyDrive/HAM10000_images_part_1.zip"
ZIP2 = "/content/drive/MyDrive/HAM10000_images_part_2.zip"
META = "/content/drive/MyDrive/HAM10000_metadata.csv"

# Local working area on Colab SSD
WORK     = "/content/work"
RAW_DIR  = f"{WORK}/raw"
EXTRACT  = f"{WORK}/ham10000_extracted"
DATA_DIR = f"{WORK}/data"

import os, subprocess, pathlib
os.makedirs(RAW_DIR, exist_ok=True)
os.makedirs(EXTRACT, exist_ok=True)
os.makedirs(DATA_DIR, exist_ok=True)

# Copy only if missing (fast on later sessions)
print(subprocess.getoutput(f'rsync -ah --ignore-existing "{ZIP1}" "{ZIP2}" "{META}" "{RAW_DIR}/"'))
print("Local RAW_DIR:", subprocess.getoutput(f'ls -lh "{RAW_DIR}"'))


Local RAW_DIR: total 2.6G
-rw------- 1 root root 1.3G Sep 29 15:20 HAM10000_images_part_1.zip
-rw------- 1 root root 1.4G Sep 29 15:18 HAM10000_images_part_2.zip
-rw------- 1 root root 551K Oct  6  2019 HAM10000_metadata.csv


In [5]:
#3) Unzip locally (NOT to Drive)
!mkdir -p /content/work/ham10000_extracted
!unzip -q /content/work/raw/HAM10000_images_part_1.zip -d /content/work/ham10000_extracted/
!unzip -q /content/work/raw/HAM10000_images_part_2.zip -d /content/work/ham10000_extracted/
!ls -lh /content/work/ham10000_extracted | head

total 328K
drwxr-xr-x 2 root root 152K Sep 29 20:38 HAM10000_images_part_1
drwxrwxrwx 2 root root 168K Sep 29 15:08 HAM10000_images_part_2


In [6]:
!mkdir -p /content/scripts

In [None]:
# #4) Create the split script and run it (makes train/val/test)
# %%writefile /content/scripts/split_ham10000.py
# #!/usr/bin/env python3
# import argparse, os, shutil, zipfile
# from pathlib import Path
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from tqdm import tqdm

# def unzip_if_needed(zip_path: Path, imgs_dir: Path):
#     imgs_dir.mkdir(parents=True, exist_ok=True)
#     if any(imgs_dir.glob("*.jpg")):
#         print(f"[skip] Images already present in: {imgs_dir}")
#         return
#     assert zip_path.exists(), f"Missing: {zip_path}"
#     print(f"[unzip] {zip_path.name} → {imgs_dir}")
#     with zipfile.ZipFile(zip_path, "r") as z:
#         z.extractall(imgs_dir)

# def build_splits(meta_csv: Path, imgs_dir: Path, train_pct: float, val_pct: float, test_pct: float, seed: int):
#     assert abs((train_pct + val_pct + test_pct) - 1.0) < 1e-6, "Splits must sum to 1.0"
#     meta = pd.read_csv(meta_csv)
#     meta["image_path"] = meta["image_id"].apply(lambda x: str(imgs_dir / f"{x}.jpg"))
#     meta = meta[meta["image_path"].map(lambda p: Path(p).exists())].copy()
#     train_df, temp_df = train_test_split(meta, test_size=1.0-train_pct, stratify=meta["dx"], random_state=seed)
#     test_rel = test_pct / (val_pct + test_pct)
#     val_df, test_df = train_test_split(temp_df, test_size=test_rel, stratify=temp_df["dx"], random_state=seed)
#     return train_df, val_df, test_df, sorted(meta["dx"].unique())

# def materialize_split(df: pd.DataFrame, split_name: str, out_root: Path):
#     base = out_root / split_name; base.mkdir(parents=True, exist_ok=True)
#     for cls in sorted(df["dx"].unique()):
#         (base / cls).mkdir(parents=True, exist_ok=True)
#     for _, r in tqdm(df.iterrows(), total=len(df), desc=f"{split_name:>5}", unit="img"):
#         src, dst = Path(r["image_path"]), base / r["dx"] / Path(r["image_path"]).name
#         if not dst.exists():
#             shutil.copy2(src, dst)

# def print_counts(data_dir: Path):
#     for split in ["train","val","test"]:
#         base = data_dir / split
#         if base.exists():
#             counts = {d.name: len(list((base/d).glob("*"))) for d in base.iterdir() if d.is_dir()}
#             print(split, counts)

# def main():
#     p = argparse.ArgumentParser()
#     p.add_argument("--project_dir", default=".")
#     p.add_argument("--train", type=float, default=0.8)
#     p.add_argument("--val",   type=float, default=0.1)
#     p.add_argument("--test",  type=float, default=0.1)
#     p.add_argument("--seed",  type=int, default=42)
#     p.add_argument("--clean", action="store_true")
#     args = p.parse_args()

#     project = Path(args.project_dir).resolve()
#     raw_dir  = project / "raw"
#     data_dir = project / "data"
#     extract  = project / "ham10000_extracted"
#     imgs_dir = extract / "HAM10000_images"

#     part1 = raw_dir / "HAM10000_images_part_1.zip"
#     part2 = raw_dir / "HAM10000_images_part_2.zip"
#     meta  = raw_dir / "HAM10000_metadata.csv"

#     extract.mkdir(parents=True, exist_ok=True); data_dir.mkdir(parents=True, exist_ok=True)
#     unzip_if_needed(part1, imgs_dir)
#     unzip_if_needed(part2, imgs_dir)

#     train_df, val_df, test_df, classes = build_splits(meta, imgs_dir, args.train, args.val, args.test, args.seed)

#     if args.clean:
#         for s in ["train","val","test"]:
#             target = data_dir / s
#             if target.exists(): shutil.rmtree(target)

#     materialize_split(train_df, "train", data_dir)
#     materialize_split(val_df, "val", data_dir)
#     materialize_split(test_df, "test", data_dir)
#     print_counts(data_dir)
#     print("[done] Data ready at:", data_dir)

# if __name__ == "__main__":
#     main()

Writing /content/scripts/split_ham10000.py


In [7]:
%%writefile /content/scripts/split_ham10000.py
#!/usr/bin/env python3
import argparse, os, shutil, zipfile
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

def unzip_if_needed(zip_path: Path, extract_dir: Path):
    extract_dir.mkdir(parents=True, exist_ok=True)
    # If any jpgs already exist anywhere under extract_dir, skip
    if any(extract_dir.rglob("*.jpg")):
        print(f"[skip] Images already present under: {extract_dir}")
        return
    assert zip_path.exists(), f"Missing: {zip_path}"
    print(f"[unzip] {zip_path.name} -> {extract_dir}")
    with zipfile.ZipFile(zip_path, "r") as z:
        z.extractall(extract_dir)

def index_images(img_root: Path):
    """
    Build a dict: image_id (without .jpg) -> full path
    Recurses under img_root to handle both 'HAM10000_images' and '..._part_1/part_2' layouts.
    """
    mapping = {}
    for p in img_root.rglob("*.jpg"):
        mapping[p.stem] = str(p)
    return mapping

def build_splits(meta_csv: Path, img_root: Path, train_pct: float, val_pct: float, test_pct: float, seed: int):
    assert abs((train_pct + val_pct + test_pct) - 1.0) < 1e-6, "Splits must sum to 1.0"
    meta = pd.read_csv(meta_csv)
    img_map = index_images(img_root)
    meta["image_path"] = meta["image_id"].map(img_map)
    meta = meta.dropna(subset=["image_path"]).copy()

    # quick sanity
    print(f"[info] Found {len(img_map)} jpgs under {img_root}")
    print(f"[info] Matched rows with metadata: {len(meta)}")

    train_df, temp_df = train_test_split(
        meta, test_size=1.0-train_pct, stratify=meta["dx"], random_state=seed
    )
    test_rel = test_pct / (val_pct + test_pct)
    val_df, test_df = train_test_split(
        temp_df, test_size=test_rel, stratify=temp_df["dx"], random_state=seed
    )
    return train_df, val_df, test_df, sorted(meta["dx"].unique())

def materialize_split(df: pd.DataFrame, split_name: str, out_root: Path):
    base = out_root / split_name
    base.mkdir(parents=True, exist_ok=True)
    for cls in sorted(df["dx"].unique()):
        (base / cls).mkdir(parents=True, exist_ok=True)
    for _, r in tqdm(df.iterrows(), total=len(df), desc=f"{split_name:>5}", unit="img"):
        src = Path(r["image_path"])
        dst = base / r["dx"] / src.name
        if not dst.exists():
            shutil.copy2(src, dst)

def print_counts(data_dir: Path):
    for split in ["train", "val", "test"]:
        base = data_dir / split
        if base.exists():
            counts = {
                d.name: len(list((base / d).glob("*")))
                for d in base.iterdir() if d.is_dir()
            }
            print(split, counts)

def main():
    p = argparse.ArgumentParser()
    p.add_argument("--project_dir", default=".")
    p.add_argument("--train", type=float, default=0.8)
    p.add_argument("--val",   type=float, default=0.1)
    p.add_argument("--test",  type=float, default=0.1)
    p.add_argument("--seed",  type=int, default=42)
    p.add_argument("--clean", action="store_true")
    args = p.parse_args()

    project = Path(args.project_dir).resolve()
    raw_dir  = project / "raw"
    data_dir = project / "data"
    extract  = project / "ham10000_extracted"   # we recurse under this folder
    imgs_root = extract                         # <-- recursive root

    part1 = raw_dir / "HAM10000_images_part_1.zip"
    part2 = raw_dir / "HAM10000_images_part_2.zip"
    meta  = raw_dir / "HAM10000_metadata.csv"

    extract.mkdir(parents=True, exist_ok=True)
    data_dir.mkdir(parents=True, exist_ok=True)

    unzip_if_needed(part1, extract)
    unzip_if_needed(part2, extract)

    train_df, val_df, test_df, classes = build_splits(
        meta, imgs_root, args.train, args.val, args.test, args.seed
    )

    if args.clean:
        for s in ["train", "val", "test"]:
            target = data_dir / s
            if target.exists():
                shutil.rmtree(target)

    materialize_split(train_df, "train", data_dir)
    materialize_split(val_df, "val", data_dir)
    materialize_split(test_df, "test", data_dir)
    print_counts(data_dir)
    print("[done] Data ready at:", data_dir)

if __name__ == "__main__":
    main()

Writing /content/scripts/split_ham10000.py


In [8]:
!python /content/scripts/split_ham10000.py \
  --project_dir "/content/work" \
  --train 0.8 --val 0.1 --test 0.1 --seed 42 --clean

[skip] Images already present under: /content/work/ham10000_extracted
[skip] Images already present under: /content/work/ham10000_extracted
[info] Found 10015 jpgs under /content/work/ham10000_extracted
[info] Matched rows with metadata: 10015
train: 100% 8012/8012 [00:15<00:00, 516.57img/s]
  val: 100% 1001/1001 [00:02<00:00, 467.86img/s]
 test: 100% 1002/1002 [00:02<00:00, 435.20img/s]
train {'nv': 5364, 'mel': 890, 'bcc': 411, 'vasc': 114, 'akiec': 262, 'df': 92, 'bkl': 879}
val {'nv': 670, 'mel': 111, 'bcc': 51, 'vasc': 14, 'akiec': 33, 'df': 12, 'bkl': 110}
test {'nv': 671, 'mel': 112, 'bcc': 52, 'vasc': 14, 'akiec': 32, 'df': 11, 'bkl': 110}
[done] Data ready at: /content/work/data


In [9]:
#Quick sanity check:
import os
for split in ("train","val","test"):
    base = os.path.join("/content/work/data", split)
    classes = [d for d in sorted(os.listdir(base)) if os.path.isdir(os.path.join(base,d))]
    counts = {c: len(os.listdir(os.path.join(base,c))) for c in classes}
    print(split, counts)

train {'akiec': 262, 'bcc': 411, 'bkl': 879, 'df': 92, 'mel': 890, 'nv': 5364, 'vasc': 114}
val {'akiec': 33, 'bcc': 51, 'bkl': 110, 'df': 12, 'mel': 111, 'nv': 670, 'vasc': 14}
test {'akiec': 32, 'bcc': 52, 'bkl': 110, 'df': 11, 'mel': 112, 'nv': 671, 'vasc': 14}


In [10]:
#!/usr/bin/env python3
import os, json, time, csv, datetime
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

# =============================
# CONFIG (edit this in Colab)
# =============================
class Args:
    data = "/content/work/data"   # <-- your dataset folder with train/val/test
    epochs = 10
    warmup = 2
    size = 224
    batch = 32
    base_lr = 1e-4
    ft_lr = 1e-5
    unfreeze = 10
    binary = 0
    run_name = "colab_run"
    out_dir = "runs"

args = Args()

# =============================
# Functions
# =============================
def get_datasets(data_dir, img_size=(224, 224), batch=32, seed=42, binary=False):
    def loader(split, shuffle):
        return tf.keras.utils.image_dataset_from_directory(
            os.path.join(data_dir, split),
            image_size=img_size, batch_size=batch, seed=seed, shuffle=shuffle
        )
    ds_train = loader("train", True)
    ds_val   = loader("val",   False)
    ds_test  = loader("test",  False)
    class_names = ds_train.class_names

    aug = keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.05),
        layers.RandomZoom(0.1),
    ])
    ds_train = ds_train.map(lambda x, y: (aug(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    ds_train = ds_train.cache().prefetch(tf.data.AUTOTUNE)
    ds_val   = ds_val.cache().prefetch(tf.data.AUTOTUNE)
    ds_test  = ds_test.cache().prefetch(tf.data.AUTOTUNE)
    return ds_train, ds_val, ds_test, class_names, bool(binary)

def compute_class_weights(ds, num_classes):
    counts = np.zeros(num_classes, dtype=np.int64)
    for _, y in ds.unbatch():
        counts[int(y.numpy())] += 1
    total = counts.sum()
    return {i: float(total / (num_classes * max(counts[i], 1))) for i in range(num_classes)}

def build_model(num_classes, img_size=(224, 224), binary=False):
    inp = keras.Input(shape=(*img_size, 3))
    x = tf.keras.applications.resnet50.preprocess_input(inp)
    base = tf.keras.applications.ResNet50(include_top=False, weights="imagenet", input_tensor=x)
    x = layers.GlobalAveragePooling2D()(base.output)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(1, activation="sigmoid", dtype="float32")(x) if binary \
          else layers.Dense(num_classes, activation="softmax", dtype="float32")(x)
    return keras.Model(inp, out), base

def compile_and_fit(model, train_ds, val_ds, *, loss, lr, epochs, class_weight, ckpt_path):
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss=loss, metrics=["accuracy"])
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
    cbs = [
        keras.callbacks.ModelCheckpoint(ckpt_path, save_best_only=True, monitor="val_accuracy", mode="max"),
        keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True, monitor="val_accuracy", mode="max"),
    ]
    hist = model.fit(train_ds, validation_data=val_ds, epochs=epochs,
                     class_weight=class_weight, callbacks=cbs)
    return {k: [float(v) for v in vals] for k, vals in hist.history.items()}

def eval_save(model, ds_test, names, out_dir, binary=False):
    y_true, y_pred = [], []
    for x, y in ds_test:
        p = model.predict(x, verbose=0)
        yp = (p.reshape(-1) >= 0.5).astype(int) if binary else p.argmax(axis=1)
        y_true += y.numpy().tolist(); y_pred += yp.tolist()
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    acc = float(accuracy_score(y_true, y_pred))
    f1  = float(f1_score(y_true, y_pred, average=("binary" if binary else "macro")))
    rep = classification_report(y_true, y_pred, target_names=names)
    cm  = confusion_matrix(y_true, y_pred).tolist()
    os.makedirs(out_dir, exist_ok=True)
    json.dump({"accuracy": acc, "macro_f1": f1, "confusion_matrix": cm},
              open(os.path.join(out_dir,"metrics.json"),"w"), indent=2)
    open(os.path.join(out_dir,"classification_report.txt"),"w").write(rep)
    json.dump(names, open(os.path.join(out_dir,"classes.json"),"w"), indent=2)
    print(rep); print("Saved:", out_dir)
    return acc, f1

def merge_histories(h1, h2):
    out = {}
    keys = set(h1.keys()) | set(h2.keys())
    for k in keys:
        out[k] = (h1.get(k, []) + h2.get(k, []))
    return out

def write_epoch_csv(history, path_csv):
    rows = []
    n = max(len(history.get("accuracy", [])), len(history.get("loss", [])))
    for i in range(n):
        rows.append({
            "epoch": i+1,
            "accuracy": history.get("accuracy", [None]*n)[i],
            "loss": history.get("loss", [None]*n)[i],
            "val_accuracy": history.get("val_accuracy", [None]*n)[i],
            "val_loss": history.get("val_loss", [None]*n)[i],
        })
    import pandas as pd
    pd.DataFrame(rows).to_csv(path_csv, index=False)

def append_master_row(master_csv, row_dict):
    headers = ["timestamp","run_name","data","img_size","batch","warmup","epochs","unfreeze",
               "base_lr","ft_lr","binary","best_val_acc","best_val_loss","test_acc","test_macro_f1"]
    os.makedirs(os.path.dirname(master_csv), exist_ok=True)
    file_exists = os.path.isfile(master_csv)
    with open(master_csv, "a", newline="") as f:
        w = csv.DictWriter(f, fieldnames=headers)
        if not file_exists:
            w.writeheader()
        w.writerow({k: row_dict.get(k) for k in headers})

# =============================
# MAIN
# =============================
def main(a):
    run = a.run_name or time.strftime("%Y%m%d-%H%M%S")
    out_root = a.out_dir if a.out_dir else "runs"
    out = os.path.join(out_root, "resnet50", run)
    os.makedirs(out, exist_ok=True)

    ds_tr, ds_va, ds_te, names, binary = get_datasets(a.data, (a.size, a.size), a.batch, binary=bool(a.binary))
    ncls = 2 if binary else len(names)
    class_weight = compute_class_weights(ds_tr, ncls)

    model, base = build_model(ncls, (a.size, a.size), binary)
    loss = "binary_crossentropy" if binary else keras.losses.SparseCategoricalCrossentropy()

    # warmup
    base.trainable = False
    hist_warm = compile_and_fit(model, ds_tr, ds_va, loss=loss, lr=a.base_lr,
                                epochs=max(1, a.warmup), class_weight=class_weight,
                                ckpt_path=os.path.join(out, "best.keras"))

    # fine-tune
    base.trainable = True
    if a.unfreeze > 0 and a.unfreeze < len(base.layers):
        for l in base.layers[:-a.unfreeze]:
            l.trainable = False
    hist_ft = compile_and_fit(model, ds_tr, ds_va, loss=loss, lr=a.ft_lr,
                              epochs=a.epochs, class_weight=class_weight,
                              ckpt_path=os.path.join(out, "best.keras"))

    # histories
    history = merge_histories(hist_warm, hist_ft)
    json.dump(history, open(os.path.join(out,"history.json"),"w"), indent=2)
    write_epoch_csv(history, os.path.join(out,"history_epoch.csv"))

    # evaluate
    test_acc, test_f1 = eval_save(model, ds_te, names, out, binary)

    # master CSV
    master_csv = os.path.join(out_root, "experiments_master.csv")
    best_idx = int(np.nanargmax(history.get("val_accuracy", [np.nan])))
    best_val_acc  = float(history["val_accuracy"][best_idx]) if "val_accuracy" in history else None
    best_val_loss = float(history["val_loss"][best_idx]) if "val_loss" in history else None

    append_master_row(master_csv, {
        "timestamp": datetime.datetime.now().isoformat(timespec="seconds"),
        "run_name": run,
        "data": a.data,
        "img_size": a.size,
        "batch": a.batch,
        "warmup": a.warmup,
        "epochs": a.epochs,
        "unfreeze": a.unfreeze,
        "base_lr": a.base_lr,
        "ft_lr": a.ft_lr,
        "binary": int(a.binary),
        "best_val_acc": best_val_acc,
        "best_val_loss": best_val_loss,
        "test_acc": test_acc,
        "test_macro_f1": test_f1,
    })
    print(f"[master] Appended summary to: {master_csv}")

# =============================
# RUN
# =============================
main(args)


Found 8012 files belonging to 7 classes.
Found 1001 files belonging to 7 classes.
Found 1002 files belonging to 7 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step
Epoch 1/2
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 157ms/step - accuracy: 0.2692 - loss: 2.4583 - val_accuracy: 0.3826 - val_loss: 1.6380
Epoch 2/2
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 95ms/step - accuracy: 0.3448 - loss: 1.8448 - val_accuracy: 0.5115 - val_loss: 1.3403
Epoch 1/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 156ms/step - accuracy: 0.4689 - loss: 1.5535 - val_accuracy: 0.5584 - val_loss: 1.2405
Epoch 2/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 109ms/step - accuracy: 0.5097 - loss: 1.2669 - val_accuracy: 0.5924 - val_loss:

In [11]:
import os
print("Exists:", os.path.isdir("/content/work/data"))
print("Splits:", os.listdir("/content/work/data"))

Exists: True
Splits: ['test', 'val', 'train']


In [17]:
# make sure the folder exists
!mkdir -p /content/members

In [18]:
%%writefile /content/members/run_resnet50.py
#!/usr/bin/env python3
import os, json, time, argparse, csv, datetime
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

def get_datasets(data_dir, img_size=(224,224), batch=32, seed=42, binary=False):
    def loader(split, shuffle):
        return tf.keras.utils.image_dataset_from_directory(
            os.path.join(data_dir, split),
            image_size=img_size, batch_size=batch, seed=seed, shuffle=shuffle
        )
    ds_train = loader("train", True); ds_val = loader("val", False); ds_test = loader("test", False)
    class_names = ds_train.class_names
    aug = keras.Sequential([layers.RandomFlip("horizontal"), layers.RandomRotation(0.05), layers.RandomZoom(0.1)])
    ds_train = ds_train.map(lambda x,y:(aug(x),y), num_parallel_calls=tf.data.AUTOTUNE)
    ds_train = ds_train.cache().prefetch(tf.data.AUTOTUNE); ds_val = ds_val.cache().prefetch(tf.data.AUTOTUNE); ds_test = ds_test.cache().prefetch(tf.data.AUTOTUNE)
    return ds_train, ds_val, ds_test, class_names, bool(binary)

def compute_class_weights(ds, n):
    c = np.zeros(n, dtype=np.int64)
    for _, y in ds.unbatch(): c[int(y.numpy())]+=1
    tot=c.sum(); return {i: float(tot/(n*max(c[i],1))) for i in range(n)}

def build_model(num_classes, img_size=(224,224), binary=False):
    inp = keras.Input(shape=(*img_size,3))
    x = tf.keras.applications.resnet50.preprocess_input(inp)
    base = tf.keras.applications.ResNet50(include_top=False, weights="imagenet", input_tensor=x)
    x = layers.GlobalAveragePooling2D()(base.output); x = layers.Dropout(0.3)(x)
    out = layers.Dense(1, activation="sigmoid", dtype="float32")(x) if binary else layers.Dense(num_classes, activation="softmax", dtype="float32")(x)
    return keras.Model(inp,out), base

def compile_and_fit(model, train_ds, val_ds, *, loss, lr, epochs, class_weight, ckpt_path):
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss=loss, metrics=["accuracy"])
    os.makedirs(os.path.dirname(ckpt_path), exist_ok=True)
    cbs=[keras.callbacks.ModelCheckpoint(ckpt_path, save_best_only=True, monitor="val_accuracy", mode="max"),
         keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True, monitor="val_accuracy", mode="max")]
    hist=model.fit(train_ds, validation_data=val_ds, epochs=epochs, class_weight=class_weight, callbacks=cbs)
    return {k:[float(v) for v in vals] for k,vals in hist.history.items()}

def eval_save(model, ds_test, names, out_dir, binary=False):
    y_true,y_pred=[],[]
    for x,y in ds_test:
        p=model.predict(x,verbose=0)
        yp=(p.reshape(-1)>=0.5).astype(int) if binary else p.argmax(axis=1)
        y_true+=y.numpy().tolist(); y_pred+=yp.tolist()
    y_true=np.array(y_true); y_pred=np.array(y_pred)
    acc=float(accuracy_score(y_true,y_pred)); f1=float(f1_score(y_true,y_pred,average=("binary" if binary else "macro")))
    rep=classification_report(y_true,y_pred,target_names=names); cm=confusion_matrix(y_true,y_pred).tolist()
    os.makedirs(out_dir, exist_ok=True)
    json.dump({"accuracy":acc,"macro_f1":f1,"confusion_matrix":cm}, open(os.path.join(out_dir,"metrics.json"),"w"), indent=2)
    open(os.path.join(out_dir,"classification_report.txt"),"w").write(rep)
    json.dump(names, open(os.path.join(out_dir,"classes.json"),"w"), indent=2)
    print(rep); print("Saved:", out_dir)
    return acc,f1

def merge_hist(h1,h2):
    keys=set(h1.keys())|set(h2.keys())
    return {k:(h1.get(k,[])+h2.get(k,[])) for k in keys}

def write_epoch_csv(history, path_csv):
    import pandas as pd
    n=max(len(history.get("accuracy",[])), len(history.get("loss",[])))
    rows=[{
        "epoch":i+1,
        "accuracy":history.get("accuracy",[None]*n)[i],
        "loss":history.get("loss",[None]*n)[i],
        "val_accuracy":history.get("val_accuracy",[None]*n)[i],
        "val_loss":history.get("val_loss",[None]*n)[i],
    } for i in range(n)]
    pd.DataFrame(rows).to_csv(path_csv, index=False)

def append_master_row(master_csv, row):
    headers=["timestamp","run_name","data","img_size","batch","warmup","epochs","unfreeze","base_lr","ft_lr","binary","best_val_acc","best_val_loss","test_acc","test_macro_f1"]
    os.makedirs(os.path.dirname(master_csv), exist_ok=True)
    new=not os.path.isfile(master_csv)
    with open(master_csv,"a",newline="") as f:
        w=csv.DictWriter(f, fieldnames=headers)
        if new: w.writeheader()
        w.writerow({k:row.get(k) for k in headers})

def main(a):
    run=a.run_name or time.strftime("%Y%m%d-%H%M%S")
    out_root=a.out_dir or "runs"
    out=os.path.join(out_root,"resnet50",run); os.makedirs(out,exist_ok=True)

    ds_tr,ds_va,ds_te,names,binary=get_datasets(a.data,(a.size,a.size),a.batch,binary=bool(a.binary))
    ncls=2 if binary else len(names)
    class_weight=compute_class_weights(ds_tr,ncls)

    model,base=build_model(ncls,(a.size,a.size),binary)
    loss="binary_crossentropy" if binary else keras.losses.SparseCategoricalCrossentropy()

    base.trainable=False
    hist_warm=compile_and_fit(model,ds_tr,ds_va,loss=loss,lr=a.base_lr,epochs=max(1,a.warmup),class_weight=class_weight,ckpt_path=os.path.join(out,"best.keras"))

    base.trainable=True
    if a.unfreeze>0 and a.unfreeze<len(base.layers):
        for l in base.layers[:-a.unfreeze]: l.trainable=False
    hist_ft=compile_and_fit(model,ds_tr,ds_va,loss=loss,lr=a.ft_lr,epochs=a.epochs,class_weight=class_weight,ckpt_path=os.path.join(out,"best.keras"))

    history=merge_hist(hist_warm,hist_ft)
    json.dump(history, open(os.path.join(out,"history.json"),"w"), indent=2)
    write_epoch_csv(history, os.path.join(out,"history_epoch.csv"))

    test_acc,test_f1=eval_save(model,ds_te,names,out,binary)

    master_csv=os.path.join(out_root,"experiments_master.csv")
    best_idx=int(np.nanargmax(history.get("val_accuracy",[np.nan])))
    best_val_acc=float(history["val_accuracy"][best_idx]) if "val_accuracy" in history else None
    best_val_loss=float(history["val_loss"][best_idx]) if "val_loss" in history else None
    append_master_row(master_csv,{
        "timestamp":datetime.datetime.now().isoformat(timespec="seconds"),
        "run_name":run,"data":a.data,"img_size":a.size,"batch":a.batch,
        "warmup":a.warmup,"epochs":a.epochs,"unfreeze":a.unfreeze,
        "base_lr":a.base_lr,"ft_lr":a.ft_lr,"binary":int(a.binary),
        "best_val_acc":best_val_acc,"best_val_loss":best_val_loss,
        "test_acc":test_acc,"test_macro_f1":test_f1
    })
    print(f"[master] Appended summary to: {master_csv}")

if __name__=="__main__":
    ap=argparse.ArgumentParser()
    ap.add_argument("--data", required=True)
    ap.add_argument("--epochs", type=int, default=20)
    ap.add_argument("--warmup", type=int, default=3)
    ap.add_argument("--size", type=int, default=224)
    ap.add_argument("--batch", type=int, default=32)
    ap.add_argument("--base_lr", type=float, default=1e-4)
    ap.add_argument("--ft_lr", type=float, default=1e-5)
    ap.add_argument("--unfreeze", type=int, default=10)
    ap.add_argument("--binary", type=int, default=0)
    ap.add_argument("--run_name", default="")
    ap.add_argument("--out_dir", default="runs")
    a=ap.parse_args(); main(a)


Writing /content/members/run_resnet50.py


In [1]:
# run 1
!python /content/members/run_resnet50.py \
  --data "/content/work/data" \
  --epochs 20 --warmup 3 --unfreeze 10 \
  --batch 32 --base_lr 1e-4 --ft_lr 1e-5 \
  --out_dir /content/runs --run_name e20_b32

# run 2
!python /content/members/run_resnet50.py \
  --data "/content/work/data" \
  --epochs 40 --warmup 3 --unfreeze 10 \
  --batch 16 --base_lr 1e-4 --ft_lr 1e-5 \
  --out_dir /content/runs --run_name e40_b16


2025-10-03 16:22:43.377190: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759508563.603122   22699 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759508563.663254   22699 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1759508564.111369   22699 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1759508564.111411   22699 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1759508564.111416   22699 computation_placer.cc:177] computation placer alr

In [None]:
# run 1 (baseline: 20 epochs, batch 32)
# !python /content/members/run_resnet50.py \
#   --data "/content/work/data" \
#   --epochs 20 --warmup 3 --unfreeze 10 \
#   --batch 32 --base_lr 1e-4 --ft_lr 1e-5 \
#   --out_dir /content/runs --run_name e20_b32

# # run 2 (longer training, smaller batch)
# !python /content/members/run_resnet50.py \
#   --data "/content/work/data" \
#   --epochs 40 --warmup 3 --unfreeze 10 \
#   --batch 16 --base_lr 1e-4 --ft_lr 1e-5 \
#   --out_dir /content/runs --run_name e40_b16

# run 3 (fewer epochs, larger batch — tests faster convergence)
!python /content/members/run_resnet50.py \
  --data "/content/work/data" \
  --epochs 15 --warmup 2 --unfreeze 10 \
  --batch 64 --base_lr 1e-4 --ft_lr 1e-5 \
  --out_dir /content/runs --run_name e15_b64

# run 4 (more fine-tuning, unfreeze 30 layers)
!python /content/members/run_resnet50.py \
  --data "/content/work/data" \
  --epochs 30 --warmup 3 --unfreeze 30 \
  --batch 32 --base_lr 5e-5 --ft_lr 1e-5 \
  --out_dir /content/runs --run_name e30_b32_unf30

# run 5 (higher learning rate for fine-tune)
!python /content/members/run_resnet50.py \
  --data "/content/work/data" \
  --epochs 25 --warmup 3 --unfreeze 10 \
  --batch 32 --base_lr 1e-4 --ft_lr 5e-5 \
  --out_dir /content/runs --run_name e25_b32_ftlr5e5

# run 6 (very small batch, stress test on gradients)
!python /content/members/run_resnet50.py \
  --data "/content/work/data" \
  --epochs 25 --warmup 3 --unfreeze 10 \
  --batch 8 --base_lr 1e-4 --ft_lr 1e-5 \
  --out_dir /content/runs --run_name e25_b8


In [2]:
#Verify per-run files
!ls -lah /content/runs/resnet50/e20_b32


total 125M
drwxr-xr-x 2 root root 4.0K Oct  3 16:36 .
drwxr-xr-x 5 root root 4.0K Oct  3 16:37 ..
-rw-r--r-- 1 root root 125M Oct  3 16:36 best.keras
-rw-r--r-- 1 root root   66 Oct  3 16:36 classes.json
-rw-r--r-- 1 root root  596 Oct  3 16:36 classification_report.txt
-rw-r--r-- 1 root root 1.8K Oct  3 16:36 history_epoch.csv
-rw-r--r-- 1 root root 2.3K Oct  3 16:36 history.json
-rw-r--r-- 1 root root  633 Oct  3 16:36 metrics.json


In [3]:
#Master CSV (all runs + scores)
!sed -n '1,10p' /content/runs/experiments_master.csv


timestamp,run_name,data,img_size,batch,warmup,epochs,unfreeze,base_lr,ft_lr,binary,best_val_acc,best_val_loss,test_acc,test_macro_f1
2025-10-03T15:52:25,colab_run,/content/work/data,224,32,2,10,10,0.0001,1e-05,0,0.7182817459106445,0.8808948993682861,0.7035928143712575,0.5599759908482217
2025-10-03T16:36:52,e20_b32,/content/work/data,224,32,3,20,10,0.0001,1e-05,0,0.7762237787246704,0.7001172304153442,0.779441117764471,0.6018622911554392
2025-10-03T16:55:07,e40_b16,/content/work/data,224,16,3,40,10,0.0001,1e-05,0,0.8101897835731506,0.8298619985580444,0.8033932135728543,0.6312824143804363


In [4]:
#Save results back to Drive (persistent)

# Save all runs
!mkdir -p "/content/drive/MyDrive/SKIN_CANCER_RESULTS"
!rsync -ah --info=progress2 "/content/runs/" "/content/drive/MyDrive/SKIN_CANCER_RESULTS/"

# Inspect what's saved
!find "/content/drive/MyDrive/SKIN_CANCER_RESULTS" -maxdepth 3 -type f | head -n 20

        392.75M 100%  114.16MB/s    0:00:03 (xfr#19, to-chk=0/24)
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/colab_run/best.keras
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/colab_run/classes.json
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/colab_run/classification_report.txt
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/colab_run/history.json
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/colab_run/history_epoch.csv
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/colab_run/metrics.json
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/e20_b32/best.keras
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/e20_b32/classes.json
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/e20_b32/classification_report.txt
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/e20_b32/history.json
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/e20_b32/history_epoch.csv
/content/drive/MyDrive/SKIN_CANCER_RESULTS/resnet50/e20_b32/metrics.json
/content/drive