<a href="https://colab.research.google.com/github/sandeeepmedepalli/ml-colony-classification/blob/main/Kfold_enhanced_code_of_ML_research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os, random, math
import numpy as np
import pandas as pd

from google.colab import drive
drive.mount('/content/drive')

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from PIL import Image
import torchvision.transforms as T
import torchvision.models as models
from sklearn.model_selection import train_test_split


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# ====== PATHS (separate pretrain vs ground truth) ======
BASE_DIR = "/content/drive/MyDrive/22022540"

# Public dataset (10-class pretrain)
CSV_PATH   = os.path.join(BASE_DIR, "annot_tab.csv")
IMAGES_DIR_PUBLIC = BASE_DIR  # images are directly here


# Ground-truth from CVAT (zip) — keep separate
GT_ZIP_PATH = os.path.join(BASE_DIR, "ground_truth","ground_truth_one","ground_truth_dataset.zip")
# Ground-truth images (the exact images used in CVAT)
IMAGES_DIR_GT = "/content/drive/MyDrive/22022540/ground_truth/ground_truth_one"

# Output workspaces (separate)
WORKDIR_PRETRAIN = "/content/colony_stage2_pretrain10"
WORKDIR_GT       = "/content/colony_stage2_groundtruth"
os.makedirs(WORKDIR_PRETRAIN, exist_ok=True)
os.makedirs(WORKDIR_GT, exist_ok=True)

# Patch settings
PATCH_SIZE = 100
PAD_TO_SQUARE = True

# How much extra context around the colony bbox
BOX_EXPAND = 0.20


# Reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
print("CSV_PATH:  ", CSV_PATH)
print("GT_ZIP_PATH:", GT_ZIP_PATH)


Device: cuda
CSV_PATH:   /content/drive/MyDrive/22022540/annot_tab.csv
GT_ZIP_PATH: /content/drive/MyDrive/22022540/ground_truth/ground_truth_one/ground_truth_dataset.zip


In [3]:
#Copy images from Drive → local Colab
import shutil, os

LOCAL_IMG_DIR = "/content/colony_images"
os.makedirs(LOCAL_IMG_DIR, exist_ok=True)

for f in os.listdir(IMAGES_DIR_PUBLIC):
    src = os.path.join(IMAGES_DIR_PUBLIC, f)
    dst = os.path.join(LOCAL_IMG_DIR, f)
    if os.path.isfile(src) and not os.path.exists(dst):
        shutil.copy2(src, dst)

IMAGES_DIR_PUBLIC = LOCAL_IMG_DIR
print("Now using local images:", IMAGES_DIR_PUBLIC)

Now using local images: /content/colony_images


In [4]:
required_cols = [
    "label_name","bbox_x","bbox_y","bbox_width","bbox_height",
    "image_name","image_width","image_height"
]

df = pd.read_csv(CSV_PATH, encoding="utf-8-sig")

missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"CSV missing columns: {missing}\nFound: {list(df.columns)}")

# Make numeric columns numeric
for c in ["bbox_x","bbox_y","bbox_width","bbox_height","image_width","image_height"]:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# Drop bad rows
df = df.dropna(subset=required_cols).copy()
df = df[(df["bbox_width"] > 0) & (df["bbox_height"] > 0)].copy()

print("Rows (boxes):", len(df))
print("Unique classes:", df["label_name"].nunique())
print(df["label_name"].value_counts().head(24))


Rows (boxes): 56862
Unique classes: 24
label_name
sp21    11160
sp23     7067
sp22     6814
sp06     5513
sp10     4364
sp05     4102
sp19     2782
sp13     1799
sp09     1775
sp02     1530
sp18     1383
sp16     1348
sp14     1102
sp07     1087
sp15      866
sp20      853
sp24      787
sp11      481
sp12      461
sp01      397
sp08      368
sp04      304
sp03      295
sp17      224
Name: count, dtype: int64


In [5]:
# 10-class list
CLASSES_10 = ["sp02","sp05","sp06","sp07","sp10","sp14","sp16","sp19","sp21","sp23"]

# Count check
counts = df["label_name"].value_counts()
def show_counts(class_list, name):
    print(f"\n{name} counts:")
    for c in class_list:
        print(f"  {c}: {int(counts.get(c, 0))}")

show_counts(CLASSES_10, "10-class")

# Safety checks for classes count to be greater than 1000 , as we are working on thousand
for c in CLASSES_10:
    if counts.get(c, 0) < 1000:
        raise ValueError(f"{c} has < 1000 boxes. Pick another class.")


10-class counts:
  sp02: 1530
  sp05: 4102
  sp06: 5513
  sp07: 1087
  sp10: 4364
  sp14: 1102
  sp16: 1348
  sp19: 2782
  sp21: 11160
  sp23: 7067


In [6]:
def sample_rows_for_class(df, cls, n, seed, exclude_index_set=None):
    """Sample n rows for one class. Optionally exclude some row indices."""
    sub = df[df["label_name"] == cls]
    if exclude_index_set is not None:
        sub = sub[~sub.index.isin(exclude_index_set)]
    if len(sub) < n:
        raise ValueError(f"Not enough rows for {cls}: need {n}, have {len(sub)} after exclusions.")
    return sub.sample(n=n, random_state=seed)

# 10-class pretrain sampling: 1000 per class
pretrain_parts = []
used_idx = set()

for i, cls in enumerate(CLASSES_10):
    samp = sample_rows_for_class(df, cls, n=1000, seed=SEED+i)
    pretrain_parts.append(samp)
    used_idx.update(samp.index.tolist())

df_pretrain = pd.concat(pretrain_parts).reset_index(drop=True)
print("Pretrain rows:", len(df_pretrain), " expected:", 10*1000)

Pretrain rows: 10000  expected: 10000


In [7]:
# Cropping function where we convert bbox to patch (with optional context expansion)
def crop_patch(
    img: Image.Image,
    x, y, w, h,
    pad_to_square=True,
    expand_factor=0.20,    # e.g., 0.20 means 20% of bbox size added on each side
    expand_px=0           # optional fixed extra pixels on each side
):
    # Expand bbox to include context
    dx = expand_px + expand_factor * float(w)
    dy = expand_px + expand_factor * float(h)

    x1 = int(round(x - dx))
    y1 = int(round(y - dy))
    x2 = int(round(x + w + dx))
    y2 = int(round(y + h + dy))

    # Clip to image boundaries
    x1 = max(0, x1); y1 = max(0, y1)
    x2 = min(img.width, x2); y2 = min(img.height, y2)

    # Safety: avoid empty crops
    if x2 <= x1: x2 = min(img.width, x1 + 1)
    if y2 <= y1: y2 = min(img.height, y1 + 1)

    patch = img.crop((x1, y1, x2, y2))

    if not pad_to_square:
        return patch

    # Pad to square (keeps aspect ratio before resize)
    side = max(patch.width, patch.height)
    new_img = Image.new("L", (side, side), color=0)  # grayscale canvas
    px = (side - patch.width) // 2
    py = (side - patch.height) // 2
    patch = patch.convert("L")
    new_img.paste(patch, (px, py))
    return new_img


In [8]:
class ColonyPatchDatasetCached(Dataset):
    def __init__(self, df_rows, images_dir, class_to_idx, transform=None):
        self.df = df_rows.reset_index(drop=True)
        self.images_dir = images_dir
        self.class_to_idx = class_to_idx
        self.transform = transform
        self.cache = {}  # idx to PIL RGB patch

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if idx not in self.cache:
            r = self.df.iloc[idx]
            img_path = os.path.join(self.images_dir, os.path.basename(r["image_name"]))
            img = Image.open(img_path).convert("L")  # image is converted to gray scale

            patch = crop_patch(   # crop patch and resize it to 100*100
                img, r["bbox_x"], r["bbox_y"], r["bbox_width"], r["bbox_height"],
                pad_to_square=PAD_TO_SQUARE,
                expand_factor=BOX_EXPAND
            ).resize((PATCH_SIZE, PATCH_SIZE), resample=Image.BILINEAR)

            patch_rgb = Image.merge("RGB", (patch, patch, patch))
            self.cache[idx] = patch_rgb

        patch_rgb = self.cache[idx]
        y = self.class_to_idx[str(self.df.iloc[idx]["label_name"])]

        if self.transform:
            patch_rgb = self.transform(patch_rgb)

        return patch_rgb, y


In [9]:
# ImageNet normalization stats (because we use an ImageNet-pretrained backbone)
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

train_tf = T.Compose([
    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip(),
    T.RandomRotation(degrees=10),
    T.ToTensor(),
    T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

test_tf = T.Compose([
    T.ToTensor(),
    T.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])


In [10]:
def accuracy(pred_logits, y):
    preds = pred_logits.argmax(dim=1)
    return (preds == y).float().mean().item()

def train_one_epoch(model, loader, optim, criterion):
    model.train()
    total_loss, total_acc, n = 0.0, 0.0, 0

    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)

        optim.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optim.step()

        bs = x.size(0)
        total_loss += loss.item() * bs
        total_acc  += accuracy(logits, y) * bs
        n += bs

    return total_loss / n, total_acc / n

@torch.no_grad()  # helps to stop gradient tracking during evaluation
def eval_one_epoch(model, loader, criterion):
    model.eval()
    total_loss, total_acc, n = 0.0, 0.0, 0

    for x, y in loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        logits = model(x)
        loss = criterion(logits, y)

        bs = x.size(0)
        total_loss += loss.item() * bs
        total_acc  += accuracy(logits, y) * bs
        n += bs

    return total_loss / n, total_acc / n


In [11]:
class_to_idx_10 = {c:i for i,c in enumerate(CLASSES_10)}

train_df_10, test_df_10 = train_test_split(
    df_pretrain,
    test_size=0.2,
    random_state=SEED,
    stratify=df_pretrain["label_name"]
)

ds_train_10 = ColonyPatchDatasetCached(train_df_10, IMAGES_DIR_PUBLIC, class_to_idx_10, transform=train_tf)
ds_test_10  = ColonyPatchDatasetCached(test_df_10,  IMAGES_DIR_PUBLIC, class_to_idx_10, transform=test_tf)

train_loader_10 = DataLoader(ds_train_10, batch_size=128, shuffle=True, num_workers=0, pin_memory=True)
test_loader_10  = DataLoader(ds_test_10,  batch_size=128, shuffle=False, num_workers=0, pin_memory=True)


print("10-class train:", len(ds_train_10), "test:", len(ds_test_10))


10-class train: 8000 test: 2000


In [12]:
#we are going to use the pretrained back bone for imagenet weights
#ResNet-18 architecture suitable for efficient transfer learning.
#Load the official pretrained ResNet18 weights trained on ImageNet-1K(1000 classes), version 1
weights = models.ResNet18_Weights.IMAGENET1K_V1
model_10 = models.resnet18(weights=weights)

# change the resnet classifier head from 1000 to 10 classes
in_features = model_10.fc.in_features
model_10.fc = nn.Linear(in_features, len(CLASSES_10))

model_10 = model_10.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_10.parameters(), lr=1e-4)


In [13]:
EPOCHS_10 = 10  # start small; you can increase later

best_acc = 0.0
for epoch in range(1, EPOCHS_10+1):
    tr_loss, tr_acc = train_one_epoch(model_10, train_loader_10, optimizer, criterion)
    te_loss, te_acc = eval_one_epoch(model_10, test_loader_10, criterion)

    print(f"[10-class] Epoch {epoch:02d} | train acc {tr_acc:.3f} | test acc {te_acc:.3f}")

    #save the model with best accuracy
    if te_acc > best_acc:
        best_acc = te_acc
        torch.save(model_10.state_dict(), os.path.join(WORKDIR_PRETRAIN, "resnet18_pretrained_10class.pt"))

print("Best 10-class test acc:", best_acc)


[10-class] Epoch 01 | train acc 0.664 | test acc 0.811
[10-class] Epoch 02 | train acc 0.868 | test acc 0.879
[10-class] Epoch 03 | train acc 0.899 | test acc 0.874
[10-class] Epoch 04 | train acc 0.914 | test acc 0.907
[10-class] Epoch 05 | train acc 0.925 | test acc 0.905
[10-class] Epoch 06 | train acc 0.935 | test acc 0.914
[10-class] Epoch 07 | train acc 0.944 | test acc 0.915
[10-class] Epoch 08 | train acc 0.943 | test acc 0.920
[10-class] Epoch 09 | train acc 0.955 | test acc 0.915
[10-class] Epoch 10 | train acc 0.955 | test acc 0.887
Best 10-class test acc: 0.9200000004768372


In [14]:
import os
import torch
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import torchvision.models as models
import torch.nn as nn

@torch.no_grad()
def evaluate_model(model, loader, class_names, device, title=""):
    model.eval()
    y_true, y_pred = [], []

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        logits = model(x)
        preds = torch.argmax(logits, dim=1)

        y_true.append(y.cpu().numpy())
        y_pred.append(preds.cpu().numpy())

    y_true = np.concatenate(y_true)
    y_pred = np.concatenate(y_pred)

    print("\n" + "="*80)
    print(title)
    print("="*80)

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:")
    print(cm)
    print()

    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=class_names, digits=3))
    return cm


In [15]:
# ===== Evaluate BEST 10-class PUBLIC model =====

PATH_10 = os.path.join(
    WORKDIR_PRETRAIN,   # <-- use the REAL variable from your notebook
    "resnet18_pretrained_10class.pt"
)

model_10_eval = models.resnet18(weights=None)
model_10_eval.fc = nn.Linear(model_10_eval.fc.in_features, len(CLASSES_10))
model_10_eval.load_state_dict(torch.load(PATH_10, map_location=DEVICE))
model_10_eval = model_10_eval.to(DEVICE)

cm_10 = evaluate_model(
    model_10_eval,
    test_loader_10,
    CLASSES_10,
    DEVICE,
    title="Evaluation: Best 10-Class PUBLIC ResNet18"
)



Evaluation: Best 10-Class PUBLIC ResNet18
Confusion Matrix:
[[191   3   1   0   0   0   0   0   3   2]
 [  1 195   0   1   0   1   0   0   1   1]
 [  0   0 180   1   2   9   0   2   2   4]
 [  0   0   4 190   2   0   0   1   0   3]
 [  0   0   9   2 177   3   0   2   0   7]
 [  0   0   5   0   0 185   0   7   3   0]
 [  1   0   3   0   1   3 178   8   4   2]
 [  2   0   1   0   0   1   0 195   1   0]
 [  1   0   1   3   0   1   0   1 192   1]
 [  3   3  20   4   7   2   4   0   0 157]]

Classification Report:
              precision    recall  f1-score   support

        sp02      0.960     0.955     0.957       200
        sp05      0.970     0.975     0.973       200
        sp06      0.804     0.900     0.849       200
        sp07      0.945     0.950     0.948       200
        sp10      0.937     0.885     0.910       200
        sp14      0.902     0.925     0.914       200
        sp16      0.978     0.890     0.932       200
        sp19      0.903     0.975     0.938       2

In [16]:
import os, zipfile, yaml
import pandas as pd
from PIL import Image

# Extract CVAT export zip into runtime
GT_DIR = "/content/cvat_export_gt"
os.makedirs(GT_DIR, exist_ok=True)

with zipfile.ZipFile(GT_ZIP_PATH, "r") as z:
    z.extractall(GT_DIR)

# Read classes from YAML
yaml_path = os.path.join(GT_DIR, "data.yaml")
with open(yaml_path, "r") as f:
    y = yaml.safe_load(f)

names = y["names"]
if isinstance(names, dict):
    id_to_class = {int(k): v for k, v in names.items()}
else:
    id_to_class = {i: v for i, v in enumerate(names)}

CLASSES_FT = [id_to_class[i] for i in sorted(id_to_class.keys())]
print("GT classes from yaml:", CLASSES_FT)

# Find labels folder
LABELS_DIR = os.path.join(GT_DIR, "labels", "train")
if not os.path.isdir(LABELS_DIR):
    LABELS_DIR = os.path.join(GT_DIR, "labels")
if not os.path.isdir(LABELS_DIR):
    raise FileNotFoundError("Could not find labels folder inside CVAT export zip.")

def find_image_path(images_dir, stem):
    for ext in [".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"]:
        p = os.path.join(images_dir, stem + ext)
        if os.path.exists(p):
            return p
    return None

rows = []
missing = []

for txt_name in os.listdir(LABELS_DIR):
    if not txt_name.endswith(".txt"):
        continue

    stem = os.path.splitext(txt_name)[0]  # sp01_img01
    img_path = find_image_path(IMAGES_DIR_GT, stem)
    if img_path is None:
        missing.append(stem)
        continue

    img = Image.open(img_path)
    W, H = img.size

    txt_path = os.path.join(LABELS_DIR, txt_name)
    with open(txt_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) != 5:
                continue
            cls_id = int(float(parts[0]))
            cx, cy, w, h = map(float, parts[1:])

            bw = w * W
            bh = h * H
            x1 = (cx * W) - bw / 2
            y1 = (cy * H) - bh / 2

            rows.append({
                "label_name": id_to_class[cls_id],
                "bbox_x": x1,
                "bbox_y": y1,
                "bbox_width": bw,
                "bbox_height": bh,
                "image_name": os.path.basename(img_path),
                "image_width": W,
                "image_height": H,
            })

df_gt = pd.DataFrame(rows)
print("GT boxes:", len(df_gt))
print(df_gt["label_name"].value_counts())

if missing:
    print("WARNING: Missing images for label files (examples):", missing[:10])


GT classes from yaml: ['sp01', 'sp02', 'sp03', 'sp04']
GT boxes: 254
label_name
sp02    82
sp04    64
sp01    55
sp03    53
Name: count, dtype: int64


In [17]:
# ======================================================
# 10-Fold CV on IN-HOUSE data
# ======================================================

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader

In [18]:
# ---------- 0) Public checkpoint ----------
PUBLIC_CKPT_PATH = os.path.join(WORKDIR_PRETRAIN, "resnet18_pretrained_10class.pt")  # adjust if needed
assert os.path.exists(PUBLIC_CKPT_PATH), f"Missing public checkpoint: {PUBLIC_CKPT_PATH}"

# ---------- 1) In-house classes ----------
CLASSES_FT = sorted(df_gt["label_name"].astype(str).unique().tolist())
class_to_idx_ft = {c: i for i, c in enumerate(CLASSES_FT)}

print("In-house classes:", CLASSES_FT)
print("Counts:\n", df_gt["label_name"].value_counts())

In-house classes: ['sp01', 'sp02', 'sp03', 'sp04']
Counts:
 label_name
sp02    82
sp04    64
sp01    55
sp03    53
Name: count, dtype: int64


In [19]:
counts = df_gt["label_name"].value_counts()
min_count = int(counts.min())

# IMPORTANT: StratifiedKFold requires each class count >= n_splits
n_splits_requested = 10
n_splits = min(n_splits_requested, min_count)

if n_splits < 2:
    raise ValueError(
        f"Not enough samples per class for CV. min_count={min_count}. "
        f"Need at least 2 per class."
    )

print(f"\n✅ Using StratifiedKFold with n_splits={n_splits} (requested {n_splits_requested})")


✅ Using StratifiedKFold with n_splits=10 (requested 10)


In [20]:
# ---------- 2) Loader builder ----------
def make_loaders(train_df, test_df, batch_train=64, batch_test=64, num_workers=2):
    ds_train = ColonyPatchDatasetCached(train_df, IMAGES_DIR_GT, class_to_idx_ft, transform=train_tf)
    ds_test  = ColonyPatchDatasetCached(test_df,  IMAGES_DIR_GT, class_to_idx_ft, transform=test_tf)

    train_loader = DataLoader(ds_train, batch_size=batch_train, shuffle=True,
                              num_workers=num_workers, pin_memory=True)
    test_loader  = DataLoader(ds_test, batch_size=batch_test, shuffle=False,
                              num_workers=num_workers, pin_memory=True)
    return train_loader, test_loader

In [21]:
# ---------- 3) Build model from public ckpt + swap head ----------
def build_model_from_public_ckpt(num_inhouse_classes: int):
    m = models.resnet18(weights=None)
    in_features = m.fc.in_features

    # temp 10-class head to load the public checkpoint
    m.fc = nn.Linear(in_features, 10)
    state = torch.load(PUBLIC_CKPT_PATH, map_location="cpu")
    m.load_state_dict(state)

    # replace head for in-house classes
    m.fc = nn.Linear(in_features, num_inhouse_classes)
    return m

In [22]:
# ---------- 4) Two-phase fine-tune ----------
def run_two_phase_finetune(model, train_loader, test_loader,
                           epochs_head=10, epochs_full=10,
                           lr_head=1e-3, lr_full=1e-4,
                           wd=1e-4):

    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()

    best_acc = -1.0
    best_state = None

    # ---- Phase 1: head-only ----
    for p in model.parameters():
        p.requires_grad = False
    for p in model.fc.parameters():
        p.requires_grad = True

    opt1 = torch.optim.Adam(model.fc.parameters(), lr=lr_head, weight_decay=wd)

    for _ in range(epochs_head):
        train_one_epoch(model, train_loader, opt1, criterion)
        _, te_acc = eval_one_epoch(model, test_loader, criterion)

        if te_acc > best_acc:
            best_acc = te_acc
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)

    # ---- Phase 2: full fine-tune ----
    for p in model.parameters():
        p.requires_grad = True

    opt2 = torch.optim.Adam(model.parameters(), lr=lr_full, weight_decay=wd)

    for _ in range(epochs_full):
        train_one_epoch(model, train_loader, opt2, criterion)
        _, te_acc = eval_one_epoch(model, test_loader, criterion)

        if te_acc > best_acc:
            best_acc = te_acc
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)

    return model

In [23]:
# ---------- 5) Predict helper ----------
@torch.no_grad()
def predict_all(model, loader):
    model.eval()
    ys, ps = [], []
    for x, y in loader:
        x = x.to(DEVICE)
        logits = model(x)
        pred = logits.argmax(dim=1).cpu().numpy()
        ys.append(y.numpy())
        ps.append(pred)
    return np.concatenate(ys), np.concatenate(ps)

In [24]:
# ---------- 6) Run CV ----------
X = df_gt.reset_index(drop=True)
y = X["label_name"].astype(str).values

skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

fold_rows = []
for fold, (tr_idx, te_idx) in enumerate(skf.split(X, y), start=1):
    train_df = X.iloc[tr_idx].copy().reset_index(drop=True)
    test_df  = X.iloc[te_idx].copy().reset_index(drop=True)

    # sanity: no empty splits
    if len(train_df) == 0 or len(test_df) == 0:
        print(f"⚠️ Fold {fold:02d} skipped due to empty split.")
        continue

    train_loader, test_loader = make_loaders(train_df, test_df, batch_train=64, batch_test=64, num_workers=2)

    model = build_model_from_public_ckpt(num_inhouse_classes=len(CLASSES_FT))
    model = run_two_phase_finetune(model, train_loader, test_loader,
                                   epochs_head=10, epochs_full=10,
                                   lr_head=1e-3, lr_full=1e-4)

    y_true, y_pred = predict_all(model, test_loader)

    acc = accuracy_score(y_true, y_pred)
    prec, rec, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro", zero_division=0
    )

    fold_rows.append({
        "fold": fold,
        "acc": acc,
        "precision_macro": prec,
        "recall_macro": rec,
        "f1_macro": f1,
        "n_train": len(train_df),
        "n_test": len(test_df),
    })

    print(f"[Fold {fold:02d}] acc={acc:.3f} prec={prec:.3f} rec={rec:.3f} f1={f1:.3f} | train={len(train_df)} test={len(test_df)}")

results = pd.DataFrame(fold_rows)

[Fold 01] acc=1.000 prec=1.000 rec=1.000 f1=1.000 | train=228 test=26
[Fold 02] acc=1.000 prec=1.000 rec=1.000 f1=1.000 | train=228 test=26
[Fold 03] acc=1.000 prec=1.000 rec=1.000 f1=1.000 | train=228 test=26
[Fold 04] acc=0.962 prec=0.964 rec=0.958 f1=0.958 | train=228 test=26
[Fold 05] acc=0.960 prec=0.964 rec=0.969 f1=0.964 | train=229 test=25
[Fold 06] acc=1.000 prec=1.000 rec=1.000 f1=1.000 | train=229 test=25
[Fold 07] acc=0.920 prec=0.939 rec=0.900 f1=0.905 | train=229 test=25
[Fold 08] acc=0.960 prec=0.969 rec=0.950 f1=0.956 | train=229 test=25
[Fold 09] acc=1.000 prec=1.000 rec=1.000 f1=1.000 | train=229 test=25
[Fold 10] acc=1.000 prec=1.000 rec=1.000 f1=1.000 | train=229 test=25


In [25]:
print("\n====================")
print("K-Fold CV Summary")
print("====================")
for col in ["acc", "precision_macro", "recall_macro", "f1_macro"]:
    mean = results[col].mean()
    std  = results[col].std(ddof=1)
    print(f"{col:16s}: mean={mean:.4f}  std={std:.4f}")

display(results)


K-Fold CV Summary
acc             : mean=0.9802  std=0.0282
precision_macro : mean=0.9837  std=0.0225
recall_macro    : mean=0.9777  std=0.0337
f1_macro        : mean=0.9783  std=0.0322


Unnamed: 0,fold,acc,precision_macro,recall_macro,f1_macro,n_train,n_test
0,1,1.0,1.0,1.0,1.0,228,26
1,2,1.0,1.0,1.0,1.0,228,26
2,3,1.0,1.0,1.0,1.0,228,26
3,4,0.961538,0.964286,0.958333,0.958042,228,26
4,5,0.96,0.964286,0.96875,0.964103,229,25
5,6,1.0,1.0,1.0,1.0,229,25
6,7,0.92,0.939286,0.9,0.905111,229,25
7,8,0.96,0.96875,0.95,0.955556,229,25
8,9,1.0,1.0,1.0,1.0,229,25
9,10,1.0,1.0,1.0,1.0,229,25
