# 01 — Baseline Image Classification (fastai, ResNet-34)
Edit the config cell to point to your dataset, then run all cells.
Outputs (ROC, confusion matrix, metrics) will be saved in `../reports`.

In [None]:
# Imports (Kaggle usually has these packages preinstalled)
from pathlib import Path
import json
import random
import numpy as np
import yaml

from fastai.vision.all import *

# Reproducibility
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # Same order as before: stdlib `random`, then NumPy, then PyTorch (CPU).
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # Nothing more to do on CPU-only machines.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

## Config

In [None]:
# Load config
config_path = Path('../configs/config.yaml').resolve()
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(config_path, encoding='utf-8') as f:
    CFG = yaml.safe_load(f)

# safe_load returns None for an empty file and may return a list/scalar for
# other content; fail here with a clear message instead of at the first
# CFG[...] / CFG.get(...) lookup in a later cell.
if not isinstance(CFG, dict):
    raise TypeError(
        f"Expected a mapping in {config_path}, got {type(CFG).__name__}"
    )

# Bare expression: displays the loaded config as the cell output.
CFG

## Data: ImageDataLoaders.from_folder

In [None]:
# Read the seed once so the global RNG seeding and the train/valid split
# always use the same value.
random_seed = CFG.get('random_seed', 42)
set_seed(random_seed)

dataset_root = Path(CFG['dataset_root'])  # update in config.yaml
# Raise explicitly rather than `assert`: assertions are stripped when Python
# runs with -O, which would let a bad path through to the loader.
if not dataset_root.exists():
    raise FileNotFoundError(
        f"Dataset path not found: {dataset_root}. Update configs/config.yaml"
    )

# Build dataloaders: expects subfolders per class.
# Each item is first resized to 460, then the batch-level augmentations
# produce the final `image_size` and apply ImageNet normalization.
dls = ImageDataLoaders.from_folder(
    dataset_root,
    valid_pct=CFG['valid_pct'],
    seed=random_seed,
    item_tfms=Resize(460),
    batch_tfms=[
        *aug_transforms(size=CFG['image_size'], min_scale=0.75),
        Normalize.from_stats(*imagenet_stats),
    ],
    bs=CFG['bs'],
)

# Visual sanity check of images and labels.
dls.show_batch(max_n=9)

## Model: ResNet-34 + fine_tune

In [None]:
# Look up the architecture by the name given in the config.
model_name = CFG['model_name']
arch = getattr(models, model_name)

# Create the learner on the dataloaders, reporting accuracy as the metric.
learn = vision_learner(dls, arch, metrics=accuracy)

# Train for the configured number of epochs.
n_epochs = CFG['epochs']
learn.fine_tune(n_epochs)

## Evaluation: Confusion Matrix & ROC

In [None]:
# All evaluation artifacts (plots, metrics JSON) are written here.
reports_dir = Path('../reports').resolve()
reports_dir.mkdir(parents=True, exist_ok=True)

# Confusion matrix
interp = ClassificationInterpretation.from_learner(learn)
cm = interp.confusion_matrix()
print(cm)
interp.plot_confusion_matrix(figsize=(6,6))
save_confusion = reports_dir/'cls_confusion_matrix.png'
# Saves the current figure, i.e. the confusion-matrix plot drawn just above.
plt.savefig(save_confusion, bbox_inches='tight')
print(f"Saved: {save_confusion}")

# Plain-string class labels: `dls.vocab` itself is not a list and cannot be
# passed to json.dump directly.
class_names = [str(c) for c in dls.vocab] if hasattr(dls, 'vocab') else None

# ROC (one-vs-rest if >2 classes)
probs, targs = learn.get_preds()
# Reset explicitly so a value left over from an earlier run of this cell is
# never reported when the ROC step is skipped.
auc = None
try:
    # Imported lazily so a missing scikit-learn only skips the ROC step.
    from sklearn.metrics import roc_auc_score, RocCurveDisplay
    y_true = targs.cpu().numpy()
    y_score = probs.cpu().numpy()
    n_classes = y_score.shape[1]

    if n_classes == 2:
        auc = roc_auc_score(y_true, y_score[:, 1])
        # Single curve for the positive class (column 1); default legend label.
        curves = [(y_true, y_score[:, 1], None)]
    else:
        # macro AUC for multi-class
        auc = roc_auc_score(y_true, y_score, multi_class='ovr', average='macro')
        # One one-vs-rest curve per class.
        curves = [
            (
                (y_true == c).astype(int),
                y_score[:, c],
                class_names[c] if class_names else f"class {c}",
            )
            for c in range(n_classes)
        ]

    # Draw every curve on ONE shared axes. Without `ax=`, each
    # from_predictions call opens its own figure and only the last class
    # would end up in the saved image.
    roc_fig, roc_ax = plt.subplots()
    for curve_true, curve_score, curve_name in curves:
        RocCurveDisplay.from_predictions(
            curve_true, curve_score, name=curve_name, ax=roc_ax
        )

    save_roc = reports_dir/'cls_roc.png'
    roc_fig.savefig(save_roc, bbox_inches='tight')
    print(f"Saved: {save_roc}")
except Exception as e:
    # Deliberate best-effort: ROC is optional, the metrics below are still saved.
    print("ROC computation skipped:", e)

# Save metrics
metrics = {
    # validate() returns [loss, *metrics]; accuracy is the only metric registered.
    'accuracy': float(learn.validate()[1]),
    'auc': float(auc) if auc is not None else None,
    'classes': class_names,
}
with open(reports_dir/'cls_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)
print("Saved:", reports_dir/'cls_metrics.json')

## Export model

In [None]:
# Write the exported learner into the reports directory alongside the other
# artifacts, and report the path that was written.
export_path = reports_dir/'cls_export.pkl'
learn.export(export_path)
print("Saved:", export_path)