# Image Classification (Transfer Learning)

This notebook mirrors the Streamlit page implementation. It loads images from `../data/` (subfolders per class), builds a pretrained CNN (ResNet18/EfficientNet-B0), fine-tunes with optional backbone freezing, and reports accuracy, confusion matrix and per-class F1.

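Run from the project root (the activation command below is the Windows form; on macOS/Linux use `source .venv/bin/activate`):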

```bash
.venv\Scripts\activate
jupyter lab deep_learning/image_classification/notebooks/image_classification.ipynb
```



In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import torchvision
from torchvision import datasets, transforms
from torchvision.models import resnet18, efficientnet_b0
from torchvision.models import ResNet18_Weights, EfficientNet_B0_Weights

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Configuration
# `__file__` is only defined when this code runs as a script/module; inside a
# Jupyter kernel it does not exist and referencing it raises NameError. Fall
# back to the current working directory in that case.
# NOTE(review): the fallback assumes the kernel's working directory is the
# notebook's own folder (so that `../data` is the dataset root) — confirm for
# your Jupyter setup.
try:
    _BASE_DIR = Path(__file__).resolve().parent
except NameError:
    _BASE_DIR = Path.cwd().resolve()

# Dataset root: a `data` folder that is a sibling of this file's directory.
DATA_ROOT = _BASE_DIR.parent / 'data'
DATA_ROOT.mkdir(parents=True, exist_ok=True)

IMG_SIZE = 224            # images are resized to IMG_SIZE x IMG_SIZE
BATCH_SIZE = 16           # batch size for both data loaders
EPOCHS = 2                # number of passes over the training split
LR = 1e-3                 # AdamW learning rate
FREEZE_BACKBONE = True    # train only the new classification head
MODEL_NAME = 'ResNet18'  # or 'EfficientNet-B0'

# Transforms
# Both pipelines resize to a square IMG_SIZE image, convert to a tensor and
# normalise with the per-channel statistics below; the training pipeline
# additionally applies a random horizontal flip.
_target_size = (IMG_SIZE, IMG_SIZE)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

train_tf = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]
)
val_tf = transforms.Compose(
    [
        transforms.Resize(_target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=_norm_mean, std=_norm_std),
    ]
)

# Dataset
# Two ImageFolder views over the same directory: one applies the training
# transform, the other the validation transform. Assigning `val_tf` to the
# dataset shared by both `random_split` subsets would also switch the training
# subset to `val_tf` and silently disable the augmentation.
full_ds = datasets.ImageFolder(str(DATA_ROOT), transform=train_tf)
_eval_view = datasets.ImageFolder(str(DATA_ROOT), transform=val_tf)
if _eval_view.samples != full_ds.samples:
    # Guards the assumption that both scans list the files in the same order,
    # which is what makes the index-based split below valid.
    raise RuntimeError('ImageFolder returned inconsistent sample lists for ' + str(DATA_ROOT))

classes = full_ds.classes
n_total = len(full_ds)
if n_total < 2:
    # With a single image the training split would be empty and the shuffling
    # DataLoader below would fail with a far less helpful message.
    raise ValueError(
        f'Need at least 2 images under {DATA_ROOT} to build train/validation splits, found {n_total}.'
    )

val_split = 0.2
n_val = max(1, int(n_total * val_split))

# One random permutation, cut into disjoint validation / training index sets.
_shuffled = torch.randperm(n_total).tolist()
train_ds = torch.utils.data.Subset(full_ds, _shuffled[n_val:])
val_ds = torch.utils.data.Subset(_eval_view, _shuffled[:n_val])

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

# Model
# Pick the compute device, load the pretrained network selected by MODEL_NAME
# and swap its final linear layer for one with len(classes) outputs.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_classes = len(classes)

if MODEL_NAME == 'ResNet18':
    weights = ResNet18_Weights.DEFAULT
    model = resnet18(weights=weights)
    in_features = model.fc.in_features
    model.fc = nn.Linear(in_features, n_classes)
    head_prefix = 'fc'
else:
    # Any other MODEL_NAME value selects EfficientNet-B0.
    weights = EfficientNet_B0_Weights.DEFAULT
    model = efficientnet_b0(weights=weights)
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(in_features, n_classes)
    head_prefix = 'classifier.1'

if FREEZE_BACKBONE:
    # Disable gradients for every parameter that is not part of the new head.
    for param_name, param in model.named_parameters():
        if param_name.startswith(head_prefix):
            continue
        param.requires_grad = False

model = model.to(device)
crit = nn.CrossEntropyLoss()

# Only parameters that still require gradients are handed to the optimiser.
trainable_params = [p for p in model.parameters() if p.requires_grad]
opt = torch.optim.AdamW(trainable_params, lr=LR)

# Train
# EPOCHS passes over train_loader; each batch performs one optimiser step on
# the cross-entropy loss.
for epoch in range(EPOCHS):
    model.train()
    for batch in train_loader:
        inputs = batch[0].to(device)
        targets = batch[1].to(device)

        opt.zero_grad()  # clear gradients left over from the previous step
        out = model(inputs)
        loss = crit(out, targets)
        loss.backward()
        opt.step()

# Validate
# Run the model over val_loader in eval mode with gradients disabled and
# collect the ground-truth labels and the arg-max predictions.
model.eval()
true = []
pred = []
with torch.no_grad():
    for batch in val_loader:
        images = batch[0].to(device)
        labels = batch[1].to(device)
        scores = model(images)
        guesses = scores.argmax(dim=1)
        true.extend(labels.cpu().numpy())
        pred.extend(guesses.cpu().numpy())

# Metrics
# Passing the full list of class indices keeps the per-class arrays and the
# confusion matrix at len(classes) entries even if a class is missing from
# the validation split.
label_ids = list(range(len(classes)))

acc = accuracy_score(true, pred)
prec, rec, f1, _ = precision_recall_fscore_support(
    true,
    pred,
    average=None,
    labels=label_ids,
    zero_division=0,
)
cm = confusion_matrix(true, pred, labels=label_ids)

print('Accuracy:', acc)
print('Per-class F1:', f1)

# Confusion-matrix heatmap: rows are actual classes, columns are predictions.
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=classes, yticklabels=classes)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

