In [None]:
# imports
import os
import random
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from torchvision.models import resnet18, ResNet18_Weights


In [None]:
# -----------------
# 1) config
# -----------------
# Seed every RNG the notebook draws from (python, numpy, torch).
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

# dataset path - set the FOOD11_DIR environment variable to point at the
# dataset root on another machine; the original local path remains the default
data_dir = os.environ.get("FOOD11_DIR", r"C:/Users/soham/Desktop/food_class_mini_proj/food-11")
train_folder = os.path.join(data_dir, "training")
test_folder = os.path.join(data_dir, "evaluation")

batch_size = 16
num_workers = 0
img_size = 224            # images are resized to img_size x img_size
EPOCHS_HEAD = 6           # epochs for the head-only training phase
EPOCHS_FINETUNE = 6       # epochs for the optional fine-tuning phase
save_path = "./food_resnet18.pth"            # weights after the last epoch
best_save_path = "./food_resnet18_best.pth"  # weights of the best-scoring epoch
FINETUNE = True
UNFREEZE_LAST_N_LAYERS = 2  # how many trailing resnet blocks fine-tuning unfreezes


In [None]:
# -----------------
# 2) transforms + dataloaders
# -----------------
# Per-channel statistics handed to Normalize for both splits.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training images get light augmentation (flip, small rotation, colour jitter).
train_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])
# Evaluation images are only resized and normalised, so metrics are deterministic.
val_transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

trainset = torchvision.datasets.ImageFolder(root=train_folder, transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.ImageFolder(root=test_folder, transform=val_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# Class names are taken from the training split only, so the evaluation split
# must yield the same ordered class list; otherwise every label index would
# silently refer to a different class when metrics are computed.
if testset.classes != trainset.classes:
    raise ValueError(
        f"class folders differ between splits: train={trainset.classes} test={testset.classes}"
    )

classes = trainset.classes
print("detected classes (order):", classes)
print("train samples:", len(trainset), "test samples:", len(testset))


In [None]:
# -----------------
# 3) visualize a few samples
# -----------------
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Per-channel tensors used to undo the Normalize step (imagenet mean/std),
# shaped (3, 1, 1) so they broadcast over the height and width of each image.
display_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
display_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)

plt.figure(figsize=(12, 4))
for i in range(min(8, len(images))):
    plt.subplot(2, 4, i + 1)
    # unnormalize for display; clamp because rounding can push values
    # marginally outside [0, 1], which makes imshow warn and clip on its own
    img = (images[i].cpu() * display_std + display_mean).clamp(0, 1)
    npimg = img.numpy()
    # imshow expects channels last, the tensor is channels first
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.title(classes[labels[i]])
    plt.axis('off')
plt.suptitle('sample training images', size=16)
plt.show()


In [None]:
# -----------------
# 4) build resnet18 (transfer learning)
# -----------------
weights = ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

# Freeze every pretrained parameter in one call.
model.requires_grad_(False)

# Swap the classifier for a fresh linear layer with one output per detected
# class; being newly constructed, its parameters are trainable.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, len(classes))

model = model.to(device)
print(model.fc)


In [None]:
# -----------------
# helpers: train / eval
# -----------------
# Classification loss reused by every training and evaluation call below.
criterion = nn.CrossEntropyLoss()
# Only the parameters of model.fc are handed to this optimizer.
optimizer = optim.Adam(params=model.fc.parameters(), lr=1e-3)

def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report the epoch averages.

    The model is switched to training mode, and for every batch the gradients
    are reset, the loss is back-propagated and the optimizer takes one step.

    Args:
        model: module called on each input batch.
        loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: callable returning the loss for ``(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, or ``(0.0, 0.0)`` when
        the loader yields no samples (mirrors the empty-loader guard in
        ``eval_model`` instead of raising ZeroDivisionError).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # the criterion result is scaled by the batch size so that dividing by
        # `total` at the end weights each sample equally
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    if total == 0:
        # nothing was iterated; avoid dividing by zero
        return 0.0, 0.0
    return running_loss / total, correct / total

def eval_model(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    Args:
        model: module called on each input batch; switched to eval mode.
        loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: callable returning the loss for ``(outputs, labels)``.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy, predictions, labels)`` where the last two are
        numpy arrays concatenated over all batches. When no sample was seen the
        result is ``(0, 0, empty_array, empty_array)``.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    pred_chunks, label_chunks = [], []

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)
            # scale by the batch size so the final division is per sample
            loss_sum += batch_loss.item() * batch_x.size(0)
            batch_pred = torch.max(logits, 1)[1]
            n_correct += (batch_pred == batch_y).sum().item()
            n_seen += batch_y.size(0)
            pred_chunks.append(batch_pred.cpu().numpy())
            label_chunks.append(batch_y.cpu().numpy())

    # an empty loader would otherwise divide by zero and concatenate nothing
    if n_seen == 0:
        return 0, 0, np.array([]), np.array([])

    return (
        loss_sum / n_seen,
        n_correct / n_seen,
        np.concatenate(pred_chunks),
        np.concatenate(label_chunks),
    )


In [None]:
# -----------------
# 5) train head (fc) only
# -----------------
# NOTE(review): the "val" numbers here are measured on testloader, the same
# loader the final evaluation cell iterates over, so the best checkpoint is
# picked on the evaluation data - consider a separate validation split.
best_acc = 0.0
history = {key: [] for key in ("train_loss", "train_acc", "val_loss", "val_acc")}

for epoch in range(EPOCHS_HEAD):
    train_loss, train_acc = train_one_epoch(model, trainloader, criterion, optimizer, device)
    val_loss, val_acc, _, _ = eval_model(model, testloader, criterion, device)

    # record this epoch's curves
    epoch_stats = (
        ("train_loss", train_loss),
        ("train_acc", train_acc),
        ("val_loss", val_loss),
        ("val_acc", val_acc),
    )
    for key, value in epoch_stats:
        history[key].append(value)

    print(f"head epoch {epoch+1}/{EPOCHS_HEAD} - train_acc={train_acc:.3f} val_acc={val_acc:.3f} train_loss={train_loss:.4f} val_loss={val_loss:.4f}")

    # checkpoint only when accuracy strictly improves on the best seen so far
    if not (val_acc <= best_acc):
        best_acc = val_acc
        torch.save(model.state_dict(), best_save_path)
        print("saved best head model ->", best_save_path)

# weights after the last head epoch, regardless of whether it was the best one
torch.save(model.state_dict(), save_path)
print("saved head model to", save_path)


In [None]:
# -----------------
# 6) optional fine-tune last layers + fc
# -----------------
if FINETUNE:
    # start from the best checkpoint written so far, when one is on disk
    if os.path.exists(best_save_path):
        best_state = torch.load(best_save_path, map_location=device)
        model.load_state_dict(best_state)
        print("loaded best head model for finetuning")

    # unfreeze last n resnet blocks (n is clamped to 1..len(layers))
    layers = [model.layer1, model.layer2, model.layer3, model.layer4]
    n = min(max(UNFREEZE_LAST_N_LAYERS, 1), len(layers))

    # freeze everything first, then re-enable the chosen blocks and the head
    model.requires_grad_(False)
    for trainable_part in layers[-n:] + [model.fc]:
        trainable_part.requires_grad_(True)

    # fresh optimizer over the trainable parameters, with a smaller step size
    # than the one used for the head-only phase
    ft_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(ft_params, lr=1e-4)
    print(f"finetuning last {n} resnet blocks + fc, trainable params: {sum(p.numel() for p in ft_params)}")

    # a fine-tuned epoch has to beat the head-only best before it overwrites
    # the best checkpoint
    best_acc_ft = best_acc
    for epoch in range(EPOCHS_FINETUNE):
        train_loss, train_acc = train_one_epoch(model, trainloader, criterion, optimizer, device)
        val_loss, val_acc, _, _ = eval_model(model, testloader, criterion, device)

        # fine-tune epochs are appended to the same history lists
        epoch_stats = (
            ("train_loss", train_loss),
            ("train_acc", train_acc),
            ("val_loss", val_loss),
            ("val_acc", val_acc),
        )
        for key, value in epoch_stats:
            history[key].append(value)

        print(f"finetune epoch {epoch+1}/{EPOCHS_FINETUNE} - train_acc={train_acc:.3f} val_acc={val_acc:.3f}")

        if not (val_acc <= best_acc_ft):
            best_acc_ft = val_acc
            torch.save(model.state_dict(), best_save_path)
            print("saved best finetuned model ->", best_save_path)

    # weights after the last fine-tune epoch
    torch.save(model.state_dict(), save_path)
    print("saved finetuned model to", save_path)


In [None]:
# -----------------
# 7) evaluation & metrics (full test set)
# -----------------
if os.path.exists(best_save_path):
    model.load_state_dict(torch.load(best_save_path, map_location=device))
    print("loaded best model for final evaluation ->", best_save_path)

# Reuse the eval_model helper instead of repeating its inference loop; wrapping
# the loader in tqdm keeps the progress bar. For an empty loader the helper
# returns empty arrays, which the size check below handles.
_, _, preds, labels = eval_model(model, tqdm(testloader, desc='evaluating'), criterion, device)

if preds.size == 0:
    print('no predictions (empty testset?)')
else:
    # Pin the label set to every known class index. Without it sklearn infers
    # the labels from the data, so a class missing from both the ground truth
    # and the predictions would make target_names mismatch and would shrink
    # the confusion matrix below len(classes).
    class_ids = np.arange(len(classes))
    overall_acc = accuracy_score(labels, preds)
    report = classification_report(
        labels, preds,
        labels=class_ids, target_names=classes, digits=4,
        zero_division=0,  # classes without support score 0 without a warning
    )
    cm = confusion_matrix(labels, preds, labels=class_ids)
    support = cm.sum(axis=1)  # number of true samples per class (row sums)
    # the tiny epsilon keeps classes with zero samples at 0 instead of NaN
    per_class_acc = cm.diagonal() / (support + 1e-12)

    print(f"overall accuracy: {overall_acc:.4f}")
    print('\nclassification report:\n', report)
    for i, c in enumerate(classes):
        print(f"{c:20s} -> accuracy: {per_class_acc[i]:.4f} (n={int(support[i])})")

    os.makedirs('results', exist_ok=True)
    with open('results/metrics_resnet.txt', 'w', encoding='utf-8') as f:
        f.write(f"overall_accuracy: {overall_acc:.6f}\n\n")
        f.write('classification_report:\n')
        f.write(report)
        f.write('\nper_class_accuracy:\n')
        for i, c in enumerate(classes):
            f.write(f"{c:20s} -> {per_class_acc[i]:.6f} (n={int(support[i])})\n")

    # seaborn is optional: only a missing package triggers the plain-matplotlib
    # fallback, so genuine plotting errors are no longer swallowed
    try:
        import seaborn as sns
    except ImportError:
        sns = None

    fig, ax = plt.subplots(figsize=(10, 8))
    if sns is not None:
        sns.heatmap(cm, annot=True, fmt='d', xticklabels=classes, yticklabels=classes, cmap='Blues', ax=ax)
    else:
        im = ax.imshow(cm, interpolation='nearest')
        fig.colorbar(im, ax=ax)
        ax.set_xticks(np.arange(len(classes)))
        ax.set_xticklabels(classes, rotation=45, ha='right')
        ax.set_yticks(np.arange(len(classes)))
        ax.set_yticklabels(classes)
    ax.set_xlabel('predicted')
    ax.set_ylabel('true')
    ax.set_title(f'confusion matrix (acc={overall_acc:.3f})')
    fig.tight_layout()
    fig.savefig('results/confusion_matrix_resnet.png', dpi=200)
    plt.show()
    print('saved results/confusion_matrix_resnet.png and results/metrics_resnet.txt')


In [None]:
# -----------------
# 8) quick inference helper
# -----------------
from PIL import Image
def predict_image(image_path, model, classes, transform, device):
    """Classify a single image file and return its predicted class name.

    Args:
        image_path: path of the image to open.
        model: module called on the transformed image; switched to eval mode.
        classes: sequence of class names indexed by the predicted class id.
        transform: callable turning the RGB image into a tensor; a batch
            dimension is added to its result before the forward pass.
        device: device the input tensor is moved to.

    Returns:
        The entry of ``classes`` at the index with the highest model output.
    """
    # Open via a context manager so the file handle is released right after the
    # pixels have been converted; convert() returns a separate image object.
    with Image.open(image_path) as raw:
        img = raw.convert('RGB')
    x = transform(img).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        out = model(x)
        _, pred = torch.max(out, 1)
    return classes[pred.item()]

# example:
# print(predict_image('path/to/image.jpg', model, classes, val_transform, device))
