In [1]:
# Requires the `kagglehub` package in the kernel environment (e.g. `%pip install kagglehub`);
# the recorded output of this cell is a ModuleNotFoundError raised when it is missing.
import kagglehub
# `path` is the base directory that the data-exploration cell below prints and walks.
path = kagglehub.dataset_download("zaidworks0508/cow-breed-classification-dataset")

ModuleNotFoundError: No module named 'kagglehub'

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from PIL import Image
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, models
from tqdm import tqdm

# Query CUDA availability once and reuse the answer below
has_cuda = torch.cuda.is_available()

# Environment report: library versions and GPU details
print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"CUDA Available: {has_cuda}")
if has_cuda:
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

# Set device: models and batches are moved here by the later cells
device = torch.device('cuda') if has_cuda else torch.device('cpu')
print(f"\nUsing device: {device}")

# Cattle Breed Classification - Comparative Study

This notebook compares three CNN architectures for cattle breed classification:
1. ResNet18
2. ResNet50
3. EfficientNetB1

Using the zaidworks0508/cow-breed-classification-dataset with a transfer-learning approach.

## Data Exploration and Preparation

In [None]:
# Explore the downloaded dataset structure
print(f"Base path: {path}")
print("\nExploring directory structure...")

def explore_directory(dir_path, max_depth=3, current_depth=0, prefix=""):
    """Print an indented preview of the directory tree rooted at ``dir_path``.

    Args:
        dir_path: Directory to list.
        max_depth: Number of directory levels to print; nothing is printed
            once ``current_depth`` reaches this value.
        current_depth: Depth of ``dir_path`` relative to the starting
            directory (used by the recursive calls).
        prefix: Indentation string printed before each entry; grows by two
            spaces per level.

    At most the first 10 entries (in sorted name order) of each directory are
    shown. Listing errors are printed instead of raised so that one
    unreadable folder does not abort the whole preview.
    """
    if current_depth >= max_depth:
        return

    try:
        # os.listdir returns names in arbitrary order; sort so the 10-entry
        # preview is the same on every run and every machine.
        entries = sorted(os.listdir(dir_path))
    except PermissionError:
        print(f"{prefix}❌ Permission denied")
        return
    except OSError as e:
        # Missing path, not a directory, I/O failure, ...
        print(f"{prefix}❌ Error: {e}")
        return

    for item in entries[:10]:  # Limit to first 10 items per directory
        item_path = os.path.join(dir_path, item)
        if os.path.isdir(item_path):
            print(f"{prefix}📁 {item}/")
            explore_directory(item_path, max_depth, current_depth + 1, prefix + "  ")
        else:
            print(f"{prefix}📄 {item}")

explore_directory(path, max_depth=4)

# Find the dataset path
print("\n" + "="*60)
print("Searching for cattle breed images...")
print("="*60)

dataset_path = None
for root, dirs, files in os.walk(path):
    # Compare extensions case-insensitively so names such as "IMG_01.JPG" count.
    if any(name.lower().endswith(('.jpg', '.jpeg', '.png')) for name in files):
        # os.walk is top-down, so `root` is the first folder that directly
        # holds image files, i.e. one breed folder. The class listing and the
        # dataset class below expect <dataset_path>/<breed>/<image>, so the
        # dataset root is that folder's parent. When the images sit directly
        # in the download directory there is nothing to step up to.
        dataset_path = root if root == path else os.path.dirname(root)
        print(f"✅ Found images at: {root}")
        print(f"Dataset root (parent of the image folder): {dataset_path}")
        break

if dataset_path is None:
    dataset_path = path
    print(f"Using base path as dataset path: {dataset_path}")

# List classes and count images
# Every sub-directory of dataset_path is treated as one breed (class).
classes = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]
print(f"\n📊 Cattle breeds found: {sorted(classes)}")

print("\n📈 Image counts per breed:")
total_images = 0
for cls in sorted(classes):
    cls_path = os.path.join(dataset_path, cls)
    try:
        # Case-insensitive match so ".JPG" / ".PNG" files are not silently left out.
        num_images = sum(
            1 for f in os.listdir(cls_path)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
    except OSError as e:
        # An unreadable breed folder is reported and skipped; the rest are still counted.
        print(f"  {cls}: Error - {e}")
        continue
    print(f"  {cls}: {num_images:,} images")
    total_images += num_images

print(f"\n  Total images: {total_images:,}")
print(f"  Total breeds: {len(classes)}")

In [None]:
# Configuration
IMG_SIZE = 224  # Using 224x224 for pre-trained models
BATCH_SIZE = 32  # samples per batch for both DataLoaders
EPOCHS = 30  # maximum number of epochs passed to train_model (early stopping may end sooner)
VALIDATION_SPLIT = 0.2  # fraction of the images held out for validation
LEARNING_RATE = 0.001  # initial learning rate given to each Adam optimizer

# Custom Dataset class
class CattleDataset(Dataset):
    """Image dataset read from a ``root_dir/<class name>/<image file>`` layout.

    Each sub-directory of ``root_dir`` is one class; class indices follow the
    sorted directory names. Files ending in .png/.jpg/.jpeg (any letter case)
    are collected, in sorted name order within each class.

    Attributes set by ``__init__``:
        images: list of image file paths.
        labels: list of integer class indices, parallel to ``images``.
        class_names: sorted list of class directory names.
        class_to_idx: mapping from class name to class index.
    """

    def __init__(self, root_dir, transform=None):
        """Scan ``root_dir`` and record every image path with its class index.

        Args:
            root_dir: Directory whose sub-directories are the classes.
            transform: Optional callable applied to each PIL image in
                ``__getitem__``.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_names = []

        # Get class directories
        classes = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
        self.class_names = classes
        self.class_to_idx = {cls: idx for idx, cls in enumerate(classes)}

        # Load all image paths and labels
        for cls in classes:
            cls_path = os.path.join(root_dir, cls)
            cls_idx = self.class_to_idx[cls]
            # os.listdir order is arbitrary. Sorting fixes the sample order, so
            # an index-based split with a fixed seed selects the same files on
            # every machine.
            for img_name in sorted(os.listdir(cls_path)):
                # Case-insensitive so ".JPG" / ".PNG" files are not dropped.
                if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                    self.images.append(os.path.join(cls_path, img_name))
                    self.labels.append(cls_idx)

    def __len__(self):
        """Return the number of collected images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened with PIL and converted to RGB; ``transform`` is
        applied when one was given.
        """
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Data augmentation and preprocessing for training
def _tensor_and_normalize():
    """Return fresh instances of the two closing steps shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]

# Random augmentations, applied in this order after the initial resize
_augmentations = [
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
]

train_transform = transforms.Compose(
    [transforms.Resize((IMG_SIZE, IMG_SIZE))] + _augmentations + _tensor_and_normalize()
)

# Validation: resize, tensor conversion and normalization only (no random augmentation)
val_transform = transforms.Compose(
    [transforms.Resize((IMG_SIZE, IMG_SIZE))] + _tensor_and_normalize()
)

from torch.utils.data import random_split

# Create full dataset (no transform here; each split applies its own later)
full_dataset = CattleDataset(dataset_path, transform=None)
class_names = full_dataset.class_names
num_classes = len(class_names)

print(f"Class names: {class_names}")
print(f"Number of classes: {num_classes}")
print(f"Total images: {len(full_dataset)}")

# Split dataset into train and validation
# Only index positions are split; the images are looked up through full_dataset.
train_size = int((1 - VALIDATION_SPLIT) * len(full_dataset))
val_size = len(full_dataset) - train_size
split_generator = torch.Generator().manual_seed(42)  # fixed seed for the split
train_indices, val_indices = random_split(
    range(len(full_dataset)),
    [train_size, val_size],
    generator=split_generator,
)

# Create separate datasets with different transforms
class SubsetDataset(Dataset):
    """Subset of a dataset's samples with its own transform.

    ``dataset`` must expose parallel ``images`` (file paths) and ``labels``
    sequences; ``indices`` selects the positions that belong to this subset.
    """

    def __init__(self, dataset, indices, transform):
        """Store the source dataset, a list copy of ``indices`` and the transform."""
        self.dataset, self.indices, self.transform = dataset, list(indices), transform

    def __len__(self):
        """Return the number of selected samples."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the ``idx``-th selected sample."""
        # Translate the subset position into a position in the source dataset.
        source_idx = self.indices[idx]
        source_path = self.dataset.images[source_idx]
        target = self.dataset.labels[source_idx]
        picture = Image.open(source_path).convert('RGB')

        if not self.transform:
            return picture, target
        return self.transform(picture), target

# Same underlying image list, different transform per split
train_dataset = SubsetDataset(full_dataset, train_indices, train_transform)
val_dataset = SubsetDataset(full_dataset, val_indices, val_transform)

# Create data loaders
# Options common to both loaders; only shuffling differs between the splits.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

print(f"\nTraining samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Batches per epoch (train): {len(train_loader)}")
print(f"Batches per epoch (val): {len(val_loader)}")

## Visualize Sample Images

In [None]:
# Display sample images
plt.figure(figsize=(15, 10))

# Denormalization for display: inverts the Normalize step of the data pipeline
inv_normalize = transforms.Normalize(
    mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
    std=[1/0.229, 1/0.224, 1/0.225]
)

# Get a batch of training data
images, labels = next(iter(train_loader))

# Show up to nine samples on a 3x3 grid
for slot, (sample, target) in enumerate(zip(images[:9], labels[:9]), start=1):
    plt.subplot(3, 3, slot)

    # Tensor (C, H, W) -> array (H, W, C), clipped to the displayable [0, 1] range
    img = np.clip(inv_normalize(sample).permute(1, 2, 0).numpy(), 0, 1)

    plt.imshow(img)
    plt.title(class_names[target.item()])
    plt.axis('off')

plt.tight_layout()
plt.show()

## Training Infrastructure: Early Stopping & Training Loop

In [None]:
# Early Stopping implementation
class EarlyStopping:
    """Track the best validation loss and signal when training should stop.

    Call the instance once per epoch with the validation loss and the model.
    A loss counts as an improvement when it is at most
    ``best_loss - min_delta``; on improvement the model's ``state_dict`` is
    deep-copied into ``best_model`` and the counter resets. After
    ``patience`` consecutive non-improving calls ``early_stop`` becomes True.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        min_delta: Minimum decrease of the loss required to count as progress.
        verbose: Print the counter on every non-improving call.
    """

    def __init__(self, patience=5, min_delta=0, verbose=True):
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_model = None

    def __call__(self, val_loss, model):
        """Record ``val_loss`` for this epoch and update the stopping state."""
        # No best yet -> any real number qualifies.
        threshold = float('inf') if self.best_loss is None else self.best_loss - self.min_delta

        # Every comparison with NaN is False, so a diverged (NaN) loss is
        # counted as "no improvement". It is never recorded as the best loss
        # and never overwrites the saved weights.
        if val_loss <= threshold:
            self.best_loss = val_loss
            self.best_model = copy.deepcopy(model.state_dict())
            self.counter = 0
        else:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

def _run_epoch(model, loader, criterion, desc, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled. Loss and accuracy are averaged per sample. The per-sample loss
    average assumes ``criterion`` returns the batch mean (the default of
    ``nn.CrossEntropyLoss``).

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    pbar = tqdm(loader, desc=desc)
    with torch.set_grad_enabled(training):
        for inputs, labels in pbar:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Weight each batch by its size so a short final batch does not
            # distort the epoch average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            correct += outputs.argmax(dim=1).eq(labels).sum().item()
            seen += batch_size

            # Running averages over the samples seen so far in this epoch.
            pbar.set_postfix({
                'loss': f'{loss_sum/seen:.4f}',
                'acc': f'{100.*correct/seen:.2f}%'
            })

    if seen == 0:
        raise ValueError(f"{desc.strip()}: data loader produced no samples")

    return loss_sum / seen, correct / seen


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, model_name):
    """Train a PyTorch model with early stopping and learning rate scheduling.

    Args:
        model: Model to train; expected to be on the global ``device`` already.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss callable ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Maximum number of epochs to run.
        model_name: Label used in the progress output.

    Returns:
        dict with the per-epoch lists ``train_loss``, ``train_acc``,
        ``val_loss`` and ``val_acc``.

    After training (whether it ran all epochs or stopped early) the model is
    reset to the weights of the epoch with the best validation loss.
    """

    # Halve the learning rate when the validation loss has not improved for 3 epochs
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=3, min_lr=1e-7
    )

    # Stop after 5 epochs without validation-loss improvement
    early_stopping = EarlyStopping(patience=5, verbose=True)

    # History tracking
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    print(f"\nTraining {model_name}...")
    print("="*70)

    for epoch in range(num_epochs):
        avg_train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion,
            f'Epoch {epoch+1}/{num_epochs} [Train]', optimizer
        )
        avg_val_loss, val_accuracy = _run_epoch(
            model, val_loader, criterion,
            f'Epoch {epoch+1}/{num_epochs} [Val]  '
        )

        # Update history
        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(train_accuracy)
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_accuracy)

        # Print epoch summary
        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'  Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f}')
        print(f'  Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}')
        print(f'  Learning Rate: {optimizer.param_groups[0]["lr"]:.2e}')

        # Learning rate scheduling (report only when the rate actually changed)
        old_lr = optimizer.param_groups[0]['lr']
        scheduler.step(avg_val_loss)
        new_lr = optimizer.param_groups[0]['lr']
        if old_lr != new_lr:
            print(f'  Learning rate reduced: {old_lr:.2e} -> {new_lr:.2e}')

        # Early stopping check; the best weights are restored once, after the loop
        early_stopping(avg_val_loss, model)
        if early_stopping.early_stop:
            print("\nEarly stopping triggered!")
            break

    # Load best model
    if early_stopping.best_model is not None:
        model.load_state_dict(early_stopping.best_model)
        print("\nRestored best model weights")

    return history

## Model 1: ResNet18

In [None]:
# Build ResNet18 model
def build_resnet18_model(num_classes=3):
    """Return an ImageNet-pretrained ResNet18 with a frozen backbone and a new head.

    Every pretrained parameter has ``requires_grad`` switched off; the
    original ``fc`` layer is replaced by a freshly initialised
    Linear(256) -> Linear(128) -> Linear(num_classes) stack with ReLU and
    dropout between the layers, which is the only trainable part.
    """
    backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Freeze all pretrained weights before the new head is attached
    for weight in backbone.parameters():
        weight.requires_grad = False

    in_dim = backbone.fc.in_features
    head_layers = [
        nn.Linear(in_dim, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, num_classes),
    ]
    backbone.fc = nn.Sequential(*head_layers)

    return backbone

# Create model and move it to the selected device
resnet18_model = build_resnet18_model(num_classes=num_classes).to(device)

# Print model summary
print("ResNet18 Model Architecture:")
print("="*70)
print(resnet18_model)

# Count parameters in a single pass
total_params = 0
trainable_params = 0
for tensor in resnet18_model.parameters():
    total_params += tensor.numel()
    if tensor.requires_grad:
        trainable_params += tensor.numel()

print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train ResNet18
# `criterion` is created here and reused by the ResNet50 and EfficientNetB1 training cells.
criterion = nn.CrossEntropyLoss()
optimizer_resnet18 = optim.Adam(resnet18_model.parameters(), lr=LEARNING_RATE)

# Arguments are passed by keyword so each role is explicit at the call site
history_resnet18 = train_model(
    model=resnet18_model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer_resnet18,
    num_epochs=EPOCHS,
    model_name="ResNet18",
)

## Model 2: ResNet50

In [None]:
# Build ResNet50 model
def build_resnet50_model(num_classes=3):
    """Return an ImageNet-pretrained ResNet50 with a frozen backbone and a new head.

    Every pretrained parameter has ``requires_grad`` switched off; the
    original ``fc`` layer is replaced by a freshly initialised
    Linear(256) -> Linear(128) -> Linear(num_classes) stack with ReLU and
    dropout between the layers, which is the only trainable part.
    """
    backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Freeze all pretrained weights before the new head is attached
    for weight in backbone.parameters():
        weight.requires_grad = False

    in_dim = backbone.fc.in_features
    head_layers = [
        nn.Linear(in_dim, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, num_classes),
    ]
    backbone.fc = nn.Sequential(*head_layers)

    return backbone

# Create model and move it to the selected device
resnet50_model = build_resnet50_model(num_classes=num_classes).to(device)

# Print model summary
print("ResNet50 Model Architecture:")
print("="*70)
print(resnet50_model)

# Count parameters in a single pass
total_params = 0
trainable_params = 0
for tensor in resnet50_model.parameters():
    total_params += tensor.numel()
    if tensor.requires_grad:
        trainable_params += tensor.numel()

print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train ResNet50
# Reuses `criterion` from the ResNet18 training cell.
optimizer_resnet50 = optim.Adam(resnet50_model.parameters(), lr=LEARNING_RATE)

# Arguments are passed by keyword so each role is explicit at the call site
history_resnet50 = train_model(
    model=resnet50_model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer_resnet50,
    num_epochs=EPOCHS,
    model_name="ResNet50",
)

## Model 3: EfficientNetB1

In [None]:
# Build EfficientNetB1 model
def build_efficientnet_model(num_classes=3):
    """Return an ImageNet-pretrained EfficientNet-B1 with a frozen backbone and a new head.

    Every pretrained parameter has ``requires_grad`` switched off; the
    original ``classifier`` is replaced by a freshly initialised stack
    (input dropout, then Linear(256) -> Linear(128) -> Linear(num_classes)
    with ReLU and dropout between the layers), which is the only trainable part.
    """
    backbone = models.efficientnet_b1(weights=models.EfficientNet_B1_Weights.IMAGENET1K_V1)

    # Freeze all pretrained weights before the new head is attached
    for weight in backbone.parameters():
        weight.requires_grad = False

    # Index 1 of the stock classifier is its Linear layer; reuse its input width
    in_dim = backbone.classifier[1].in_features
    head_layers = [
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_dim, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, num_classes),
    ]
    backbone.classifier = nn.Sequential(*head_layers)

    return backbone

# Create model and move it to the selected device
efficientnet_model = build_efficientnet_model(num_classes=num_classes).to(device)

# Print model summary
print("EfficientNetB1 Model Architecture:")
print("="*70)
print(efficientnet_model)

# Count parameters in a single pass
total_params = 0
trainable_params = 0
for tensor in efficientnet_model.parameters():
    total_params += tensor.numel()
    if tensor.requires_grad:
        trainable_params += tensor.numel()

print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train EfficientNetB1
# Reuses `criterion` from the ResNet18 training cell.
optimizer_efficientnet = optim.Adam(efficientnet_model.parameters(), lr=LEARNING_RATE)

# Arguments are passed by keyword so each role is explicit at the call site
history_efficientnet = train_model(
    model=efficientnet_model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer_efficientnet,
    num_epochs=EPOCHS,
    model_name="EfficientNetB1",
)

## Performance Visualization and Analysis

In [None]:
# Function to plot training history
def plot_training_history(history, model_name):
    """Draw accuracy and loss curves for one model and print its summary metrics.

    Args:
        history: dict with per-epoch lists under 'train_acc', 'val_acc',
            'train_loss' and 'val_loss'.
        model_name: Label used in the plot titles and the printed summary.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    epochs_range = range(1, len(history['train_acc']) + 1)

    # Left panel: accuracy, right panel: loss; both share the same layout
    for ax, (key, label) in zip(axes, (('acc', 'Accuracy'), ('loss', 'Loss'))):
        ax.plot(epochs_range, history[f'train_{key}'], label=f'Training {label}', marker='o')
        ax.plot(epochs_range, history[f'val_{key}'], label=f'Validation {label}', marker='s')
        ax.set_title(f'{model_name} - {label} Over Epochs', fontsize=14, fontweight='bold')
        ax.set_xlabel('Epoch', fontsize=12)
        ax.set_ylabel(label, fontsize=12)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # "Final" values are those of the last recorded epoch
    separator = '=' * 50
    summary_lines = [
        f"\n{model_name} - Final Metrics:",
        f"{separator}",
        f"Final Training Accuracy: {history['train_acc'][-1]:.4f}",
        f"Final Validation Accuracy: {history['val_acc'][-1]:.4f}",
        f"Best Validation Accuracy: {max(history['val_acc']):.4f}",
        f"Final Training Loss: {history['train_loss'][-1]:.4f}",
        f"Final Validation Loss: {history['val_loss'][-1]:.4f}",
        f"{separator}\n",
    ]
    for line in summary_lines:
        print(line)

### ResNet18 Performance

In [None]:
# Plot the ResNet18 accuracy/loss curves and print its final metrics
plot_training_history(history_resnet18, "ResNet18")

### ResNet50 Performance

In [None]:
# Plot the ResNet50 accuracy/loss curves and print its final metrics
plot_training_history(history_resnet50, "ResNet50")

### EfficientNetB1 Performance

In [None]:
# Plot the EfficientNetB1 accuracy/loss curves and print its final metrics
plot_training_history(history_efficientnet, "EfficientNetB1")

### Comparative Curves

In [None]:
# Compare all models - accuracy figure first, then loss figure
# Histories can differ in length (early stopping), so each model gets its own epoch axis.
epochs_range_res18 = range(1, len(history_resnet18['train_acc']) + 1)
epochs_range_res50 = range(1, len(history_resnet50['train_acc']) + 1)
epochs_range_eff = range(1, len(history_efficientnet['train_acc']) + 1)

# One entry per model: (legend label, epoch axis, history dict, marker)
model_curves = [
    ('ResNet18', epochs_range_res18, history_resnet18, 'o'),
    ('ResNet50', epochs_range_res50, history_resnet50, 's'),
    ('EfficientNetB1', epochs_range_eff, history_efficientnet, '^'),
]

for metric_key, metric_label in (('acc', 'Accuracy'), ('loss', 'Loss')):
    plt.figure(figsize=(15, 6))

    # Left panel: training curves, right panel: validation curves
    panels = (('train', 'Training'), ('val', 'Validation'))
    for position, (split_key, split_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for legend_label, epoch_axis, hist, marker in model_curves:
            plt.plot(epoch_axis, hist[f'{split_key}_{metric_key}'],
                     label=legend_label, marker=marker, linewidth=2)
        plt.title(f'{split_label} {metric_label} Comparison', fontsize=14, fontweight='bold')
        plt.xlabel('Epoch', fontsize=12)
        plt.ylabel(metric_label, fontsize=12)
        plt.legend(fontsize=11)
        plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## Evaluation: Confusion Matrix and Classification Report

In [None]:
# Function to evaluate model and generate metrics
def evaluate_model(model, model_name, data_loader):
    """Evaluate ``model`` on ``data_loader`` and report classification metrics.

    Prints accuracy, weighted precision/recall/F1 and a per-class report,
    and shows a confusion-matrix heatmap. Class names are read from the
    notebook-level ``class_names`` list.

    Args:
        model: Trained model, already on the global ``device``.
        model_name: Label used in the printed output and the plot title.
        data_loader: Loader yielding ``(inputs, labels)`` batches.

    Returns:
        dict with keys 'accuracy', 'precision', 'recall' and 'f1_score'.
    """
    print(f"\n{'='*60}")
    print(f"Evaluating {model_name}")
    print(f"{'='*60}")

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc="Evaluating"):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    y_true = np.array(all_labels)
    y_pred = np.array(all_preds)

    # Fix the label set to every known class. A random validation split may
    # contain (or the model may predict) only some of the classes; without an
    # explicit label list classification_report rejects `target_names` of a
    # different length and the confusion matrix no longer lines up with the
    # tick labels.
    label_ids = list(range(len(class_names)))

    # Compute metrics; zero_division=0 scores a class with no predictions or
    # no samples as 0 instead of emitting an undefined-metric warning.
    accuracy = np.mean(y_pred == y_true)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    print(f"\nOverall Metrics:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")

    # Classification report
    print(f"\nDetailed Classification Report:")
    print(classification_report(
        y_true, y_pred,
        labels=label_ids, target_names=class_names,
        digits=4, zero_division=0
    ))

    # Confusion matrix (rows: true class, columns: predicted class)
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names,
                cbar_kws={'label': 'Count'})
    plt.title(f'{model_name} - Confusion Matrix', fontsize=14, fontweight='bold')
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.tight_layout()
    plt.show()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }

# Evaluate models on the validation loader
metrics_resnet18 = evaluate_model(resnet18_model, "ResNet18", val_loader)
metrics_resnet50 = evaluate_model(resnet50_model, "ResNet50", val_loader)
metrics_efficientnet = evaluate_model(efficientnet_model, "EfficientNetB1", val_loader)

# Create comparison dataframe: one row per model, columns built from parallel lists
model_labels = ['ResNet18', 'ResNet50', 'EfficientNetB1']
all_metrics = [metrics_resnet18, metrics_resnet50, metrics_efficientnet]
all_histories = [history_resnet18, history_resnet50, history_efficientnet]

comparison_data = {'Model': model_labels}
metric_columns = (
    ('Accuracy', 'accuracy'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
    ('F1-Score', 'f1_score'),
)
for column, key in metric_columns:
    comparison_data[column] = [metrics[key] for metrics in all_metrics]
comparison_data['Best Val Accuracy'] = [max(hist['val_acc']) for hist in all_histories]
# Validation loss of the last recorded epoch of each run
comparison_data['Final Val Loss'] = [hist['val_loss'][-1] for hist in all_histories]

comparison_df = pd.DataFrame(comparison_data)

banner = "="*90
print("\n" + banner)
print("FINAL MODEL COMPARISON SUMMARY")
print(banner)
print(comparison_df.to_string(index=False))
print(banner)

# Highlight best model (highest evaluation accuracy)
best_model_idx = comparison_df['Accuracy'].idxmax()
best_model = comparison_df.loc[best_model_idx, 'Model']
best_accuracy = comparison_df.loc[best_model_idx, 'Accuracy']
best_f1 = comparison_df.loc[best_model_idx, 'F1-Score']
print(f"\nüèÜ Best Performing Model: {best_model}")
print(f"   Accuracy: {best_accuracy:.4f}")
print(f"   F1-Score: {best_f1:.4f}")

## Next Steps / Usage
- Run cells top-to-bottom to download data, prepare loaders, and train all three models.
- If GPU memory is limited, reduce `BATCH_SIZE` or unfreeze only part of the backbones.
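- You can skip training some models by commenting out their training calls; the performance, comparison, and evaluation cells reference all three models, so comment out or adjust those as well. Note that `criterion` is defined in the ResNet18 training cell, so keep that line if you skip ResNet18.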
- Adjust `EPOCHS`, `LEARNING_RATE`, or augmentation if over/underfitting is observed.

### EfficientNetB1 Performance