In [1]:
# Download the dataset through kagglehub. `path` is the download location that the
# exploration cell below prints as the base path and walks to locate the images.
# NOTE(review): the recorded output of this cell is a ModuleNotFoundError, so
# kagglehub has to be installed in the kernel environment before a clean run.
import kagglehub
path = kagglehub.dataset_download("zaidworks0508/cow-breed-classification-dataset")

ModuleNotFoundError: No module named 'kagglehub'

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from PIL import Image
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, models
from tqdm import tqdm

# Speed/throughput knobs
# These trade some float32 precision / run-to-run determinism for throughput.
# NOTE(review): no global random seed is set in this cell; only the train/val
# split further down uses a seeded generator (manual_seed(42)).
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.set_float32_matmul_precision("medium")

# Report library versions and GPU availability so the run can be reproduced.
print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA Device: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

# Set device
# `device` is a notebook-wide global: models and batches are moved to it in later cells.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")

# Cattle Breed Classification - Comparative Study

This notebook compares five image-classification architectures for cattle breed classification:
1. ResNet18
2. ResNet50
3. EfficientNetB1
4. InceptionV3
5. Vision Transformer (ViT-B/16)

All models are trained with a transfer-learning approach on the `zaidworks0508/cow-breed-classification-dataset` dataset.

## Data Exploration and Preparation

In [None]:
# Explore the downloaded dataset structure
# `path` is the download location assigned in the kagglehub cell at the top of the notebook.
print(f"Base path: {path}")
print("\nExploring directory structure...")

def explore_directory(dir_path, max_depth=3, current_depth=0, prefix=""):
    """Print an indented tree of `dir_path`, descending at most `max_depth` levels.

    Args:
        dir_path: Directory to list.
        max_depth: Number of nested levels to print before stopping.
        current_depth: Depth of `dir_path` relative to the starting directory
            (used by the recursion; callers normally leave it at 0).
        prefix: Indentation string prepended to every printed line.

    Only the first 10 entries of each directory are shown. Listing errors are
    printed (not raised) so exploration continues with the remaining folders.
    """
    if current_depth < max_depth:
        try:
            # Cap the listing so large image folders do not flood the output.
            for entry in os.listdir(dir_path)[:10]:
                entry_path = os.path.join(dir_path, entry)
                if not os.path.isdir(entry_path):
                    print(f"{prefix}üìÑ {entry}")
                    continue
                print(f"{prefix}üìÅ {entry}/")
                explore_directory(entry_path, max_depth, current_depth + 1, prefix + "  ")
        except PermissionError:
            print(f"{prefix}‚ùå Permission denied")
        except Exception as err:
            print(f"{prefix}‚ùå Error: {err}")

explore_directory(path, max_depth=4)

# Find the dataset path
print("\n" + "="*60)
print("Searching for cattle breed images...")
print("="*60)

# The dataset root must be the folder whose *sub-folders* are the breeds, because the
# code below (and CattleDataset) treats every sub-directory of `dataset_path` as a class.
# The first folder that directly contains images is normally a single breed folder, so
# the root is its parent unless that folder itself also holds image-bearing sub-folders.
_image_exts = ('.jpg', '.jpeg', '.png')
dataset_path = None
for root, dirs, files in os.walk(path):
    if not any(item.endswith(_image_exts) for item in files):
        continue

    # Does `root` itself contain breed folders (sub-directories with images)?
    has_breed_subdirs = any(
        any(name.endswith(_image_exts) for name in os.listdir(os.path.join(root, sub)))
        for sub in dirs
    )
    if has_breed_subdirs or os.path.abspath(root) == os.path.abspath(path):
        dataset_path = root
    else:
        # `root` is a breed folder: step up one level to reach the class root.
        dataset_path = os.path.dirname(root)
    print(f"‚úÖ Found images at: {dataset_path}")
    break

if dataset_path is None:
    # No image files anywhere below `path`; fall back to the download location.
    dataset_path = path
    print(f"Using base path as dataset path: {dataset_path}")

# Every sub-directory of the dataset root is treated as one breed
classes = [
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
]
print(f"\nüìä Cattle breeds found: {sorted(classes)}")

# Per-breed image tally; a folder that cannot be read is reported and skipped
print("\nüìà Image counts per breed:")
total_images = 0
for cls in sorted(classes):
    cls_path = os.path.join(dataset_path, cls)
    try:
        num_images = sum(
            1 for f in os.listdir(cls_path)
            if f.endswith(('.png', '.jpg', '.jpeg'))
        )
        print(f"  {cls}: {num_images:,} images")
        total_images += num_images
    except Exception as e:
        print(f"  {cls}: Error - {e}")

print(f"\n  Total images: {total_images:,}")
print(f"  Total breeds: {len(classes)}")

In [None]:
# Configuration
# Notebook-wide hyper-parameters; later cells read these as globals.
IMG_SIZE = 224  # Consider 256 if GPU allows; 224 keeps speed high
BATCH_SIZE = 48  # Drop to 32 if OOM
EPOCHS = 25  # Upper bound on epochs per model; early stopping can end training sooner
WARMUP_EPOCHS = 3  # Train head-only before unfreezing last block
VALIDATION_SPLIT = 0.2  # Fraction of all images held out for validation
LEARNING_RATE = 3e-4  # Head-only learning rate; train_model halves it after the unfreeze
WEIGHT_DECAY = 1e-4  # AdamW weight decay used by every optimizer in this notebook

# Custom Dataset class
class CattleDataset(Dataset):
    """Image-folder dataset: every sub-directory of `root_dir` is one class.

    Attributes:
        root_dir: Directory whose sub-directories are the classes.
        transform: Optional callable applied to each PIL image in `__getitem__`.
        images: Image file paths, ordered by class name and then by file name.
        labels: Integer class index for each entry of `images`.
        class_names: Sorted list of class (sub-directory) names.
        class_to_idx: Mapping from class name to its integer index.
    """

    # Extensions are compared case-insensitively so files such as "IMG_01.JPG" are kept.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.class_names = []

        # Get class directories
        classes = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
        self.class_names = classes
        self.class_to_idx = {cls: idx for idx, cls in enumerate(classes)}

        # Load all image paths and labels.
        # os.listdir order is arbitrary, so file names are sorted: sample index i
        # then always refers to the same file, which keeps a seeded index split
        # reproducible across machines and re-downloads.
        for cls in classes:
            cls_path = os.path.join(root_dir, cls)
            cls_idx = self.class_to_idx[cls]
            for img_name in sorted(os.listdir(cls_path)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.images.append(os.path.join(cls_path, img_name))
                    self.labels.append(cls_idx)

    def __len__(self):
        """Return the number of images found under `root_dir`."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return `(image, label)` for sample `idx`; the image is RGB and transformed if a transform is set."""
        img_path = self.images[idx]
        # The context manager closes the underlying file as soon as the pixels are converted.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Data augmentation and preprocessing for training (balanced, not too heavy)
# Training pipeline: random crop/flip/colour/rotation augmentation, then tensor
# conversion and per-channel normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(p=0.1),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    # Same mean/std as the validation pipeline below, so both splits share one input scale.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Only normalization for validation
# (plus a deterministic resize to IMG_SIZE x IMG_SIZE; no random augmentation)
val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create full dataset
# Index every image once, without a transform: the train/val wrappers below
# apply their own transform to the shared path/label lists.
full_dataset = CattleDataset(dataset_path, transform=None)
class_names = full_dataset.class_names
num_classes = len(class_names)

print(f"Class names: {class_names}")
print(f"Number of classes: {num_classes}")
print(f"Total images: {len(full_dataset)}")

# Fail fast with a readable message: an empty dataset would otherwise only surface
# later as an obscure error in the data loaders or the model heads.
if num_classes == 0 or len(full_dataset) == 0:
    raise RuntimeError(
        f"No class folders with images found under {dataset_path!r}; "
        "check that dataset_path points at the directory containing one folder per breed."
    )

# Split dataset into train and validation
from torch.utils.data import random_split
train_size = int((1 - VALIDATION_SPLIT) * len(full_dataset))
val_size = len(full_dataset) - train_size
# Only indices are split; the fixed generator seed makes the partition repeatable.
train_indices, val_indices = random_split(
    range(len(full_dataset)),
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)

# Create separate datasets with different transforms
class SubsetDataset(Dataset):
    """View over selected samples of a parent dataset, with its own transform.

    The parent must expose `images` (file paths) and `labels` (class indices);
    `indices` selects which positions of those lists belong to this subset.
    """

    def __init__(self, dataset, indices, transform):
        self.dataset = dataset
        # Materialise the index iterable once so lookups are plain list accesses.
        self.indices = [position for position in indices]
        self.transform = transform

    def __len__(self):
        """Number of samples in this subset."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Load the image for subset position `idx` and return `(image, label)`."""
        parent_idx = self.indices[idx]
        source_path = self.dataset.images[parent_idx]
        target = self.dataset.labels[parent_idx]
        picture = Image.open(source_path).convert('RGB')

        # Without a transform the raw RGB PIL image is returned unchanged.
        return (self.transform(picture) if self.transform else picture), target

# Wrap the two index sets: augmentation for training, deterministic preprocessing for validation
train_dataset = SubsetDataset(full_dataset, train_indices, train_transform)
val_dataset = SubsetDataset(full_dataset, val_indices, val_transform)

# Loader settings shared by both splits; only shuffling differs
_loader_options = dict(
    batch_size=BATCH_SIZE,
    num_workers=8,
    pin_memory=True,
    persistent_workers=True,
)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

print(f"\nTraining samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Batches per epoch (train): {len(train_loader)}")
print(f"Batches per epoch (val): {len(val_loader)}")

## Visualize Sample Images

In [None]:
# Show up to nine augmented training images in a 3x3 grid
fig = plt.figure(figsize=(15, 10))

# Inverse of the Normalize step in the transforms, so pixels return to the [0, 1] range
inv_normalize = transforms.Normalize(
    mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
    std=[1/0.229, 1/0.224, 1/0.225]
)

# Pull one batch from the training loader
dataiter = iter(train_loader)
images, labels = next(dataiter)

for i in range(min(9, len(images))):
    ax = fig.add_subplot(3, 3, i+1)

    # CHW tensor -> HWC array, clipped to the displayable range
    img = inv_normalize(images[i]).permute(1, 2, 0).numpy()
    img = np.clip(img, 0, 1)

    ax.imshow(img)
    ax.set_title(class_names[labels[i].item()])
    ax.axis('off')

fig.tight_layout()
plt.show()

## Training Infrastructure: Early Stopping & Training Loop

In [None]:
# Early Stopping implementation
class EarlyStopping:
    """Track validation loss and flag when it has stopped improving.

    Args:
        patience: Consecutive non-improving calls tolerated before `early_stop` is set.
        min_delta: A loss counts as non-improving when it is greater than
            `best_loss - min_delta`.
        verbose: Print the counter on every non-improving call.

    After each improving call `best_model` holds a deep copy of the model's
    `state_dict()` and `best_loss` the corresponding loss.
    """

    def __init__(self, patience=6, min_delta=0, verbose=True):
        self.patience, self.min_delta, self.verbose = patience, min_delta, verbose
        self.counter = 0
        self.best_loss = None
        self.best_model = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        seen_before = self.best_loss is not None

        if seen_before and val_loss > self.best_loss - self.min_delta:
            # No improvement: count it and stop once patience is used up.
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # First observation or a new best: snapshot the weights.
        self.best_loss = val_loss
        self.best_model = copy.deepcopy(model.state_dict())
        if seen_before:
            self.counter = 0

# GradScaler for mixed precision
# `scaler` is a module-level object: train_model below uses it for every model it trains.
# NOTE(review): newer PyTorch releases expose these under torch.amp and may warn about
# the torch.cuda.amp import path — confirm against the installed version.
from torch.cuda.amp import autocast, GradScaler
scaler = GradScaler()


def unfreeze_last_block(model, model_name):
    """Unfreeze final block to fine-tune after warmup.

    Sets `requires_grad = True` on the parameters of the last stage of `model`,
    selected by parameter name:

    * "ResNet18" / "ResNet50": every parameter whose name contains "layer4".
    * "EfficientNetB1": the first parameter of "features.6"/"features.7" and
      every parameter that follows it in `named_parameters()` order.
    * "InceptionV3": every parameter whose name contains "Mixed_7".
    * names starting with "ViT": the highest-numbered `encoder_layer_<n>` and the
      final encoder LayerNorm ("encoder.ln").

    Any other `model_name` leaves the model untouched and prints a notice, so the
    caller's log does not silently claim a fine-tuning stage that never happened.

    Args:
        model: Module exposing `named_parameters()`.
        model_name: Architecture label used by the training cells.
    """
    import re  # local import: only needed for the ViT branch

    if model_name in ["ResNet18", "ResNet50"]:
        for name, param in model.named_parameters():
            if "layer4" in name:
                param.requires_grad = True
    elif model_name == "EfficientNetB1":
        unfreeze = False
        for name, param in model.named_parameters():
            if "features.6" in name or "features.7" in name:
                unfreeze = True
            # Once the marker is reached, everything after it is unfrozen as well.
            if unfreeze:
                param.requires_grad = True
    elif model_name == "InceptionV3":
        for name, param in model.named_parameters():
            if "Mixed_7" in name:
                param.requires_grad = True
    elif model_name.startswith("ViT"):
        named = list(model.named_parameters())
        layer_ids = [
            int(match.group(1))
            for match in (re.search(r"encoder_layer_(\d+)\.", name) for name, _ in named)
            if match
        ]
        # The trailing dot keeps "encoder_layer_1." from matching "encoder_layer_11.".
        last_layer = f"encoder_layer_{max(layer_ids)}." if layer_ids else None
        for name, param in named:
            if (last_layer is not None and last_layer in name) or name.startswith("encoder.ln."):
                param.requires_grad = True
    else:
        print(f"Note: no unfreeze rule for {model_name}; backbone stays frozen")


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, model_name):
    """Train with warmup, AMP, cosine LR, early stopping, and selective unfreeze.

    Relies on notebook globals: `device`, `scaler`, `autocast`, `WARMUP_EPOCHS`,
    `LEARNING_RATE`, `WEIGHT_DECAY`, `EarlyStopping` and `unfreeze_last_block`.

    Args:
        model: Network to train; already moved to `device`.
        train_loader: Loader yielding `(inputs, labels)` training batches.
        val_loader: Loader yielding `(inputs, labels)` validation batches.
        criterion: Loss applied to `(outputs, labels)`.
        optimizer: Optimizer used for the head-only warm-up epochs. It is replaced
            internally by a fresh AdamW once the last block is unfrozen.
        num_epochs: Maximum number of epochs.
        model_name: Label used in log output and to choose the unfreeze rule.

    Returns:
        dict with per-epoch lists under 'train_loss', 'train_acc', 'val_loss'
        and 'val_acc'. On return the model holds the weights of the epoch with
        the lowest validation loss.
    """

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=1e-5)
    early_stopping = EarlyStopping(patience=6, verbose=True)

    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    print(f"\nTraining {model_name}...")
    print("="*70)

    for epoch in range(num_epochs):
        # After warmup, unfreeze last block and reset optimizer on trainable params
        if epoch == WARMUP_EPOCHS:
            unfreeze_last_block(model, model_name)
            optimizer = optim.AdamW(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=LEARNING_RATE * 0.5,
                weight_decay=WEIGHT_DECAY
            )
            # The scheduler must follow the *new* optimizer; the old one would keep
            # adjusting the discarded warm-up optimizer and the learning rate of
            # the fine-tuning stage would never decay.
            scheduler = optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=num_epochs - epoch, eta_min=1e-5
            )
            print(f"Unfroze last block of {model_name} and reset optimizer")

        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
        for batch_idx, (inputs, labels) in enumerate(train_pbar, start=1):
            inputs = inputs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad(set_to_none=True)
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()

            # Running mean over the batches seen so far (not over the whole epoch),
            # so the progress bar is meaningful from the first batch on.
            train_pbar.set_postfix({
                'loss': f'{train_loss/batch_idx:.4f}',
                'acc': f'{100.*train_correct/train_total:.2f}%'
            })

        avg_train_loss = train_loss / len(train_loader)
        train_accuracy = train_correct / train_total

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Val]  ')
            for batch_idx, (inputs, labels) in enumerate(val_pbar, start=1):
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += labels.size(0)
                val_correct += predicted.eq(labels).sum().item()

                val_pbar.set_postfix({
                    'loss': f'{val_loss/batch_idx:.4f}',
                    'acc': f'{100.*val_correct/val_total:.2f}%'
                })

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = val_correct / val_total

        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(train_accuracy)
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_accuracy)

        scheduler.step()

        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'  Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f}')
        print(f'  Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}')
        print(f'  Learning Rate: {optimizer.param_groups[0]["lr"]:.2e}')

        early_stopping(avg_val_loss, model)
        if early_stopping.early_stop:
            print("\nEarly stopping triggered!")
            break

    # Single restore point for both early stop and normal completion.
    if early_stopping.best_model is not None:
        model.load_state_dict(early_stopping.best_model)
        print("\nRestored best model weights")

    return history

## Model 1: ResNet18

In [None]:
# Build ResNet18 model
def build_resnet18_model(num_classes=3):
    """Return an ImageNet-pretrained ResNet18 with a frozen backbone and a new MLP head.

    Args:
        num_classes: Number of output logits of the replacement head.

    Returns:
        The torchvision ResNet18 whose `fc` is a freshly initialised, trainable
        classifier; all pretrained parameters have `requires_grad = False`.
    """
    net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Freeze all pretrained weights in one call; the head created below is
    # attached afterwards and therefore stays trainable.
    net.requires_grad_(False)

    in_dim = net.fc.in_features
    head_layers = [
        nn.Linear(in_dim, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, num_classes),
    ]
    net.fc = nn.Sequential(*head_layers)

    return net

# Instantiate the network and place it on the selected device
resnet18_model = build_resnet18_model(num_classes=num_classes).to(device)

# Show the layer-by-layer structure
print("ResNet18 Model Architecture:")
print("="*70)
print(resnet18_model)

# Tally parameters, split by whether they currently receive gradients
total_params = 0
trainable_params = 0
for _param in resnet18_model.parameters():
    total_params += _param.numel()
    if _param.requires_grad:
        trainable_params += _param.numel()
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train ResNet18 (head first, backbone frozen; fine-tune later via unfreeze)
# `criterion` is defined here once and reused by the training cells of the other models.
criterion = nn.CrossEntropyLoss(label_smoothing=0.05)
# Only parameters that currently require gradients (the new head) are optimised at first;
# train_model builds a fresh optimizer itself once the last block is unfrozen.
optimizer_resnet18 = optim.AdamW(
    filter(lambda p: p.requires_grad, resnet18_model.parameters()),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

history_resnet18 = train_model(
    resnet18_model,
    train_loader,
    val_loader,
    criterion,
    optimizer_resnet18,
    EPOCHS,
    "ResNet18"
)

## Model 2: ResNet50

In [None]:
# Build ResNet50 model
def build_resnet50_model(num_classes=3):
    """Create a frozen, ImageNet-pretrained ResNet50 topped with a trainable MLP classifier.

    Args:
        num_classes: Size of the final linear layer's output.

    Returns:
        The model with every pretrained parameter frozen and `fc` replaced.
    """
    backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Stop gradients for the pretrained weights
    for frozen in backbone.parameters():
        frozen.requires_grad_(False)

    # New classifier: feature vector -> 256 -> 128 -> num_classes
    feature_dim = backbone.fc.in_features
    backbone.fc = nn.Sequential(
        nn.Linear(feature_dim, 256), nn.ReLU(inplace=True), nn.Dropout(p=0.5),
        nn.Linear(256, 128), nn.ReLU(inplace=True), nn.Dropout(p=0.3),
        nn.Linear(128, num_classes),
    )
    return backbone

# Instantiate the network and place it on the selected device
resnet50_model = build_resnet50_model(num_classes=num_classes).to(device)

# Show the layer-by-layer structure
print("ResNet50 Model Architecture:")
print("="*70)
print(resnet50_model)

# Tally parameters, split by whether they currently receive gradients
total_params = 0
trainable_params = 0
for _param in resnet50_model.parameters():
    total_params += _param.numel()
    if _param.requires_grad:
        trainable_params += _param.numel()
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train ResNet50
# Uses the `criterion` created in the ResNet18 training cell, so that cell must run first.
optimizer_resnet50 = optim.AdamW(
    filter(lambda p: p.requires_grad, resnet50_model.parameters()),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

history_resnet50 = train_model(
    resnet50_model,
    train_loader,
    val_loader,
    criterion,
    optimizer_resnet50,
    EPOCHS,
    "ResNet50"
)

## Model 3: EfficientNetB1

In [None]:
# Build EfficientNetB1 model
def build_efficientnet_model(num_classes=3):
    """Return a pretrained EfficientNet-B1 with frozen features and a custom classifier.

    Args:
        num_classes: Number of logits produced by the new classifier.

    Returns:
        The torchvision EfficientNet-B1 with `classifier` replaced by a trainable
        dropout + MLP stack; every pretrained parameter is frozen.
    """
    network = models.efficientnet_b1(weights=models.EfficientNet_B1_Weights.IMAGENET1K_V1)

    # Freeze the pretrained weights before the new classifier is attached
    network.requires_grad_(False)

    # The stock classifier keeps its Linear layer at index 1; reuse its input width
    width = network.classifier[1].in_features
    classifier_layers = (
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(width, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, num_classes),
    )
    network.classifier = nn.Sequential(*classifier_layers)

    return network

# Instantiate the network and place it on the selected device
efficientnet_model = build_efficientnet_model(num_classes=num_classes).to(device)

# Show the layer-by-layer structure
print("EfficientNetB1 Model Architecture:")
print("="*70)
print(efficientnet_model)

# Tally parameters, split by whether they currently receive gradients
total_params = 0
trainable_params = 0
for _param in efficientnet_model.parameters():
    total_params += _param.numel()
    if _param.requires_grad:
        trainable_params += _param.numel()
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train EfficientNetB1
# Uses the `criterion` created in the ResNet18 training cell, so that cell must run first.
optimizer_efficientnet = optim.AdamW(
    filter(lambda p: p.requires_grad, efficientnet_model.parameters()),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

history_efficientnet = train_model(
    efficientnet_model,
    train_loader,
    val_loader,
    criterion,
    optimizer_efficientnet,
    EPOCHS,
    "EfficientNetB1"
)

## Model 4: InceptionV3

In [None]:
# Build InceptionV4 model
def build_inception_model(num_classes=3):
    """Build InceptionV3 model with frozen base and custom classifier.

    torchvision ships InceptionV3 (there is no InceptionV4 builder), so V3 is
    what this function returns.

    The auxiliary classifier is disabled: with it enabled the network returns an
    `(logits, aux_logits)` pair in training mode, which the shared training loop
    cannot pass to the loss function, and the auxiliary branch presumably also
    expects the larger 299x299 inputs rather than the 224x224 used here
    (TODO confirm).

    Args:
        num_classes: Number of output logits of the replacement head.

    Returns:
        The InceptionV3 model with frozen pretrained weights, no auxiliary head,
        and `fc` replaced by a trainable MLP classifier.
    """

    # Load pre-trained InceptionV3
    model = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)

    # Drop the auxiliary head after loading. The pretrained checkpoint requires the
    # head to exist at load time, so it is removed here instead of via a constructor
    # argument. The forward pass then always returns a plain logits tensor.
    model.aux_logits = False
    model.AuxLogits = None

    # Freeze base model parameters
    for param in model.parameters():
        param.requires_grad = False

    # Replace final fully connected layer
    num_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_features, 256),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(256, 128),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(128, num_classes)
    )

    return model

# Instantiate the network and place it on the selected device
inception_model = build_inception_model(num_classes=num_classes).to(device)

# Show the layer-by-layer structure
print("InceptionV3 Model Architecture:")
print("="*70)
print(inception_model)

# Tally parameters, split by whether they currently receive gradients
total_params = 0
trainable_params = 0
for _param in inception_model.parameters():
    total_params += _param.numel()
    if _param.requires_grad:
        trainable_params += _param.numel()
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train InceptionV3
# Uses the `criterion` created in the ResNet18 training cell, so that cell must run first.
optimizer_inception = optim.AdamW(
    filter(lambda p: p.requires_grad, inception_model.parameters()),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

history_inception = train_model(
    inception_model,
    train_loader,
    val_loader,
    criterion,
    optimizer_inception,
    EPOCHS,
    "InceptionV3"
)

## Model 5: Vision Transformer (ViT)

In [None]:
# Build Vision Transformer (ViT) model
def build_vit_model(num_classes=3):
    """Return a pretrained ViT-B/16 whose encoder is frozen and whose head is a new MLP.

    Args:
        num_classes: Number of logits produced by the replacement head.

    Returns:
        The torchvision ViT-B/16 with all pretrained parameters frozen and
        `heads` replaced by a trainable classifier.
    """
    vit = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1)

    # Freeze the pretrained transformer
    for weight in vit.parameters():
        weight.requires_grad = False

    # The original head's first layer gives the embedding width fed to the classifier
    embed_dim = vit.heads[0].in_features
    hidden_sizes = (256, 128)
    vit.heads = nn.Sequential(
        nn.Linear(embed_dim, hidden_sizes[0]),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(hidden_sizes[0], hidden_sizes[1]),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.3),
        nn.Linear(hidden_sizes[1], num_classes),
    )

    return vit

# Instantiate the network and place it on the selected device
vit_model = build_vit_model(num_classes=num_classes).to(device)

# Show the layer-by-layer structure
print("Vision Transformer (ViT-B/16) Model Architecture:")
print("="*70)
print(vit_model)

# Tally parameters, split by whether they currently receive gradients
total_params = 0
trainable_params = 0
for _param in vit_model.parameters():
    total_params += _param.numel()
    if _param.requires_grad:
        trainable_params += _param.numel()
print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Non-trainable parameters: {total_params - trainable_params:,}")

In [None]:
# Train Vision Transformer
# Uses the `criterion` created in the ResNet18 training cell, so that cell must run first.
optimizer_vit = optim.AdamW(
    filter(lambda p: p.requires_grad, vit_model.parameters()),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)

history_vit = train_model(
    vit_model,
    train_loader,
    val_loader,
    criterion,
    optimizer_vit,
    EPOCHS,
    "ViT-B/16"
)

### InceptionV3 Performance

In [None]:
# NOTE(review): plot_training_history is not defined in any cell above this one in the
# visible notebook — confirm it is defined before this cell in a top-to-bottom run.
plot_training_history(history_inception, "InceptionV3")

### Vision Transformer (ViT) Performance

In [None]:
# NOTE(review): plot_training_history is not defined in any cell above this one in the
# visible notebook — confirm it is defined before this cell in a top-to-bottom run.
plot_training_history(history_vit, "ViT-B/16")

## Grad-CAM: Visualize Model Attention Heatmaps

In [None]:
# Grad-CAM Implementation for model interpretability
class GradCAM:
    """Generate Grad-CAM heatmaps to visualize where model attends.

    Forward and backward hooks on `target_layer` record its output activations and
    the gradient of the chosen class score with respect to them. Call
    `remove_hooks()` when finished so the hooks do not stay attached to the model.

    Args:
        model: Network to explain.
        target_layer: Sub-module of `model` whose output is a (N, C, H, W) feature map.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None

        # Register hooks and keep the handles so they can be detached again
        self._hook_handles = [
            self.target_layer.register_full_backward_hook(self.save_gradients),
            self.target_layer.register_forward_hook(self.save_activations),
        ]

    def remove_hooks(self):
        """Detach the hooks registered by this instance (safe to call repeatedly)."""
        for handle in self._hook_handles:
            handle.remove()
        self._hook_handles = []

    def save_activations(self, module, input, output):
        """Forward hook: remember the target layer's output."""
        self.activations = output.detach()

    def save_gradients(self, module, grad_input, grad_output):
        """Backward hook: remember the gradient flowing into the target layer's output."""
        self.gradients = grad_output[0].detach()

    def generate_cam(self, input_tensor, class_idx=None):
        """Generate CAM for input tensor.

        Args:
            input_tensor: Batch of one image, shape (1, C, H, W), on the model's device.
            class_idx: Class whose score is explained (int or one-element tensor);
                defaults to the model's top prediction.

        Returns:
            2-D numpy array at the target layer's spatial resolution, scaled to [0, 1].

        Raises:
            RuntimeError: If the hooks captured no activations or gradients.
            ValueError: If the target layer's output is not a (N, C, H, W) feature map.
        """
        self.model.eval()
        # Clear results of a previous call so stale data can never be reused.
        self.gradients = None
        self.activations = None

        # Make the input require gradients: with a fully frozen backbone nothing
        # upstream of the target layer would otherwise build a graph, and the
        # backward hook would never fire.
        input_tensor = input_tensor.detach().clone().requires_grad_(True)

        with torch.enable_grad():
            output = self.model(input_tensor)
            if class_idx is None:
                class_idx = output.argmax(dim=1)
            if torch.is_tensor(class_idx):
                class_idx = int(class_idx.reshape(-1)[0])

            self.model.zero_grad()
            target_score = output[0, class_idx]
            target_score.backward()

        if self.gradients is None or self.activations is None:
            raise RuntimeError(
                "Grad-CAM hooks captured no data; check that target_layer is used "
                "in the model's forward pass"
            )

        # Compute CAM
        gradients = self.gradients[0].cpu()
        activations = self.activations[0].cpu()
        if gradients.dim() != 3:
            raise ValueError(
                "Grad-CAM needs a (N, C, H, W) feature map from target_layer, got "
                f"shape {tuple(self.activations.shape)}"
            )

        # Channel weights are the spatial mean of the gradients
        weights = gradients.mean(dim=(1, 2))
        cam = (weights.view(-1, 1, 1) * activations).sum(dim=0)
        cam = torch.relu(cam)
        # The epsilon keeps an all-zero map from dividing by zero
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)

        return cam.numpy()


def visualize_grad_cam(model, data_loader, model_name, num_samples=3):
    """Visualize Grad-CAM heatmaps for model predictions.

    For the first `num_samples` images of `data_loader`, plots the input image,
    the Grad-CAM heatmap for the predicted class, and an overlay of both.

    Relies on notebook globals `device`, `class_names`, `GradCAM` and `cv2`.

    Args:
        model: Trained network, already on `device`.
        data_loader: Loader yielding `(inputs, labels)` batches of normalized images.
        model_name: Architecture label; selects the layer whose activations are used.
        num_samples: Number of images (figure rows) to show.

    Raises:
        ValueError: If `model_name` matches none of the known architectures.
    """
    print(f"\nGenerating Grad-CAM visualizations for {model_name}...")

    # Get target layer based on model type
    if "ResNet" in model_name:
        target_layer = model.layer4[-1].conv2
    elif "EfficientNet" in model_name:
        target_layer = model.features[-1]
    elif "InceptionV3" in model_name:
        target_layer = model.Mixed_7c
    elif "ViT" in model_name:
        # NOTE(review): this layer outputs a token sequence rather than a spatial
        # feature map — confirm the CAM computation supports it.
        target_layer = model.encoder.layers.encoder_layer_11
    else:
        raise ValueError(f"No Grad-CAM target layer known for model '{model_name}'")

    grad_cam = GradCAM(model, target_layer)

    # Statistics of the Normalize step in the data transforms, used to undo it for display
    channel_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    channel_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    model.eval()
    fig, axes = plt.subplots(num_samples, 3, figsize=(15, 5 * num_samples))
    if num_samples == 1:
        axes = axes.reshape(1, -1)

    try:
        sample_count = 0
        for inputs, labels in data_loader:
            if sample_count >= num_samples:
                break

            for i in range(len(inputs)):
                if sample_count >= num_samples:
                    break

                input_img = inputs[i].unsqueeze(0).to(device)
                label = labels[i].item()

                # Prediction and confidence need no gradients
                with torch.no_grad():
                    output = model(input_img)
                    pred_class = output.argmax(dim=1).item()
                    confidence = torch.softmax(output, dim=1)[0, pred_class].item()

                # Grad-CAM enables gradients itself
                cam = grad_cam.generate_cam(input_img, pred_class)

                # Denormalize image: invert Normalize so colours are shown as in
                # the source image instead of being min-max stretched
                img_np = inputs[i].cpu().numpy()
                img_np = np.transpose(img_np, (1, 2, 0))
                img_np = np.clip(img_np * channel_std + channel_mean, 0.0, 1.0)

                # Overlay CAM on image
                cam_resized = cv2.resize(cam, (img_np.shape[1], img_np.shape[0]))
                cam_heatmap = plt.cm.jet(cam_resized)[:, :, :3]
                overlay = 0.6 * img_np + 0.4 * cam_heatmap

                # Plot
                axes[sample_count, 0].imshow(img_np)
                axes[sample_count, 0].set_title(f"Original\n{class_names[label]}")
                axes[sample_count, 0].axis('off')

                axes[sample_count, 1].imshow(cam_resized, cmap='jet')
                axes[sample_count, 1].set_title("Grad-CAM Heatmap")
                axes[sample_count, 1].axis('off')

                axes[sample_count, 2].imshow(overlay)
                axes[sample_count, 2].set_title(f"Pred: {class_names[pred_class]}\nConf: {confidence:.2%}")
                axes[sample_count, 2].axis('off')

                sample_count += 1
    except Exception:
        # Do not leave a half-drawn figure behind when the CAM cannot be computed
        plt.close(fig)
        raise

    plt.tight_layout()
    plt.show()


# Import cv2 for CAM resizing
import cv2

## Evaluation: Confusion Matrix, Classification Report, and Macro F1-Score

In [None]:
# Updated evaluation function with Macro F1-Score and Grad-CAM
def evaluate_model(model, model_name, data_loader, show_gradcam=True):
    """Evaluate model: precision, recall, weighted F1, macro F1, and confusion matrix.

    Prints the overall metrics and a per-class report, plots the confusion matrix
    and, optionally, Grad-CAM visualisations.

    Relies on notebook globals `device` and `class_names`.

    Args:
        model: Trained network, already on `device`.
        model_name: Label used in printed output and plot titles.
        data_loader: Loader yielding `(inputs, labels)` batches to evaluate on.
        show_gradcam: When True, also try to draw Grad-CAM heatmaps; a failure
            there is reported and does not abort the evaluation.

    Returns:
        dict with keys 'accuracy', 'precision', 'recall', 'f1_weighted', 'f1_macro'.
    """
    print(f"\n{'='*60}")
    print(f"Evaluating {model_name}")
    print(f"{'='*60}")

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc="Evaluating"):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.numpy())

    y_true = np.array(all_labels)
    y_pred = np.array(all_preds)

    # Compute metrics
    # zero_division=0 scores an undefined precision/recall as 0 without a warning.
    accuracy = np.mean(y_pred == y_true)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)

    print(f"\nOverall Metrics:")
    print(f"Accuracy:           {accuracy:.4f}")
    print(f"Weighted Precision: {precision:.4f}")
    print(f"Weighted Recall:    {recall:.4f}")
    print(f"Weighted F1-Score:  {f1_weighted:.4f}")
    print(f"üéØ Macro F1-Score:  {f1_macro:.4f}")

    # The split is random (not stratified), so a breed may be absent from this
    # loader. Passing the full label set keeps the report rows and the confusion
    # matrix aligned with class_names instead of failing or shifting the axes.
    all_class_ids = list(range(len(class_names)))

    # Classification report
    print(f"\nDetailed Classification Report:")
    print(classification_report(
        y_true, y_pred,
        labels=all_class_ids,
        target_names=class_names,
        digits=4,
        zero_division=0,
    ))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=all_class_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names,
                cbar_kws={'label': 'Count'})
    plt.title(f'{model_name} - Confusion Matrix', fontsize=14, fontweight='bold')
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.tight_layout()
    plt.show()

    # Grad-CAM visualization (best effort: metrics are still returned if it fails)
    if show_gradcam:
        try:
            visualize_grad_cam(model, data_loader, model_name, num_samples=3)
        except Exception as e:
            print(f"Note: Grad-CAM visualization skipped for {model_name}: {str(e)}")

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_weighted': f1_weighted,
        'f1_macro': f1_macro
    }

# Evaluate all models
# Each call prints metrics, draws a confusion matrix (and Grad-CAM when available) and
# returns a metrics dict that the comparison table below reads.
# The name strings must match those checked inside visualize_grad_cam.
metrics_resnet18 = evaluate_model(resnet18_model, "ResNet18", val_loader)
metrics_resnet50 = evaluate_model(resnet50_model, "ResNet50", val_loader)
metrics_efficientnet = evaluate_model(efficientnet_model, "EfficientNetB1", val_loader)
metrics_inception = evaluate_model(inception_model, "InceptionV3", val_loader)
metrics_vit = evaluate_model(vit_model, "ViT-B/16", val_loader)

In [None]:
# Assemble one row per model from its evaluation metrics and training history
_model_results = [
    ('ResNet18', metrics_resnet18, history_resnet18),
    ('ResNet50', metrics_resnet50, history_resnet50),
    ('EfficientNetB1', metrics_efficientnet, history_efficientnet),
    ('InceptionV3', metrics_inception, history_inception),
    ('ViT-B/16', metrics_vit, history_vit),
]

# Table column -> key in the dict returned by evaluate_model
_metric_columns = [
    ('Accuracy', 'accuracy'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
    ('Weighted F1', 'f1_weighted'),
    ('üéØ Macro F1', 'f1_macro'),
]

comparison_data = {'Model': [_name for _name, _, _ in _model_results]}
for _column, _key in _metric_columns:
    comparison_data[_column] = [_metrics[_key] for _, _metrics, _ in _model_results]
# History-derived columns: best validation accuracy and the last epoch's validation loss
comparison_data['Best Val Acc'] = [max(_history['val_acc']) for _, _, _history in _model_results]
comparison_data['Final Val Loss'] = [_history['val_loss'][-1] for _, _, _history in _model_results]

comparison_df = pd.DataFrame(comparison_data)

_rule = "="*110
print("\n" + _rule)
print("FINAL MODEL COMPARISON SUMMARY - ALL 5 ARCHITECTURES")
print(_rule)
print(comparison_df.to_string(index=False))
print(_rule)

# Row positions of the winners under each headline metric
best_accuracy_idx = comparison_df['Accuracy'].idxmax()
best_macro_f1_idx = comparison_df['üéØ Macro F1'].idxmax()

print(f"\nüèÜ Best by Accuracy: {comparison_df.loc[best_accuracy_idx, 'Model']}")
print(f"   Accuracy: {comparison_df.loc[best_accuracy_idx, 'Accuracy']:.4f}")

print(f"\nüéØ Best by Macro F1-Score: {comparison_df.loc[best_macro_f1_idx, 'Model']}")
print(f"   Macro F1: {comparison_df.loc[best_macro_f1_idx, 'üéØ Macro F1']:.4f}")
print(f"   Accuracy: {comparison_df.loc[best_macro_f1_idx, 'Accuracy']:.4f}")

In [None]:
# Visualize all models comparison
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Deliberately NOT named `models`: the import cell binds that name to the
# `torchvision.models` module, and rebinding it to a list here would break any
# model-building cell that is run (or re-run) after this one.
model_names = comparison_df['Model'].tolist()
colors = ['#2E86AB', '#A23B72', '#F18F01', '#06A77D', '#D62839']


def _plot_metric_bars(ax, column, title, y_label):
    """Draw one bar per model for `comparison_df[column]` and label each bar.

    The y-axis keeps the zoomed-in [0.8, 1.0] window whenever every score fits
    in it; if any score is lower, the floor drops just below the smallest score
    so that no bar (or its value label) is clipped out of the panel.
    """
    scores = comparison_df[column]
    ax.bar(model_names, scores, color=colors, alpha=0.8, edgecolor='black')
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_ylabel(y_label, fontsize=12)

    lowest = scores.min()
    y_floor = 0.8 if lowest >= 0.8 else max(0.0, lowest - 0.05)
    ax.set_ylim([y_floor, 1.0])

    ax.grid(axis='y', alpha=0.3)
    for i, v in enumerate(scores):
        ax.text(i, v, f'{v:.3f}', ha='center', va='bottom', fontweight='bold')


# Accuracy Comparison
_plot_metric_bars(axes[0, 0], 'Accuracy', 'Accuracy Comparison', 'Accuracy')

# Macro F1-Score Comparison
_plot_metric_bars(axes[0, 1], 'üéØ Macro F1', 'üéØ Macro F1-Score Comparison', 'Macro F1-Score')

# Weighted vs Macro F1-Score (two bars per model, centred on the model's tick)
x = np.arange(len(model_names))
width = 0.35
axes[1, 0].bar(x - width/2, comparison_df['Weighted F1'], width, label='Weighted F1', color='skyblue', edgecolor='black')
axes[1, 0].bar(x + width/2, comparison_df['üéØ Macro F1'], width, label='Macro F1', color='orange', edgecolor='black')
axes[1, 0].set_title('Weighted vs Macro F1-Score', fontsize=14, fontweight='bold')
axes[1, 0].set_ylabel('F1-Score', fontsize=12)
axes[1, 0].set_xticks(x)
axes[1, 0].set_xticklabels(model_names, rotation=15)
axes[1, 0].legend()
axes[1, 0].grid(axis='y', alpha=0.3)

# Precision, Recall, F1 (three bars per model)
x = np.arange(len(model_names))
width = 0.25
axes[1, 1].bar(x - width, comparison_df['Precision'], width, label='Precision', edgecolor='black')
axes[1, 1].bar(x, comparison_df['Recall'], width, label='Recall', edgecolor='black')
axes[1, 1].bar(x + width, comparison_df['Weighted F1'], width, label='Weighted F1', edgecolor='black')
axes[1, 1].set_title('Precision, Recall, and Weighted F1 Comparison', fontsize=14, fontweight='bold')
axes[1, 1].set_ylabel('Score', fontsize=12)
axes[1, 1].set_xticks(x)
axes[1, 1].set_xticklabels(model_names, rotation=15)
axes[1, 1].legend()
axes[1, 1].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## Summary: Key Findings

**Models Compared:**
1. **ResNet18** - Lightweight, fast, good for real-time
2. **ResNet50** - Deeper, more expressive than ResNet18
3. **EfficientNetB1** - Parameter-efficient with strong scaling
4. **InceptionV3** - Multi-scale feature extraction
5. **Vision Transformer (ViT)** - State-of-the-art attention-based architecture

**Key Metrics:**
- **Accuracy**: Overall proportion of correct predictions
- **Precision**: Fraction of positive predictions that are correct
- **Recall**: Fraction of actual positives correctly identified
- **Weighted F1-Score**: Harmonic mean of precision and recall (weighted by class support)
- **🎯 Macro F1-Score**: Unweighted average F1 across all classes (treats all classes equally)

**Grad-CAM Visualization:**
Shows which regions the model focuses on for predictions. Red/orange regions are high-attention areas that strongly influence the model's decision.

**Macro F1-Score Importance:**
Essential for imbalanced datasets—because every breed contributes equally to the score, it only stays high when the model performs well across ALL breeds, regardless of sample size per class.

## Performance Visualization and Analysis

In [None]:
# Function to plot training history
def plot_training_history(history, model_name):
    """Show accuracy and loss curves for one model and print its end-of-run numbers.

    Args:
        history: Mapping with the per-epoch sequences 'train_acc', 'val_acc',
            'train_loss' and 'val_loss'.
        model_name: Label used in the plot titles and the printed summary header.

    The left panel shows training/validation accuracy, the right panel shows
    training/validation loss. After the figure is displayed, the last-epoch
    values and the best validation accuracy are printed.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # The x-axis is 1-based epoch numbers, sized from the training-accuracy series.
    epoch_axis = range(1, len(history['train_acc']) + 1)

    # (axis, key suffix in `history`, human-readable metric name) for each panel.
    panels = (
        (axes[0], 'acc', 'Accuracy'),
        (axes[1], 'loss', 'Loss'),
    )
    for ax, suffix, metric in panels:
        ax.plot(epoch_axis, history[f'train_{suffix}'], label=f'Training {metric}', marker='o')
        ax.plot(epoch_axis, history[f'val_{suffix}'], label=f'Validation {metric}', marker='s')
        ax.set_title(f'{model_name} - {metric} Over Epochs', fontsize=14, fontweight='bold')
        ax.set_xlabel('Epoch', fontsize=12)
        ax.set_ylabel(metric, fontsize=12)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Print final metrics: last recorded value of every series plus the best
    # validation accuracy seen at any epoch.
    last = {key: history[key][-1] for key in ('train_acc', 'val_acc', 'train_loss', 'val_loss')}
    peak_val_acc = max(history['val_acc'])

    separator = '=' * 50
    summary_rows = (
        ('Final Training Accuracy', last['train_acc']),
        ('Final Validation Accuracy', last['val_acc']),
        ('Best Validation Accuracy', peak_val_acc),
        ('Final Training Loss', last['train_loss']),
        ('Final Validation Loss', last['val_loss']),
    )

    print(f"\n{model_name} - Final Metrics:")
    print(separator)
    for caption, value in summary_rows:
        print(f"{caption}: {value:.4f}")
    print(f"{separator}\n")

### ResNet18 Performance

In [None]:
# Plot ResNet18's accuracy/loss curves and print its final-epoch summary.
plot_training_history(history_resnet18, "ResNet18")

### ResNet50 Performance

In [None]:
# Plot ResNet50's accuracy/loss curves and print its final-epoch summary.
plot_training_history(history_resnet50, "ResNet50")

### EfficientNetB1 Performance

In [None]:
# Plot EfficientNetB1's accuracy/loss curves and print its final-epoch summary.
plot_training_history(history_efficientnet, "EfficientNetB1")

### Comparative Curves

In [None]:
# Compare all models - one figure for Training/Validation Accuracy, then one for
# Training/Validation Loss
# Each model gets its own 1-based epoch axis, sized from its training-accuracy series.
epochs_range_res18 = range(1, len(history_resnet18['train_acc']) + 1)
epochs_range_res50 = range(1, len(history_resnet50['train_acc']) + 1)
epochs_range_eff = range(1, len(history_efficientnet['train_acc']) + 1)

# (legend label, history dict, epoch axis, marker) for every curve in a panel.
curve_specs = (
    ('ResNet18', history_resnet18, epochs_range_res18, 'o'),
    ('ResNet50', history_resnet50, epochs_range_res50, 's'),
    ('EfficientNetB1', history_efficientnet, epochs_range_eff, '^'),
)


def _draw_comparison_figure(metric_suffix, metric_label):
    """Draw a two-panel figure (training | validation) overlaying all models.

    `metric_suffix` selects the history series ('acc' or 'loss'); `metric_label`
    is the matching human-readable name used in titles and the y-axis label.
    """
    plt.figure(figsize=(15, 6))

    splits = (('train', 'Training'), ('val', 'Validation'))
    for position, (split_prefix, split_label) in enumerate(splits, start=1):
        plt.subplot(1, 2, position)
        for curve_label, curve_history, curve_epochs, curve_marker in curve_specs:
            plt.plot(
                curve_epochs,
                curve_history[f'{split_prefix}_{metric_suffix}'],
                label=curve_label,
                marker=curve_marker,
                linewidth=2,
            )
        plt.title(f'{split_label} {metric_label} Comparison', fontsize=14, fontweight='bold')
        plt.xlabel('Epoch', fontsize=12)
        plt.ylabel(metric_label, fontsize=12)
        plt.legend(fontsize=11)
        plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


_draw_comparison_figure('acc', 'Accuracy')
_draw_comparison_figure('loss', 'Loss')

## Evaluation: Confusion Matrix and Classification Report

In [None]:
# Function to evaluate model and generate metrics
def evaluate_model(model, model_name, data_loader):
    """Evaluate a classifier on `data_loader` and report its metrics.

    Puts the model in eval mode, runs it without gradient tracking over every
    batch, then prints overall metrics and the per-class classification report
    and shows a confusion-matrix heatmap.

    Args:
        model: Network called as `model(inputs)`; the arg-max over dimension 1
            of its output is taken as the predicted class.
        model_name: Label used in the printed headers and the plot title.
        data_loader: Iterable yielding `(inputs, labels)` batches.

    Returns:
        dict with keys:
            'accuracy'    - fraction of predictions equal to the true label
            'precision'   - support-weighted precision
            'recall'      - support-weighted recall
            'f1_score'    - support-weighted F1 (kept for existing callers)
            'f1_weighted' - same value as 'f1_score'
            'f1_macro'    - unweighted mean of the per-class F1 scores
        'f1_weighted' and 'f1_macro' are included because the notebook's
        five-model comparison table reads those keys.
    """
    print(f"\n{'='*60}")
    print(f"Evaluating {model_name}")
    print(f"{'='*60}")

    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc="Evaluating"):
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)

            all_preds.extend(predicted.cpu().numpy())
            # .cpu() keeps this working if a loader ever yields labels that are
            # not already on the CPU; for CPU tensors it changes nothing.
            all_labels.extend(labels.cpu().numpy())

    y_true = np.array(all_labels)
    y_pred = np.array(all_preds)

    # Assumes labels are the integer indices 0..len(class_names)-1 (the
    # target_names / tick-label usage below already relies on that). Pinning the
    # label set keeps the report rows and the confusion-matrix axes aligned with
    # `class_names` even when some class is missing from this loader.
    class_indices = list(range(len(class_names)))

    # Compute metrics. zero_division=0 yields the same 0.0 that scikit-learn
    # uses by default for undefined per-class scores, without the warning.
    accuracy = np.mean(y_pred == y_true)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)

    print(f"\nOverall Metrics:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1_weighted:.4f}")
    print(f"Macro F1:  {f1_macro:.4f}")

    # Classification report
    print(f"\nDetailed Classification Report:")
    print(classification_report(y_true, y_pred, labels=class_indices,
                                target_names=class_names, digits=4, zero_division=0))

    # Confusion matrix (rows = true class, columns = predicted class)
    cm = confusion_matrix(y_true, y_pred, labels=class_indices)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names,
                cbar_kws={'label': 'Count'})
    plt.title(f'{model_name} - Confusion Matrix', fontsize=14, fontweight='bold')
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.tight_layout()
    plt.show()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1_weighted,
        'f1_weighted': f1_weighted,
        'f1_macro': f1_macro
    }

# Evaluate models on the validation loader; each call returns a metrics dict.
metrics_resnet18 = evaluate_model(resnet18_model, "ResNet18", val_loader)
metrics_resnet50 = evaluate_model(resnet50_model, "ResNet50", val_loader)
metrics_efficientnet = evaluate_model(efficientnet_model, "EfficientNetB1", val_loader)

# Create comparison dataframe
# One (display name, metrics dict, training history) triple per model; every
# column of the table is read off this tuple.
evaluated_runs = (
    ('ResNet18', metrics_resnet18, history_resnet18),
    ('ResNet50', metrics_resnet50, history_resnet50),
    ('EfficientNetB1', metrics_efficientnet, history_efficientnet),
)

comparison_data = {
    'Model': [name for name, _, _ in evaluated_runs],
    'Accuracy': [scores['accuracy'] for _, scores, _ in evaluated_runs],
    'Precision': [scores['precision'] for _, scores, _ in evaluated_runs],
    'Recall': [scores['recall'] for _, scores, _ in evaluated_runs],
    'F1-Score': [scores['f1_score'] for _, scores, _ in evaluated_runs],
    # Best validation accuracy over all epochs vs. the loss at the last epoch.
    'Best Val Accuracy': [max(hist['val_acc']) for _, _, hist in evaluated_runs],
    'Final Val Loss': [hist['val_loss'][-1] for _, _, hist in evaluated_runs],
}

comparison_df = pd.DataFrame(comparison_data)

summary_rule = "=" * 90
print("\n" + summary_rule)
print("FINAL MODEL COMPARISON SUMMARY")
print(summary_rule)
print(comparison_df.to_string(index=False))
print(summary_rule)

# Highlight best model (row with the highest accuracy)
best_model_idx = comparison_df['Accuracy'].idxmax()
best_model = comparison_df.loc[best_model_idx, 'Model']
best_accuracy = comparison_df.loc[best_model_idx, 'Accuracy']
best_f1 = comparison_df.loc[best_model_idx, 'F1-Score']
print(f"\nüèÜ Best Performing Model: {best_model}")
print(f"   Accuracy: {best_accuracy:.4f}")
print(f"   F1-Score: {best_f1:.4f}")

## Next Steps / Usage
- Run cells top-to-bottom to download data, prepare loaders, and train all three models.
- If GPU memory is limited, reduce `BATCH_SIZE` or unfreeze only part of the backbones.
- You can skip training some models by commenting their training calls.
- Adjust `EPOCHS`, `LEARNING_RATE`, or augmentation if over/underfitting is observed.

### EfficientNetB1 Performance