In [1]:
import os
import shutil
import numpy as np
from PIL import Image, ImageStat
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

Data Cleaning

In [3]:
# Dataset root, quarantine folder for rejected images, and the on-disk image size.
_DOCUMENTS_DIR = os.path.join(os.path.expanduser("~"), "Documents")
DATASET_DIR = os.path.join(_DOCUMENTS_DIR, "animals")
BAD_IMG_DIR = os.path.join(_DOCUMENTS_DIR, "bad_animal_images")
TARGET_SIZE = (224, 224)

# Create the quarantine folder up front so later moves do not hit a missing directory.
os.makedirs(BAD_IMG_DIR, exist_ok=True)

# Recursively collect the paths of all image files below a folder
def get_all_image_files(root_dir):
    """Return the path of every image file found anywhere below ``root_dir``.

    A file counts as an image when its name ends, case-insensitively, with
    .jpg, .jpeg, .png or .bmp. Paths are returned in ``os.walk`` order.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    return [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(root_dir)
        for name in names
        if name.lower().endswith(image_suffixes)
    ]

# Index the dataset once and show a small sample as a sanity check.
image_files = get_all_image_files(DATASET_DIR)
total_found = len(image_files)
print(f"Total images found recursively: {total_found}")
print("First few images:", image_files[:5])

# Check for corrupted images, moving them to bad_images
def check_and_move_corrupted_images(image_paths):
    """Move every image that fails Pillow's integrity check into BAD_IMG_DIR.

    Args:
        image_paths: Iterable of image file paths to check.

    Returns:
        List of the original paths that failed ``Image.open``/``verify`` and
        were moved.

    A file keeps its own name in BAD_IMG_DIR when that name is free. If a file
    with the same name is already there (e.g. the same file name used in two
    class folders), the parent folder name is prepended, followed by a numeric
    suffix if needed, so an earlier quarantined file is never overwritten.
    Errors raised by the move itself are not caught.
    """
    corrupted = []
    for file_path in image_paths:
        try:
            with Image.open(file_path) as img:
                img.verify()
        except Exception:
            corrupted.append(file_path)

            file_name = os.path.basename(file_path)
            dest_path = os.path.join(BAD_IMG_DIR, file_name)
            if os.path.exists(dest_path):
                # Name collision: disambiguate with the class folder name,
                # then with a counter if that is taken as well.
                class_name = os.path.basename(os.path.dirname(file_path))
                stem, ext = os.path.splitext(file_name)
                candidate = f"{class_name}_{file_name}"
                counter = 1
                while os.path.exists(os.path.join(BAD_IMG_DIR, candidate)):
                    candidate = f"{class_name}_{stem}_{counter}{ext}"
                    counter += 1
                dest_path = os.path.join(BAD_IMG_DIR, candidate)

            shutil.move(file_path, dest_path)
            print(f"Moved corrupted image: {file_path}")
    print(f"Total corrupted images moved: {len(corrupted)}")
    return corrupted

# Run the integrity check; files that fail it are moved to BAD_IMG_DIR.
corrupted_images = check_and_move_corrupted_images(image_files)

# After removing corrupted images, update image list again.
# A set gives constant-time membership tests instead of rescanning the list per path.
_corrupted_lookup = set(corrupted_images)
image_files = [f for f in image_files if f not in _corrupted_lookup]

# Low quality detection
def is_low_quality(image, brightness_thresh=20, contrast_thresh=15):
    """Report whether an image is too dark or too flat.

    The image is converted to grayscale ('L' mode). Its mean pixel value is
    taken as brightness and its standard deviation as contrast. Returns True
    when either value is below its threshold.
    """
    grayscale_stats = ImageStat.Stat(image.convert('L'))
    too_dark = grayscale_stats.mean[0] < brightness_thresh
    too_flat = grayscale_stats.stddev[0] < contrast_thresh
    return too_dark or too_flat

def move_low_quality_images(image_paths):
    """Move images flagged by ``is_low_quality`` into BAD_IMG_DIR.

    Args:
        image_paths: Iterable of image file paths to inspect.

    Returns:
        List of the original paths that were actually moved.

    The quality check runs while the file is open, but the move happens only
    after the ``with`` block has closed the handle, so the file is never
    renamed while this function still holds it open. A path is recorded only
    once its move succeeded. Any error for a file is printed and that file is
    skipped. Name collisions in BAD_IMG_DIR are resolved by prepending the
    parent folder name, plus a counter if needed, instead of overwriting.
    """
    low_quality = []
    for file_path in image_paths:
        try:
            with Image.open(file_path) as img:
                rejected = is_low_quality(img)
            if not rejected:
                continue

            file_name = os.path.basename(file_path)
            dest_path = os.path.join(BAD_IMG_DIR, file_name)
            if os.path.exists(dest_path):
                # Same file name already quarantined (e.g. from another class folder).
                class_name = os.path.basename(os.path.dirname(file_path))
                stem, ext = os.path.splitext(file_name)
                candidate = f"{class_name}_{file_name}"
                counter = 1
                while os.path.exists(os.path.join(BAD_IMG_DIR, candidate)):
                    candidate = f"{class_name}_{stem}_{counter}{ext}"
                    counter += 1
                dest_path = os.path.join(BAD_IMG_DIR, candidate)

            shutil.move(file_path, dest_path)
            low_quality.append(file_path)
            print(f"Moved low-quality image: {file_path}")
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
    print(f"Total low-quality images moved: {len(low_quality)}")
    return low_quality

# Run the brightness/contrast screen; flagged files are moved to BAD_IMG_DIR.
low_quality_images = move_low_quality_images(image_files)

# Update image list after moving low-quality images.
# A set gives constant-time membership tests instead of rescanning the list per path.
_low_quality_lookup = set(low_quality_images)
image_files = [f for f in image_files if f not in _low_quality_lookup]

# Resize images
def resize_images(image_paths, target_size=TARGET_SIZE):
    """Resize images on disk to ``target_size``, overwriting each file in place.

    Args:
        image_paths: Iterable of image file paths.
        target_size: ``(width, height)`` to resize to; any two-item sequence
            is accepted. Aspect ratio is not preserved.

    Returns:
        None. Progress and the final count are printed.

    Images already at ``target_size`` are left untouched. An error on one file
    is printed and the loop continues with the next file.
    """
    # Image.size is a tuple; normalise so a list argument still compares equal.
    target_size = tuple(target_size)
    resized_count = 0
    for file_path in image_paths:
        try:
            with Image.open(file_path) as img:
                if img.size == target_size:
                    continue
                # resize() returns a new image, so the result no longer
                # depends on the open source file.
                resized = img.resize(target_size)
            # Overwrite only after the source handle has been closed.
            resized.save(file_path)
            resized_count += 1
            print(f"Resized image: {file_path}")
        except Exception as e:
            print(f"Error resizing {file_path}: {e}")
    print(f"Total images resized: {resized_count}")

# Resize every remaining image to TARGET_SIZE in place (the files on disk are overwritten).
resize_images(image_files)

# Label folder check (for immediate subfolders only)
def check_label_folders(base_folder):
    """Print a warning for each folder directly under ``base_folder`` that holds no images.

    Each immediate subfolder is searched with ``get_all_image_files``.
    Entries of ``base_folder`` that are not directories are ignored.
    """
    for entry in os.listdir(base_folder):
        entry_path = os.path.join(base_folder, entry)
        # Loose files sitting directly in the base folder are skipped.
        if not os.path.isdir(entry_path):
            continue
        if not get_all_image_files(entry_path):
            print(f"Warning: Label folder '{entry}' contains no images.")

# Warn about any class folder under the dataset root that contains no images.
check_label_folders(DATASET_DIR)

Total images found recursively: 5400
First few images: ['C:\\Users\\adren\\Documents\\animals\\antelope\\02f4b3be2d.jpg', 'C:\\Users\\adren\\Documents\\animals\\antelope\\03d7fc0888.jpg', 'C:\\Users\\adren\\Documents\\animals\\antelope\\058fa9a60f.jpg', 'C:\\Users\\adren\\Documents\\animals\\antelope\\0a37838e99.jpg', 'C:\\Users\\adren\\Documents\\animals\\antelope\\0b1a3af197.jpg']
Total corrupted images moved: 0
Moved low-quality image: C:\Users\adren\Documents\animals\bat\0315a4b6b7.jpg
Moved low-quality image: C:\Users\adren\Documents\animals\bat\3d1dda7723.jpg
Moved low-quality image: C:\Users\adren\Documents\animals\bat\4c23df5b08.jpg
Moved low-quality image: C:\Users\adren\Documents\animals\bat\557b9d9df6.jpg
Moved low-quality image: C:\Users\adren\Documents\animals\bat\68b39cc449.jpg
Moved low-quality image: C:\Users\adren\Documents\animals\bat\74d55d6d41.jpg
Moved low-quality image: C:\Users\adren\Documents\animals\bat\82a9875c4a.jpg
Moved low-quality image: C:\Users\adren\Doc

Data Segmentation and Augmentation

In [4]:
# Per-channel normalisation statistics used by both pipelines
# (these look like the usual ImageNet mean/std; confirm against the pretrained weights in use).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Augmentation pipeline for training
_train_steps = [
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
]
train_transform = transforms.Compose(_train_steps)

# Transformation pipeline for validation and test: no random augmentation
_eval_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
]
val_test_transform = transforms.Compose(_eval_steps)

# Dataset class
class AnimalDataset(Dataset):
    """Map-style dataset that yields ``(image, class_index)`` pairs from image files.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of class names, one per path.
        transform: Optional callable applied to the RGB image before it is returned.
        label_to_idx: Optional mapping from class name to integer index. Pass
            the same mapping to every split so indices agree between them.
            When omitted, a mapping is built from the sorted distinct values
            of ``labels``.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
        KeyError: If a label is missing from ``label_to_idx``.
    """

    def __init__(self, image_paths, labels, transform=None, label_to_idx=None):
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length "
                f"(got {len(image_paths)} and {len(labels)})"
            )

        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

        # Use externally provided label-to-index mapping
        if label_to_idx is None:
            self.label_to_idx = {label: idx for idx, label in enumerate(sorted(set(labels)))}
        else:
            self.label_to_idx = label_to_idx

        # Integer targets are resolved once here rather than on every item access.
        self.targets = [self.label_to_idx[label] for label in labels]

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, class_index)``."""
        img_path = self.image_paths[idx]
        label = self.targets[idx]
        # The context manager closes the file handle; convert() returns an
        # image that stays usable after the source is closed.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        # Compare against None so a transform object that happens to be falsy is still applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Constants
DATASET_DIR = os.path.join(os.path.expanduser("~"), "Documents", "animals")
K_FOLDS = 5

# Gather image paths and labels: one class per immediate subfolder of DATASET_DIR
image_paths, labels = [], []

for label in sorted(os.listdir(DATASET_DIR)):
    class_dir = os.path.join(DATASET_DIR, label)
    if os.path.isdir(class_dir):
        # os.listdir returns entries in arbitrary order. Sorting the file names
        # fixes the sample order, so the seeded folds below are the same on
        # every machine and every run.
        for fname in sorted(os.listdir(class_dir)):
            if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                image_paths.append(os.path.join(class_dir, fname))
                labels.append(label)

# Fail early with a clear message instead of an obscure error from the splitter.
if not image_paths:
    raise FileNotFoundError(f"No images found in class folders under {DATASET_DIR}")

# NumPy arrays allow the fold index arrays to select samples directly.
image_paths = np.array(image_paths)
labels = np.array(labels)

print(f"Total images: {len(image_paths)}, Total classes: {len(set(labels))}")

# Consistent label mapping, shared by every dataset split
unique_labels = sorted(set(labels))
label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}

# Stratified K-Fold Cross Validation
skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=42)

# Preview each fold's size and class balance (no training happens here).
for fold, (train_idx, val_idx) in enumerate(skf.split(image_paths, labels), 1):
    train_images, val_images = image_paths[train_idx], image_paths[val_idx]
    train_labels, val_labels = labels[train_idx], labels[val_idx]

    train_dataset = AnimalDataset(train_images, train_labels, transform=train_transform, label_to_idx=label_to_idx)
    val_dataset = AnimalDataset(val_images, val_labels, transform=val_test_transform, label_to_idx=label_to_idx)

    print(f"\nFold {fold}")
    print(f"  Train size: {len(train_images)}, Test size: {len(val_images)}")
    print(f"  Train class distribution: {dict(zip(*np.unique(train_labels, return_counts=True)))}")
    print(f"  Test class distribution: {dict(zip(*np.unique(val_labels, return_counts=True)))}")


Total images: 5359, Total classes: 90

Fold 1
  Train size: 4287, Test size: 1072
  Train class distribution: {np.str_('antelope'): np.int64(48), np.str_('badger'): np.int64(48), np.str_('bat'): np.int64(40), np.str_('bear'): np.int64(48), np.str_('bee'): np.int64(48), np.str_('beetle'): np.int64(47), np.str_('bison'): np.int64(48), np.str_('boar'): np.int64(48), np.str_('butterfly'): np.int64(48), np.str_('cat'): np.int64(48), np.str_('caterpillar'): np.int64(47), np.str_('chimpanzee'): np.int64(48), np.str_('cockroach'): np.int64(48), np.str_('cow'): np.int64(48), np.str_('coyote'): np.int64(48), np.str_('crab'): np.int64(48), np.str_('crow'): np.int64(48), np.str_('deer'): np.int64(48), np.str_('dog'): np.int64(48), np.str_('dolphin'): np.int64(48), np.str_('donkey'): np.int64(48), np.str_('dragonfly'): np.int64(47), np.str_('duck'): np.int64(48), np.str_('eagle'): np.int64(48), np.str_('elephant'): np.int64(48), np.str_('flamingo'): np.int64(48), np.str_('fly'): np.int64(48), np.st

In [5]:
# Pick the compute device, preferring CUDA, then Apple MPS, then CPU.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    _device_name = "cuda"
elif _mps_backend is not None and _mps_backend.is_available():
    _device_name = "mps"
else:
    _device_name = "cpu"

device = torch.device(_device_name)
print(f"Using device: {device}")

Using device: cuda


Model Training (ResNet50)

In [6]:
# Training hyper-parameters for the ResNet50 runs
NUM_EPOCHS, BATCH_SIZE, NUM_WORKERS = 10, 32, 0
# One output unit per distinct class name
NUM_CLASSES = len(set(labels))

# Store metrics per fold, keyed by fold number
resnet_fold_metrics = dict()

def train_one_fold_resnet(fold, train_images, train_labels, val_images, val_labels):
    """Fine-tune a pretrained ResNet50 on one cross-validation fold.

    Args:
        fold: Fold number, used in progress messages and the checkpoint file name.
        train_images: Image paths for the training split.
        train_labels: Class names for the training split, one per path.
        val_images: Image paths for the validation split.
        val_labels: Class names for the validation split, one per path.

    Returns:
        dict with keys:
            "model": the model after the last epoch.
            "train_losses": per-epoch loss, summed over training batches (not averaged).
            "train_accuracies": per-epoch training accuracy in percent.
            "val_accuracies": per-epoch validation accuracy in percent.
            "val_preds" / "val_targets": predicted and true class indices for
                the validation split from the final epoch only, so they match
                the saved weights.

    Side effects:
        Writes the final weights to ``resnet50_fold{fold}.pth`` in the current
        working directory.
    """
    # Dataset & Dataloaders
    train_dataset = AnimalDataset(train_images, train_labels, transform=train_transform, label_to_idx=label_to_idx)
    val_dataset = AnimalDataset(val_images, val_labels, transform=val_test_transform, label_to_idx=label_to_idx)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    # Model: swap the classification head for one with NUM_CLASSES outputs
    model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
    model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
    model = model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Metric Tracking
    train_losses, train_accuracies, val_accuracies = [], [], []
    final_val_preds, final_val_targets = [], []

    for epoch in range(NUM_EPOCHS):
        # ----- Training -----
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        # The batch variable is named `targets` so it cannot be confused with
        # the notebook-level `labels` array.
        for images, targets in tqdm(train_loader, desc=f"[Fold {fold}] Epoch {epoch+1}/{NUM_EPOCHS} - Training"):
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.detach().argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        train_accuracy = 100 * correct / total
        train_losses.append(running_loss)
        train_accuracies.append(train_accuracy)
        print(f"[Fold {fold}] Epoch {epoch+1}, Loss: {running_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

        # ----- Validation -----
        model.eval()
        correct, total = 0, 0
        # Collected fresh each epoch. Accumulating across epochs would mix
        # predictions from early, weaker epochs into the returned lists.
        epoch_preds, epoch_targets = [], []

        with torch.no_grad():
            for images, targets in tqdm(val_loader, desc=f"[Fold {fold}] Epoch {epoch+1} - Validation"):
                images, targets = images.to(device), targets.to(device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)

                epoch_preds.extend(predicted.cpu().numpy())
                epoch_targets.extend(targets.cpu().numpy())

                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        val_accuracy = 100 * correct / total
        val_accuracies.append(val_accuracy)
        print(f"[Fold {fold}] Epoch {epoch+1}, Val Accuracy: {val_accuracy:.2f}%")

        # Keep only the latest epoch's predictions.
        final_val_preds, final_val_targets = epoch_preds, epoch_targets

    # Save model (weights as of the last epoch)
    torch.save(model.state_dict(), f"resnet50_fold{fold}.pth")
    print(f"Saved model: resnet50_fold{fold}.pth")

    return {
        "model": model,
        "train_losses": train_losses,
        "train_accuracies": train_accuracies,
        "val_accuracies": val_accuracies,
        "val_preds": final_val_preds,
        "val_targets": final_val_targets,
    }

# Begin Training Across All Folds
for fold, (train_idx, val_idx) in enumerate(skf.split(image_paths, labels), 1):
    train_images, val_images = image_paths[train_idx], image_paths[val_idx]
    train_labels, val_labels = labels[train_idx], labels[val_idx]

    print(f"\n========== Training Fold {fold} ==========")
    result = train_one_fold_resnet(fold, train_images, train_labels, val_images, val_labels)

    # Keep only the picklable metrics; the model object itself is not stored here.
    resnet_fold_metrics[fold] = {
        "train_losses": result["train_losses"],
        "train_accuracies": result["train_accuracies"],
        "val_accuracies": result["val_accuracies"],
        "val_preds": result["val_preds"],
        "val_targets": result["val_targets"],
    }

    # Save metrics for each fold as soon as it finishes, so an interrupted run keeps them
    with open(f"resnet_metrics_fold{fold}.pkl", "wb") as f:
        pickle.dump(resnet_fold_metrics[fold], f)
    print(f"Saved Fold {fold} metrics to resnet_metrics_fold{fold}.pkl")

# Identify Best Fold (by the highest validation accuracy reached in any epoch)
best_fold = max(resnet_fold_metrics.items(), key=lambda x: max(x[1]['val_accuracies']))[0]
print(f"Best fold: Fold {best_fold}")
print(f"Highest Val Accuracy: {max(resnet_fold_metrics[best_fold]['val_accuracies']):.2f}%")

# Average accuracy over all folds.
# NOTE: this uses each fold's best epoch, chosen on the validation fold itself,
# so it is an optimistic estimate.
mean_test_accuracy = np.mean([max(resnet_fold_metrics[f]['val_accuracies']) for f in resnet_fold_metrics])
print(f"Mean Test Accuracy across all folds: {mean_test_accuracy:.2f}%")

# The checkpoints on disk hold the final-epoch weights, so also report the
# accuracy those saved models actually achieved.
mean_final_val_accuracy = np.mean([m['val_accuracies'][-1] for m in resnet_fold_metrics.values()])
print(f"Mean final-epoch Val Accuracy across all folds: {mean_final_val_accuracy:.2f}%")

# Save all fold metrics to a single file
with open("resnet_all_fold_metrics.pkl", "wb") as f:
    pickle.dump(resnet_fold_metrics, f)
print("Saved all fold metrics to resnet_all_fold_metrics.pkl")





[Fold 1] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:51<00:00,  2.58it/s]


[Fold 1] Epoch 1, Loss: 471.9495, Accuracy: 30.14%


[Fold 1] Epoch 1 - Validation: 100%|██████████| 34/34 [00:06<00:00,  5.08it/s]


[Fold 1] Epoch 1, Val Accuracy: 78.08%


[Fold 1] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 1] Epoch 2, Loss: 191.2559, Accuracy: 69.40%


[Fold 1] Epoch 2 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.69it/s]


[Fold 1] Epoch 2, Val Accuracy: 84.98%


[Fold 1] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 1] Epoch 3, Loss: 124.0488, Accuracy: 77.61%


[Fold 1] Epoch 3 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.47it/s]


[Fold 1] Epoch 3, Val Accuracy: 89.37%


[Fold 1] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.45it/s]


[Fold 1] Epoch 4, Loss: 97.7903, Accuracy: 82.18%


[Fold 1] Epoch 4 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.53it/s]


[Fold 1] Epoch 4, Val Accuracy: 89.46%


[Fold 1] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.45it/s]


[Fold 1] Epoch 5, Loss: 82.3140, Accuracy: 84.86%


[Fold 1] Epoch 5 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.37it/s]


[Fold 1] Epoch 5, Val Accuracy: 91.32%


[Fold 1] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 1] Epoch 6, Loss: 72.7604, Accuracy: 86.82%


[Fold 1] Epoch 6 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.64it/s]


[Fold 1] Epoch 6, Val Accuracy: 91.79%


[Fold 1] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.48it/s]


[Fold 1] Epoch 7, Loss: 65.7897, Accuracy: 88.13%


[Fold 1] Epoch 7 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.35it/s]


[Fold 1] Epoch 7, Val Accuracy: 91.79%


[Fold 1] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 1] Epoch 8, Loss: 57.6546, Accuracy: 89.57%


[Fold 1] Epoch 8 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.56it/s]


[Fold 1] Epoch 8, Val Accuracy: 93.28%


[Fold 1] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.43it/s]


[Fold 1] Epoch 9, Loss: 58.7510, Accuracy: 88.78%


[Fold 1] Epoch 9 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.43it/s]


[Fold 1] Epoch 9, Val Accuracy: 91.04%


[Fold 1] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 1] Epoch 10, Loss: 54.3428, Accuracy: 89.74%


[Fold 1] Epoch 10 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.17it/s]


[Fold 1] Epoch 10, Val Accuracy: 91.60%
Saved model: resnet50_fold1.pth
Saved Fold 1 metrics to resnet_metrics_fold1.pkl



[Fold 2] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.41it/s]


[Fold 2] Epoch 1, Loss: 478.0251, Accuracy: 27.25%


[Fold 2] Epoch 1 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.52it/s]


[Fold 2] Epoch 1, Val Accuracy: 71.36%


[Fold 2] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.49it/s]


[Fold 2] Epoch 2, Loss: 200.6409, Accuracy: 68.14%


[Fold 2] Epoch 2 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.53it/s]


[Fold 2] Epoch 2, Val Accuracy: 85.35%


[Fold 2] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 2] Epoch 3, Loss: 119.7346, Accuracy: 79.47%


[Fold 2] Epoch 3 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.69it/s]


[Fold 2] Epoch 3, Val Accuracy: 87.22%


[Fold 2] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 2] Epoch 4, Loss: 94.4192, Accuracy: 83.44%


[Fold 2] Epoch 4 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.63it/s]


[Fold 2] Epoch 4, Val Accuracy: 89.37%


[Fold 2] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 2] Epoch 5, Loss: 81.8328, Accuracy: 85.19%


[Fold 2] Epoch 5 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.53it/s]


[Fold 2] Epoch 5, Val Accuracy: 90.58%


[Fold 2] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.44it/s]


[Fold 2] Epoch 6, Loss: 73.7091, Accuracy: 86.40%


[Fold 2] Epoch 6 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.51it/s]


[Fold 2] Epoch 6, Val Accuracy: 90.02%


[Fold 2] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 2] Epoch 7, Loss: 65.0859, Accuracy: 88.22%


[Fold 2] Epoch 7 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.35it/s]


[Fold 2] Epoch 7, Val Accuracy: 89.46%


[Fold 2] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 2] Epoch 8, Loss: 64.4863, Accuracy: 87.99%


[Fold 2] Epoch 8 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.56it/s]


[Fold 2] Epoch 8, Val Accuracy: 89.46%


[Fold 2] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 2] Epoch 9, Loss: 53.0502, Accuracy: 90.16%


[Fold 2] Epoch 9 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.52it/s]


[Fold 2] Epoch 9, Val Accuracy: 90.67%


[Fold 2] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 2] Epoch 10, Loss: 52.1738, Accuracy: 89.97%


[Fold 2] Epoch 10 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.47it/s]


[Fold 2] Epoch 10, Val Accuracy: 90.58%
Saved model: resnet50_fold2.pth
Saved Fold 2 metrics to resnet_metrics_fold2.pkl



[Fold 3] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.41it/s]


[Fold 3] Epoch 1, Loss: 468.8113, Accuracy: 31.23%


[Fold 3] Epoch 1 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.47it/s]


[Fold 3] Epoch 1, Val Accuracy: 78.17%


[Fold 3] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 3] Epoch 2, Loss: 195.4142, Accuracy: 68.58%


[Fold 3] Epoch 2 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.70it/s]


[Fold 3] Epoch 2, Val Accuracy: 88.06%


[Fold 3] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 3] Epoch 3, Loss: 126.3342, Accuracy: 78.10%


[Fold 3] Epoch 3 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.68it/s]


[Fold 3] Epoch 3, Val Accuracy: 89.27%


[Fold 3] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 3] Epoch 4, Loss: 95.1814, Accuracy: 82.58%


[Fold 3] Epoch 4 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.47it/s]


[Fold 3] Epoch 4, Val Accuracy: 90.30%


[Fold 3] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 3] Epoch 5, Loss: 83.8368, Accuracy: 85.28%


[Fold 3] Epoch 5 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.55it/s]


[Fold 3] Epoch 5, Val Accuracy: 92.54%


[Fold 3] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.45it/s]


[Fold 3] Epoch 6, Loss: 74.3206, Accuracy: 85.98%


[Fold 3] Epoch 6 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.49it/s]


[Fold 3] Epoch 6, Val Accuracy: 91.23%


[Fold 3] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.38it/s]


[Fold 3] Epoch 7, Loss: 62.3754, Accuracy: 89.11%


[Fold 3] Epoch 7 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.37it/s]


[Fold 3] Epoch 7, Val Accuracy: 91.88%


[Fold 3] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 3] Epoch 8, Loss: 57.9103, Accuracy: 89.39%


[Fold 3] Epoch 8 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.52it/s]


[Fold 3] Epoch 8, Val Accuracy: 92.82%


[Fold 3] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 3] Epoch 9, Loss: 59.6319, Accuracy: 88.92%


[Fold 3] Epoch 9 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.49it/s]


[Fold 3] Epoch 9, Val Accuracy: 91.70%


[Fold 3] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 3] Epoch 10, Loss: 53.9694, Accuracy: 89.74%


[Fold 3] Epoch 10 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.48it/s]


[Fold 3] Epoch 10, Val Accuracy: 91.70%
Saved model: resnet50_fold3.pth
Saved Fold 3 metrics to resnet_metrics_fold3.pkl



[Fold 4] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 4] Epoch 1, Loss: 475.7403, Accuracy: 30.16%


[Fold 4] Epoch 1 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.66it/s]


[Fold 4] Epoch 1, Val Accuracy: 76.59%


[Fold 4] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 4] Epoch 2, Loss: 198.8632, Accuracy: 68.02%


[Fold 4] Epoch 2 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.50it/s]


[Fold 4] Epoch 2, Val Accuracy: 87.69%


[Fold 4] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.45it/s]


[Fold 4] Epoch 3, Loss: 124.6255, Accuracy: 78.56%


[Fold 4] Epoch 3 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.28it/s]


[Fold 4] Epoch 3, Val Accuracy: 88.53%


[Fold 4] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.47it/s]


[Fold 4] Epoch 4, Loss: 97.4335, Accuracy: 82.25%


[Fold 4] Epoch 4 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.73it/s]


[Fold 4] Epoch 4, Val Accuracy: 90.49%


[Fold 4] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 4] Epoch 5, Loss: 81.8436, Accuracy: 85.54%


[Fold 4] Epoch 5 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.72it/s]


[Fold 4] Epoch 5, Val Accuracy: 91.32%


[Fold 4] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 4] Epoch 6, Loss: 71.6972, Accuracy: 87.03%


[Fold 4] Epoch 6 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.65it/s]


[Fold 4] Epoch 6, Val Accuracy: 89.74%


[Fold 4] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 4] Epoch 7, Loss: 68.8878, Accuracy: 87.19%


[Fold 4] Epoch 7 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.59it/s]


[Fold 4] Epoch 7, Val Accuracy: 91.60%


[Fold 4] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.45it/s]


[Fold 4] Epoch 8, Loss: 59.0399, Accuracy: 89.22%


[Fold 4] Epoch 8 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.47it/s]


[Fold 4] Epoch 8, Val Accuracy: 91.51%


[Fold 4] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.48it/s]


[Fold 4] Epoch 9, Loss: 57.4575, Accuracy: 89.22%


[Fold 4] Epoch 9 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.53it/s]


[Fold 4] Epoch 9, Val Accuracy: 92.07%


[Fold 4] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:29<00:00,  4.47it/s]


[Fold 4] Epoch 10, Loss: 55.1940, Accuracy: 89.83%


[Fold 4] Epoch 10 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.54it/s]


[Fold 4] Epoch 10, Val Accuracy: 91.98%
Saved model: resnet50_fold4.pth
Saved Fold 4 metrics to resnet_metrics_fold4.pkl



[Fold 5] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 5] Epoch 1, Loss: 469.8324, Accuracy: 29.22%


[Fold 5] Epoch 1 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.59it/s]


[Fold 5] Epoch 1, Val Accuracy: 73.67%


[Fold 5] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 5] Epoch 2, Loss: 194.4187, Accuracy: 69.45%


[Fold 5] Epoch 2 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.56it/s]


[Fold 5] Epoch 2, Val Accuracy: 84.13%


[Fold 5] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 5] Epoch 3, Loss: 120.9467, Accuracy: 79.52%


[Fold 5] Epoch 3 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.61it/s]


[Fold 5] Epoch 3, Val Accuracy: 88.05%


[Fold 5] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.43it/s]


[Fold 5] Epoch 4, Loss: 93.9723, Accuracy: 82.74%


[Fold 5] Epoch 4 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.57it/s]


[Fold 5] Epoch 4, Val Accuracy: 88.52%


[Fold 5] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 5] Epoch 5, Loss: 81.0821, Accuracy: 85.12%


[Fold 5] Epoch 5 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.63it/s]


[Fold 5] Epoch 5, Val Accuracy: 90.10%


[Fold 5] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.43it/s]


[Fold 5] Epoch 6, Loss: 68.5640, Accuracy: 87.64%


[Fold 5] Epoch 6 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.65it/s]


[Fold 5] Epoch 6, Val Accuracy: 89.92%


[Fold 5] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.43it/s]


[Fold 5] Epoch 7, Loss: 67.3280, Accuracy: 86.89%


[Fold 5] Epoch 7 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.54it/s]


[Fold 5] Epoch 7, Val Accuracy: 90.01%


[Fold 5] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.42it/s]


[Fold 5] Epoch 8, Loss: 58.4324, Accuracy: 88.76%


[Fold 5] Epoch 8 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.64it/s]


[Fold 5] Epoch 8, Val Accuracy: 90.10%


[Fold 5] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.46it/s]


[Fold 5] Epoch 9, Loss: 53.8180, Accuracy: 90.04%


[Fold 5] Epoch 9 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.56it/s]


[Fold 5] Epoch 9, Val Accuracy: 89.92%


[Fold 5] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:30<00:00,  4.40it/s]


[Fold 5] Epoch 10, Loss: 59.3325, Accuracy: 89.18%


[Fold 5] Epoch 10 - Validation: 100%|██████████| 34/34 [00:02<00:00, 14.00it/s]


[Fold 5] Epoch 10, Val Accuracy: 91.04%
Saved model: resnet50_fold5.pth
Saved Fold 5 metrics to resnet_metrics_fold5.pkl
Best fold: Fold 1
Highest Val Accuracy: 93.28%
Mean Test Accuracy across all folds: 91.98%
Saved all fold metrics to resnet_all_fold_metrics.pkl


Model Training (VGG16)

In [7]:
# Training hyper-parameters for the VGG16 runs
NUM_EPOCHS, BATCH_SIZE, NUM_WORKERS = 10, 32, 0
# One output unit per distinct class name
NUM_CLASSES = len(set(labels))

# Store metrics per fold
vgg_fold_metrics = dict()

def train_one_fold_vgg(fold, train_images, train_labels, val_images, val_labels):
    """Fine-tune a pretrained VGG16 on one cross-validation fold and save it.

    The network is created with torchvision's default pretrained weights, its
    last classifier layer is replaced by a ``NUM_CLASSES``-way linear layer,
    and all parameters are trained with Adam (lr=1e-4) for ``NUM_EPOCHS``
    epochs. After the last epoch the weights are written to
    ``vgg16_fold{fold}.pth``.

    Args:
        fold: Fold number; used in progress messages and the checkpoint name.
        train_images: Training samples for this fold (handed to AnimalDataset).
        train_labels: Labels matching ``train_images``.
        val_images: Held-out samples for this fold (handed to AnimalDataset).
        val_labels: Labels matching ``val_images``.

    Returns:
        dict with the keys
        ``"train_losses"``     - per-epoch sum of the batch losses (not a mean),
        ``"train_accuracies"`` - per-epoch training accuracy in percent,
        ``"val_accuracies"``   - per-epoch validation accuracy in percent,
        ``"val_preds"``        - predicted class indices on the validation
                                 split from the final epoch only,
        ``"val_targets"``      - true class indices aligned with ``val_preds``.
    """
    # Dataset & Dataloaders
    train_dataset = AnimalDataset(train_images, train_labels, transform=train_transform, label_to_idx=label_to_idx)
    val_dataset = AnimalDataset(val_images, val_labels, transform=val_test_transform, label_to_idx=label_to_idx)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    # Model (VGG16): swap the final classifier layer for one sized to our classes.
    model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, NUM_CLASSES)
    model = model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # Metric Tracking
    train_losses, train_accuracies, val_accuracies = [], [], []
    # Predictions/targets of the most recent validation pass. They are replaced
    # every epoch so the returned values describe the model that gets saved,
    # instead of a concatenation of every epoch's predictions.
    final_val_preds, final_val_targets = [], []

    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, targets in tqdm(train_loader, desc=f"[VGG Fold {fold}] Epoch {epoch+1}/{NUM_EPOCHS} - Training"):
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        train_accuracy = 100 * correct / total
        train_losses.append(running_loss)
        train_accuracies.append(train_accuracy)
        print(f"[VGG Fold {fold}] Epoch {epoch+1}, Loss: {running_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

        # Validation
        model.eval()
        correct, total = 0, 0
        epoch_preds, epoch_targets = [], []

        with torch.no_grad():
            for images, targets in tqdm(val_loader, desc=f"[VGG Fold {fold}] Epoch {epoch+1} - Validation"):
                images, targets = images.to(device), targets.to(device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)

                epoch_preds.extend(predicted.cpu().numpy())
                epoch_targets.extend(targets.cpu().numpy())

                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        val_accuracy = 100 * correct / total
        val_accuracies.append(val_accuracy)
        print(f"[VGG Fold {fold}] Epoch {epoch+1}, Val Accuracy: {val_accuracy:.2f}%")

        final_val_preds, final_val_targets = epoch_preds, epoch_targets

    # Save model (weights as they are after the last epoch)
    torch.save(model.state_dict(), f"vgg16_fold{fold}.pth")
    print(f"Saved VGG16 model: vgg16_fold{fold}.pth")

    return {
        "train_losses": train_losses,
        "train_accuracies": train_accuracies,
        "val_accuracies": val_accuracies,
        "val_preds": final_val_preds,
        "val_targets": final_val_targets,
    }

# Train one VGG16 per stratified fold, storing and pickling its metrics.
for fold, (train_idx, val_idx) in enumerate(skf.split(image_paths, labels), 1):
    train_images, val_images = image_paths[train_idx], image_paths[val_idx]
    train_labels, val_labels = labels[train_idx], labels[val_idx]

    print(f"\n========== Training VGG16 Fold {fold} ==========")
    result = train_one_fold_vgg(fold, train_images, train_labels, val_images, val_labels)

    vgg_fold_metrics[fold] = {
        key: result[key]
        for key in ("train_losses", "train_accuracies", "val_accuracies", "val_preds", "val_targets")
    }

    # Save per fold; report success only once the file has been closed.
    with open(f"vgg16_metrics_fold{fold}.pkl", "wb") as f:
        pickle.dump(vgg_fold_metrics[fold], f)
    print(f"Saved VGG16 Fold {fold} metrics to vgg16_metrics_fold{fold}.pkl")

# Save all folds
with open("vgg16_all_fold_metrics.pkl", "wb") as f:
    pickle.dump(vgg_fold_metrics, f)
print("Saved all VGG16 fold metrics to vgg16_all_fold_metrics.pkl")

# Peak (best-epoch) validation accuracy of every fold, computed once.
best_val_acc_per_fold = {
    fold_id: max(metrics["val_accuracies"]) for fold_id, metrics in vgg_fold_metrics.items()
}

# Best VGG Fold
best_vgg_fold = max(best_val_acc_per_fold, key=best_val_acc_per_fold.get)
print(f"Best VGG Fold: {best_vgg_fold}")
print(f"Highest VGG Val Accuracy: {best_val_acc_per_fold[best_vgg_fold]:.2f}%")

# Average Accuracy
# This is the mean of per-fold peak *validation* accuracy. No separate test
# set is evaluated in this cell, so it is labelled as validation accuracy.
mean_vgg_acc = np.mean(list(best_val_acc_per_fold.values()))
print(f"Mean VGG16 best-epoch Val Accuracy across folds: {mean_vgg_acc:.2f}%")



Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\adren/.cache\torch\hub\checkpoints\vgg16-397923af.pth


100%|██████████| 528M/528M [00:11<00:00, 48.7MB/s] 
[VGG Fold 1] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.89it/s]


[VGG Fold 1] Epoch 1, Loss: 547.6220, Accuracy: 7.23%


[VGG Fold 1] Epoch 1 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.58it/s]


[VGG Fold 1] Epoch 1, Val Accuracy: 32.37%


[VGG Fold 1] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.90it/s]


[VGG Fold 1] Epoch 2, Loss: 355.9817, Accuracy: 32.24%


[VGG Fold 1] Epoch 2 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.48it/s]


[VGG Fold 1] Epoch 2, Val Accuracy: 57.93%


[VGG Fold 1] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:45<00:00,  2.91it/s]


[VGG Fold 1] Epoch 3, Loss: 261.0118, Accuracy: 48.52%


[VGG Fold 1] Epoch 3 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.55it/s]


[VGG Fold 1] Epoch 3, Val Accuracy: 68.56%


[VGG Fold 1] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.91it/s]


[VGG Fold 1] Epoch 4, Loss: 205.5741, Accuracy: 58.60%


[VGG Fold 1] Epoch 4 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.56it/s]


[VGG Fold 1] Epoch 4, Val Accuracy: 73.41%


[VGG Fold 1] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.91it/s]


[VGG Fold 1] Epoch 5, Loss: 183.8447, Accuracy: 62.82%


[VGG Fold 1] Epoch 5 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.57it/s]


[VGG Fold 1] Epoch 5, Val Accuracy: 73.32%


[VGG Fold 1] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.91it/s]


[VGG Fold 1] Epoch 6, Loss: 174.3591, Accuracy: 65.41%


[VGG Fold 1] Epoch 6 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.56it/s]


[VGG Fold 1] Epoch 6, Val Accuracy: 73.13%


[VGG Fold 1] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.91it/s]


[VGG Fold 1] Epoch 7, Loss: 151.6324, Accuracy: 68.60%


[VGG Fold 1] Epoch 7 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.61it/s]


[VGG Fold 1] Epoch 7, Val Accuracy: 76.68%


[VGG Fold 1] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.89it/s]


[VGG Fold 1] Epoch 8, Loss: 147.3678, Accuracy: 70.96%


[VGG Fold 1] Epoch 8 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.12it/s]


[VGG Fold 1] Epoch 8, Val Accuracy: 77.71%


[VGG Fold 1] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:49<00:00,  2.68it/s]


[VGG Fold 1] Epoch 9, Loss: 135.0007, Accuracy: 72.73%


[VGG Fold 1] Epoch 9 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.25it/s]


[VGG Fold 1] Epoch 9, Val Accuracy: 76.59%


[VGG Fold 1] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.79it/s]


[VGG Fold 1] Epoch 10, Loss: 127.8735, Accuracy: 74.46%


[VGG Fold 1] Epoch 10 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.15it/s]


[VGG Fold 1] Epoch 10, Val Accuracy: 77.24%
Saved VGG16 model: vgg16_fold1.pth
Saved VGG16 Fold 1 metrics to vgg16_metrics_fold1.pkl



[VGG Fold 2] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.77it/s]


[VGG Fold 2] Epoch 1, Loss: 581.4632, Accuracy: 3.62%


[VGG Fold 2] Epoch 1 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.24it/s]


[VGG Fold 2] Epoch 1, Val Accuracy: 15.30%


[VGG Fold 2] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 2] Epoch 2, Loss: 416.4379, Accuracy: 23.96%


[VGG Fold 2] Epoch 2 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.19it/s]


[VGG Fold 2] Epoch 2, Val Accuracy: 49.35%


[VGG Fold 2] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 2] Epoch 3, Loss: 284.7410, Accuracy: 43.46%


[VGG Fold 2] Epoch 3 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.18it/s]


[VGG Fold 2] Epoch 3, Val Accuracy: 60.35%


[VGG Fold 2] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 2] Epoch 4, Loss: 231.5860, Accuracy: 53.49%


[VGG Fold 2] Epoch 4 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.22it/s]


[VGG Fold 2] Epoch 4, Val Accuracy: 67.26%


[VGG Fold 2] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.79it/s]


[VGG Fold 2] Epoch 5, Loss: 201.2948, Accuracy: 59.90%


[VGG Fold 2] Epoch 5 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.20it/s]


[VGG Fold 2] Epoch 5, Val Accuracy: 69.03%


[VGG Fold 2] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.76it/s]


[VGG Fold 2] Epoch 6, Loss: 169.7336, Accuracy: 64.82%


[VGG Fold 2] Epoch 6 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.09it/s]


[VGG Fold 2] Epoch 6, Val Accuracy: 70.15%


[VGG Fold 2] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.77it/s]


[VGG Fold 2] Epoch 7, Loss: 157.7545, Accuracy: 67.76%


[VGG Fold 2] Epoch 7 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.22it/s]


[VGG Fold 2] Epoch 7, Val Accuracy: 73.97%


[VGG Fold 2] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 2] Epoch 8, Loss: 153.3867, Accuracy: 68.88%


[VGG Fold 2] Epoch 8 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.25it/s]


[VGG Fold 2] Epoch 8, Val Accuracy: 74.81%


[VGG Fold 2] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.77it/s]


[VGG Fold 2] Epoch 9, Loss: 129.5714, Accuracy: 73.62%


[VGG Fold 2] Epoch 9 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.22it/s]


[VGG Fold 2] Epoch 9, Val Accuracy: 77.89%


[VGG Fold 2] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.77it/s]


[VGG Fold 2] Epoch 10, Loss: 128.4326, Accuracy: 73.76%


[VGG Fold 2] Epoch 10 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.28it/s]


[VGG Fold 2] Epoch 10, Val Accuracy: 77.33%
Saved VGG16 model: vgg16_fold2.pth
Saved VGG16 Fold 2 metrics to vgg16_metrics_fold2.pkl



[VGG Fold 3] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.77it/s]


[VGG Fold 3] Epoch 1, Loss: 514.4557, Accuracy: 11.48%


[VGG Fold 3] Epoch 1 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.22it/s]


[VGG Fold 3] Epoch 1, Val Accuracy: 45.52%


[VGG Fold 3] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 3] Epoch 2, Loss: 317.2442, Accuracy: 39.12%


[VGG Fold 3] Epoch 2 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.26it/s]


[VGG Fold 3] Epoch 2, Val Accuracy: 63.81%


[VGG Fold 3] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 3] Epoch 3, Loss: 239.1639, Accuracy: 52.23%


[VGG Fold 3] Epoch 3 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.23it/s]


[VGG Fold 3] Epoch 3, Val Accuracy: 68.75%


[VGG Fold 3] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 3] Epoch 4, Loss: 208.5083, Accuracy: 57.73%


[VGG Fold 3] Epoch 4 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.20it/s]


[VGG Fold 3] Epoch 4, Val Accuracy: 72.11%


[VGG Fold 3] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.78it/s]


[VGG Fold 3] Epoch 5, Loss: 177.3472, Accuracy: 64.57%


[VGG Fold 3] Epoch 5 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.22it/s]


[VGG Fold 3] Epoch 5, Val Accuracy: 70.15%


[VGG Fold 3] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.77it/s]


[VGG Fold 3] Epoch 6, Loss: 163.3922, Accuracy: 67.11%


[VGG Fold 3] Epoch 6 - Validation: 100%|██████████| 34/34 [00:03<00:00,  8.87it/s]


[VGG Fold 3] Epoch 6, Val Accuracy: 72.67%


[VGG Fold 3] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:48<00:00,  2.75it/s]


[VGG Fold 3] Epoch 7, Loss: 151.8655, Accuracy: 69.30%


[VGG Fold 3] Epoch 7 - Validation: 100%|██████████| 34/34 [00:03<00:00,  8.94it/s]


[VGG Fold 3] Epoch 7, Val Accuracy: 73.32%


[VGG Fold 3] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.85it/s]


[VGG Fold 3] Epoch 8, Loss: 132.6544, Accuracy: 73.50%


[VGG Fold 3] Epoch 8 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.33it/s]


[VGG Fold 3] Epoch 8, Val Accuracy: 76.87%


[VGG Fold 3] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.83it/s]


[VGG Fold 3] Epoch 9, Loss: 130.4054, Accuracy: 73.27%


[VGG Fold 3] Epoch 9 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.36it/s]


[VGG Fold 3] Epoch 9, Val Accuracy: 75.75%


[VGG Fold 3] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.84it/s]


[VGG Fold 3] Epoch 10, Loss: 124.8842, Accuracy: 74.81%


[VGG Fold 3] Epoch 10 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.43it/s]


[VGG Fold 3] Epoch 10, Val Accuracy: 77.80%
Saved VGG16 model: vgg16_fold3.pth
Saved VGG16 Fold 3 metrics to vgg16_metrics_fold3.pkl



[VGG Fold 4] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.80it/s]


[VGG Fold 4] Epoch 1, Loss: 513.9569, Accuracy: 12.22%


[VGG Fold 4] Epoch 1 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.28it/s]


[VGG Fold 4] Epoch 1, Val Accuracy: 41.79%


[VGG Fold 4] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.80it/s]


[VGG Fold 4] Epoch 2, Loss: 311.7986, Accuracy: 39.72%


[VGG Fold 4] Epoch 2 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.37it/s]


[VGG Fold 4] Epoch 2, Val Accuracy: 65.39%


[VGG Fold 4] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.82it/s]


[VGG Fold 4] Epoch 3, Loss: 235.5526, Accuracy: 53.37%


[VGG Fold 4] Epoch 3 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.35it/s]


[VGG Fold 4] Epoch 3, Val Accuracy: 67.91%


[VGG Fold 4] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 4] Epoch 4, Loss: 194.5504, Accuracy: 60.02%


[VGG Fold 4] Epoch 4 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.40it/s]


[VGG Fold 4] Epoch 4, Val Accuracy: 72.57%


[VGG Fold 4] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 4] Epoch 5, Loss: 178.7088, Accuracy: 64.52%


[VGG Fold 4] Epoch 5 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.44it/s]


[VGG Fold 4] Epoch 5, Val Accuracy: 73.13%


[VGG Fold 4] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.85it/s]


[VGG Fold 4] Epoch 6, Loss: 155.0311, Accuracy: 68.07%


[VGG Fold 4] Epoch 6 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.32it/s]


[VGG Fold 4] Epoch 6, Val Accuracy: 78.26%


[VGG Fold 4] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 4] Epoch 7, Loss: 144.2503, Accuracy: 70.31%


[VGG Fold 4] Epoch 7 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.40it/s]


[VGG Fold 4] Epoch 7, Val Accuracy: 77.05%


[VGG Fold 4] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.87it/s]


[VGG Fold 4] Epoch 8, Loss: 138.6802, Accuracy: 71.54%


[VGG Fold 4] Epoch 8 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.36it/s]


[VGG Fold 4] Epoch 8, Val Accuracy: 77.43%


[VGG Fold 4] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.85it/s]


[VGG Fold 4] Epoch 9, Loss: 138.7376, Accuracy: 72.15%


[VGG Fold 4] Epoch 9 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.42it/s]


[VGG Fold 4] Epoch 9, Val Accuracy: 79.29%


[VGG Fold 4] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 4] Epoch 10, Loss: 123.0810, Accuracy: 75.37%


[VGG Fold 4] Epoch 10 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.41it/s]


[VGG Fold 4] Epoch 10, Val Accuracy: 79.20%
Saved VGG16 model: vgg16_fold4.pth
Saved VGG16 Fold 4 metrics to vgg16_metrics_fold4.pkl



[VGG Fold 5] Epoch 1/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.85it/s]


[VGG Fold 5] Epoch 1, Loss: 522.0557, Accuracy: 11.24%


[VGG Fold 5] Epoch 1 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.41it/s]


[VGG Fold 5] Epoch 1, Val Accuracy: 43.14%


[VGG Fold 5] Epoch 2/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 5] Epoch 2, Loss: 311.2120, Accuracy: 39.62%


[VGG Fold 5] Epoch 2 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.41it/s]


[VGG Fold 5] Epoch 2, Val Accuracy: 56.77%


[VGG Fold 5] Epoch 3/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 5] Epoch 3, Loss: 234.0043, Accuracy: 54.13%


[VGG Fold 5] Epoch 3 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.40it/s]


[VGG Fold 5] Epoch 3, Val Accuracy: 70.21%


[VGG Fold 5] Epoch 4/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 5] Epoch 4, Loss: 194.3332, Accuracy: 61.08%


[VGG Fold 5] Epoch 4 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.42it/s]


[VGG Fold 5] Epoch 4, Val Accuracy: 72.46%


[VGG Fold 5] Epoch 5/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.85it/s]


[VGG Fold 5] Epoch 5, Loss: 179.5797, Accuracy: 62.73%


[VGG Fold 5] Epoch 5 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.45it/s]


[VGG Fold 5] Epoch 5, Val Accuracy: 73.11%


[VGG Fold 5] Epoch 6/10 - Training: 100%|██████████| 134/134 [00:47<00:00,  2.81it/s]


[VGG Fold 5] Epoch 6, Loss: 154.6644, Accuracy: 68.68%


[VGG Fold 5] Epoch 6 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.20it/s]


[VGG Fold 5] Epoch 6, Val Accuracy: 76.56%


[VGG Fold 5] Epoch 7/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 5] Epoch 7, Loss: 140.4642, Accuracy: 70.55%


[VGG Fold 5] Epoch 7 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.45it/s]


[VGG Fold 5] Epoch 7, Val Accuracy: 72.92%


[VGG Fold 5] Epoch 8/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.87it/s]


[VGG Fold 5] Epoch 8, Loss: 132.1729, Accuracy: 73.83%


[VGG Fold 5] Epoch 8 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.43it/s]


[VGG Fold 5] Epoch 8, Val Accuracy: 78.71%


[VGG Fold 5] Epoch 9/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 5] Epoch 9, Loss: 126.7654, Accuracy: 73.72%


[VGG Fold 5] Epoch 9 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.37it/s]


[VGG Fold 5] Epoch 9, Val Accuracy: 77.50%


[VGG Fold 5] Epoch 10/10 - Training: 100%|██████████| 134/134 [00:46<00:00,  2.86it/s]


[VGG Fold 5] Epoch 10, Loss: 121.3146, Accuracy: 75.26%


[VGG Fold 5] Epoch 10 - Validation: 100%|██████████| 34/34 [00:03<00:00,  9.41it/s]


[VGG Fold 5] Epoch 10, Val Accuracy: 77.22%
Saved VGG16 model: vgg16_fold5.pth
Saved VGG16 Fold 5 metrics to vgg16_metrics_fold5.pkl
Saved all VGG16 fold metrics to vgg16_all_fold_metrics.pkl
Best VGG Fold: 4
Highest VGG Val Accuracy: 79.29%
Mean VGG16 Test Accuracy across folds: 78.28%


In [None]:
class CustomCNN(nn.Module):
    """Three-stage convolutional image classifier.

    Each stage is Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2), taking the
    channel count 3 -> 32 -> 64 -> 128 and halving the spatial size each time.
    The resulting feature map is brought to a fixed 28x28 grid and classified
    by a 512-unit hidden layer with dropout followed by a ``num_classes``-way
    linear layer.

    Args:
        num_classes: Number of output logits.

    Notes:
        ``avgpool`` has no parameters, so the ``state_dict`` keys are only
        those of ``features`` and ``classifier``. For 224x224 inputs the
        feature map is already 28x28 and ``avgpool`` passes it through
        unchanged; for other input sizes it resamples to 28x28 so the fixed
        ``128 * 28 * 28`` classifier input still matches.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # Input: (3, 224, 224)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # -> (32, 112, 112)

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # -> (64, 56, 56)

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # -> (128, 28, 28)
        )

        # Guarantees a (128, 28, 28) map regardless of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((28, 28))

        self.classifier = nn.Sequential(
            nn.Flatten(),  # -> (128*28*28)
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = self.classifier(x)
        return x


In [None]:
# Training hyperparameters for the CustomCNN cross-validation run.
# NUM_WORKERS is forwarded to DataLoader(num_workers=...).
NUM_EPOCHS, BATCH_SIZE, NUM_WORKERS = 10, 32, 0

# One output unit per distinct label value found in `labels`.
NUM_CLASSES = len(set(labels))

# Maps fold number -> metrics dict produced while training that fold.
customcnn_fold_metrics = dict()

def train_one_fold_custom(fold, train_images, train_labels, val_images, val_labels):
    """Train a fresh ``CustomCNN`` on one cross-validation fold and save it.

    The model is trained with cross-entropy loss and Adam (lr=1e-4) for
    ``NUM_EPOCHS`` epochs, validating after every epoch. After the last epoch
    the weights are written to ``customcnn_fold{fold}.pth``.

    Args:
        fold: Fold number; used in progress messages and the checkpoint name.
        train_images: Training samples for this fold (handed to AnimalDataset).
        train_labels: Labels matching ``train_images``.
        val_images: Held-out samples for this fold (handed to AnimalDataset).
        val_labels: Labels matching ``val_images``.

    Returns:
        dict with the keys
        ``"train_losses"``     - per-epoch sum of the batch losses (not a mean),
        ``"train_accuracies"`` - per-epoch training accuracy in percent,
        ``"val_accuracies"``   - per-epoch validation accuracy in percent,
        ``"val_preds"``        - predicted class indices on the validation
                                 split from the final epoch only,
        ``"val_targets"``      - true class indices aligned with ``val_preds``.
    """
    train_dataset = AnimalDataset(train_images, train_labels, transform=train_transform, label_to_idx=label_to_idx)
    val_dataset = AnimalDataset(val_images, val_labels, transform=val_test_transform, label_to_idx=label_to_idx)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    model = CustomCNN(num_classes=NUM_CLASSES).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    train_losses, train_accuracies, val_accuracies = [], [], []
    # Predictions/targets of the most recent validation pass. They are replaced
    # every epoch so the returned values describe the model that gets saved,
    # instead of a concatenation of every epoch's predictions.
    final_val_preds, final_val_targets = [], []

    for epoch in range(NUM_EPOCHS):
        model.train()
        running_loss, correct, total = 0.0, 0, 0

        for images, targets in tqdm(train_loader, desc=f"[CustomCNN Fold {fold}] Epoch {epoch+1}/{NUM_EPOCHS} - Training"):
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

        train_accuracy = 100 * correct / total
        train_losses.append(running_loss)
        train_accuracies.append(train_accuracy)
        print(f"[CustomCNN Fold {fold}] Epoch {epoch+1}, Loss: {running_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

        # Validation
        model.eval()
        correct, total = 0, 0
        epoch_preds, epoch_targets = [], []

        with torch.no_grad():
            for images, targets in tqdm(val_loader, desc=f"[CustomCNN Fold {fold}] Epoch {epoch+1} - Validation"):
                images, targets = images.to(device), targets.to(device)
                outputs = model(images)
                predicted = outputs.argmax(dim=1)

                epoch_preds.extend(predicted.cpu().numpy())
                epoch_targets.extend(targets.cpu().numpy())

                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        val_accuracy = 100 * correct / total
        val_accuracies.append(val_accuracy)
        print(f"[CustomCNN Fold {fold}] Epoch {epoch+1}, Val Accuracy: {val_accuracy:.2f}%")

        final_val_preds, final_val_targets = epoch_preds, epoch_targets

    # Persist the weights as they are after the last epoch.
    torch.save(model.state_dict(), f"customcnn_fold{fold}.pth")
    print(f"Saved CustomCNN model: customcnn_fold{fold}.pth")

    return {
        "train_losses": train_losses,
        "train_accuracies": train_accuracies,
        "val_accuracies": val_accuracies,
        "val_preds": final_val_preds,
        "val_targets": final_val_targets,
    }


# Run the CustomCNN through every stratified fold: slice the fold's data,
# train, keep the metrics in memory and pickle them per fold.
for fold, (train_idx, val_idx) in enumerate(skf.split(image_paths, labels), start=1):
    train_images = image_paths[train_idx]
    val_images = image_paths[val_idx]
    train_labels = labels[train_idx]
    val_labels = labels[val_idx]

    print(f"\n========== Training CustomCNN Fold {fold} ==========")
    result = train_one_fold_custom(fold, train_images, train_labels, val_images, val_labels)

    # Copy the five reported metric series into the notebook-level store.
    customcnn_fold_metrics[fold] = {
        key: result[key]
        for key in ("train_losses", "train_accuracies", "val_accuracies", "val_preds", "val_targets")
    }

    # One pickle per fold.
    with open(f"customcnn_metrics_fold{fold}.pkl", "wb") as f:
        pickle.dump(customcnn_fold_metrics[fold], f)
        print(f"Saved CustomCNN Fold {fold} metrics to customcnn_metrics_fold{fold}.pkl")

# A single pickle holding the metrics of all folds.
with open("customcnn_all_fold_metrics.pkl", "wb") as f:
    pickle.dump(customcnn_fold_metrics, f)
    print("Saved all CustomCNN fold metrics to customcnn_all_fold_metrics.pkl")
