In [None]:
import torch
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,random_split,Subset
from torchvision import datasets, transforms
import torchmetrics
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F
import torch.nn.init as init
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
from sklearn.metrics import f1_score, precision_score, recall_score
import wandb
import os
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from PIL import Image
from sklearn.model_selection import train_test_split
# Set device to GPU if available, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")


# Set a global seed value
seed = 42

# For NumPy
np.random.seed(seed)

# For PyTorch
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    
    # Ensure deterministic behavior for CUDA operations.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
model = models.resnet50(pretrained=True)  # Load pre-trained ResNet-50


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 220MB/s]


In [None]:
# Initialize Weights & Biases
wandb.init(project="Assignment5", entity="usf-magma")

# Define paths to your local dataset
annotations_path = "Assignment 05/archive/annotations"  # Update this to your local annotations path
images_path = "Assignment 05/archive/images"            # Update this to your local images path

# Check if the annotation path exists
if not os.path.exists(annotations_path):
    print(f"Error: The annotations path {annotations_path} does not exist!")

# Print contents of the annotations folder
print("Folders in annotations path:", os.listdir(annotations_path))

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Get class labels
def get_class_labels(annotations_path):
    class_labels = {}
    for breed_folder in os.listdir(annotations_path):
        folder_path = os.path.join(annotations_path, breed_folder)
        if os.path.isdir(folder_path):
            if breed_folder not in class_labels:
                class_labels[breed_folder] = len(class_labels)
    return class_labels

class_labels = get_class_labels(annotations_path)

# Load Stanford Dogs dataset
def load_stanford_dogs(images_path, annotations_path, transform):
    dataset = []
    for breed_folder in os.listdir(annotations_path):
        annotation_folder_path = os.path.join(annotations_path, breed_folder)
        image_folder_path = os.path.join(images_path, breed_folder)

        if not os.path.isdir(annotation_folder_path) or not os.path.exists(image_folder_path):
            print(f"Skipping {breed_folder}, image folder not found!")
            continue

        class_index = class_labels.get(breed_folder, -1)
        if class_index == -1:
            continue

        image_files = os.listdir(image_folder_path)

        for image_file in image_files:
            image_path = os.path.join(image_folder_path, image_file)
            if image_file.endswith(".jpg") and os.path.exists(image_path):
                image = Image.open(image_path).convert("RGB")
                image = transform(image)
                dataset.append((image, class_index))

    return dataset

# Load datasets
full_dataset = load_stanford_dogs(images_path, annotations_path, transform)

# Create data loaders
trainset, testset = train_test_split(full_dataset, test_size=0.2, random_state=42)

print(f"Train set size: {len(trainset)} images")
print(f"Test set size: {len(testset)} images")

# Validate dataset size before passing to DataLoader
if len(trainset) == 0:
    raise ValueError("Error: trainset is empty! Check image paths.")

# Create data loaders
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

print(f"Loaded Stanford Dogs dataset with {len(class_labels)} classes and {len(trainset)} images.")

# Log dataset information to W&B
wandb.config.update({
    "dataset": "Stanford Dogs",
    "num_classes": len(class_labels),
    "train_size": len(trainset),
    "test_size": len(testset),
    "batch_size": 32
})

print(f"Loaded Stanford Dogs dataset with {len(class_labels)} classes and {len(trainset)} images.")


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhridayreddy[0m ([33musf-magma[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Train set size: 16464 images
Test set size: 4116 images
Loaded Stanford Dogs dataset with 120 classes and 16464 images.


In [6]:
import os

# Define the path to your dataset
dataset_path = "/content/drive/MyDrive/StanfordDogs/images/Images"  # Images directory

# Extract class names from folder names
classes = tuple(sorted(os.listdir(dataset_path)))  # Sorting to ensure consistent order

print(f"Detected {len(classes)} classes:")
print(classes)


Detected 120 classes:
('n02085620-Chihuahua', 'n02085782-Japanese_spaniel', 'n02085936-Maltese_dog', 'n02086079-Pekinese', 'n02086240-Shih-Tzu', 'n02086646-Blenheim_spaniel', 'n02086910-papillon', 'n02087046-toy_terrier', 'n02087394-Rhodesian_ridgeback', 'n02088094-Afghan_hound', 'n02088238-basset', 'n02088364-beagle', 'n02088466-bloodhound', 'n02088632-bluetick', 'n02089078-black-and-tan_coonhound', 'n02089867-Walker_hound', 'n02089973-English_foxhound', 'n02090379-redbone', 'n02090622-borzoi', 'n02090721-Irish_wolfhound', 'n02091032-Italian_greyhound', 'n02091134-whippet', 'n02091244-Ibizan_hound', 'n02091467-Norwegian_elkhound', 'n02091635-otterhound', 'n02091831-Saluki', 'n02092002-Scottish_deerhound', 'n02092339-Weimaraner', 'n02093256-Staffordshire_bullterrier', 'n02093428-American_Staffordshire_terrier', 'n02093647-Bedlington_terrier', 'n02093754-Border_terrier', 'n02093859-Kerry_blue_terrier', 'n02093991-Irish_terrier', 'n02094114-Norfolk_terrier', 'n02094258-Norwich_terrier', 

In [7]:
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Load pre-trained ResNet-18
model = models.resnet18(weights='IMAGENET1K_V1')

# Modify the final layer to match the number of dog breeds (120)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(classes))  # 120 classes in Stanford Dogs

# Move model to the device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Log model details to W&B
wandb.config.update({
    "model": "ResNet-18",
    "pretrained": True,
    "num_classes": len(classes),
    "learning_rate": 0.001
})

wandb.watch(model, log="all", log_freq=100)


In [8]:
# Freeze all layers except the final layer
for param in model.parameters():
    param.requires_grad = False

# Unfreeze parameters of the final fully connected layer
for param in model.fc.parameters():
    param.requires_grad = True

# Count the number of trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
total_params = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {trainable_params:,} ({trainable_params/total_params:.2%} of total)")

# Log to W&B
wandb.config.update({
    "trainable_params": trainable_params,
    "total_params": total_params,
    "approach": "feature_extraction"
})

Trainable parameters: 61,560 (0.55% of total)


In [None]:
# First, add scikit-learn for metrics calculation


# Ensure the model is on the correct device
model = model.to(device)

# Set up loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

wandb.init(project="Assignment5", entity="usf-magma")
# Log hyperparameters to W&B
wandb.config.update({
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "criterion": "CrossEntropyLoss",
    "epochs": 5  # We'll train for just 5 epochs for this example
})

# Number of classes
num_classes = len(class_labels)
class_names = list(class_labels.keys())

# Training loop
def train_model(model, trainloader, testloader, criterion, optimizer, num_epochs=5):
    # Track best accuracy
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        all_train_preds = []
        all_train_labels = []

        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Collect predictions and labels for F1 calculation
            all_train_preds.extend(predicted.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())

            # Log batch statistics (every 100 batches)
            if i % 100 == 99:
                batch_acc = 100. * correct / total
                batch_loss = running_loss / total
                print(f'Batch {i+1}, Loss: {batch_loss:.4f}, Acc: {batch_acc:.2f}%')

                wandb.log({
                    "train_batch_loss": batch_loss,
                    "train_batch_acc": batch_acc,
                    "epoch": epoch + i/len(trainloader)
                })

        # Calculate epoch statistics
        train_loss = running_loss / len(trainloader.dataset)
        train_acc = 100. * correct / total

        # Calculate F1 score for the training epoch
        train_f1_macro = f1_score(all_train_labels, all_train_preds, average='macro')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, F1-macro: {train_f1_macro:.4f}')

        # Evaluation phase
        model.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        class_correct = list(0. for _ in range(num_classes))
        class_total = list(0. for _ in range(num_classes))
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # Statistics
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                # Per-class accuracy
                c = (predicted == labels).squeeze()
                for i in range(labels.size(0)):
                    label = labels[i].item()
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

                # Store for confusion matrix and F1 calculation
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Calculate test statistics
        test_loss = test_loss / len(testloader.dataset)
        test_acc = 100. * correct / total

        # Calculate F1 score for test data
        test_f1_macro = f1_score(all_labels, all_preds, average='macro')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%, F1-macro: {test_f1_macro:.4f}')

        # Per-class accuracy
        for i in range(num_classes):
            class_acc = 100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0


        # Log epoch statistics to W&B
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "train_f1_macro": train_f1_macro,
            "test_loss": test_loss,
            "test_acc": test_acc,
            "test_f1_macro": test_f1_macro
        })

        # Log per-class accuracy
        class_acc_dict = {f"class_acc_{class_names[i]}": 100 * class_correct[i] / class_total[i]
                         if class_total[i] > 0 else 0 for i in range(num_classes)}
        wandb.log(class_acc_dict)

        # Log confusion matrix
        wandb.log({
            "confusion_matrix": wandb.plot.confusion_matrix(
                probs=None,
                y_true=all_labels,
                preds=all_preds,
                class_names=class_names
            )
        })

        # Save model if it's the best so far
        if test_acc > best_acc:
            best_acc = test_acc
            best_f1 = test_f1_macro
            torch.save(model.state_dict(), f'resnet18_stanforddogs_epoch_{epoch+1}.pth')
            wandb.save(f'resnet18_stanforddogs_epoch_{epoch+1}.pth')

            # Log best model metrics to W&B summary
            wandb.run.summary["best_accuracy"] = best_acc
            wandb.run.summary["best_f1_macro"] = best_f1
            wandb.run.summary["best_epoch"] = epoch + 1

    print(f'Best test accuracy: {best_acc:.2f}%')
    return model

# Train the model
model = train_model(model, trainloader, testloader, criterion, optimizer)

# Finish the W&B run
wandb.finish()


Epoch 1/5
----------
Batch 100, Loss: 0.2336, Acc: 92.75%
Batch 200, Loss: 0.2375, Acc: 92.48%
Batch 300, Loss: 0.2389, Acc: 92.30%
Batch 400, Loss: 0.2485, Acc: 92.09%
Batch 500, Loss: 0.2540, Acc: 91.83%
Train Loss: 0.2540, Train Acc: 91.81%, F1-macro: 0.9180
Test Loss: 1.1766, Test Acc: 71.43%, F1-macro: 0.7068
Epoch 2/5
----------
Batch 100, Loss: 0.2255, Acc: 92.47%
Batch 200, Loss: 0.2324, Acc: 92.22%
Batch 300, Loss: 0.2420, Acc: 91.93%
Batch 400, Loss: 0.2443, Acc: 91.81%
Batch 500, Loss: 0.2467, Acc: 91.76%
Train Loss: 0.2474, Train Acc: 91.69%, F1-macro: 0.9162
Test Loss: 1.1433, Test Acc: 70.94%, F1-macro: 0.7071
Epoch 3/5
----------
Batch 100, Loss: 0.2202, Acc: 93.16%
Batch 200, Loss: 0.2247, Acc: 92.92%
Batch 300, Loss: 0.2282, Acc: 92.77%
Batch 400, Loss: 0.2347, Acc: 92.57%
Batch 500, Loss: 0.2377, Acc: 92.35%
Train Loss: 0.2387, Train Acc: 92.31%, F1-macro: 0.9232
Test Loss: 1.1837, Test Acc: 70.99%, F1-macro: 0.7045
Epoch 4/5
----------
Batch 100, Loss: 0.2198, Acc: 9

0,1
class_acc_n02085620-Chihuahua,█▁▆▃▅
class_acc_n02085782-Japanese_spaniel,▁▅█▆▅
class_acc_n02085936-Maltese_dog,▄▄▁█▂
class_acc_n02086079-Pekinese,▅█▁▁▅
class_acc_n02086240-Shih-Tzu,▆▇█▆▁
class_acc_n02086646-Blenheim_spaniel,▃▃▁▆█
class_acc_n02086910-papillon,▅▅▁▆█
class_acc_n02087046-toy_terrier,▆██▆▁
class_acc_n02087394-Rhodesian_ridgeback,▁▆▆▅█
class_acc_n02088094-Afghan_hound,▇▁▂█▇

0,1
best_accuracy,71.59864
best_epoch,4.0
best_f1_macro,0.71012
class_acc_n02085620-Chihuahua,51.6129
class_acc_n02085782-Japanese_spaniel,85.71429
class_acc_n02085936-Maltese_dog,57.44681
class_acc_n02086079-Pekinese,72.5
class_acc_n02086240-Shih-Tzu,51.28205
class_acc_n02086646-Blenheim_spaniel,95.12195
class_acc_n02086910-papillon,89.3617


In [20]:
# First, add scikit-learn for metrics calculation
from sklearn.metrics import f1_score
import torch.optim as optim
import torch.nn as nn
import wandb

# Ensure the model is on the correct device
model = model.to(device)

# Set up loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

wandb.init(project="Assignment5", entity="usf-magma")
# Log hyperparameters to W&B
wandb.config.update({
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "criterion": "CrossEntropyLoss",
    "epochs": 15  # Updated to 15 epochs
})

# Number of classes
num_classes = len(class_labels)
class_names = list(class_labels.keys())

# Training loop
def train_model(model, trainloader, testloader, criterion, optimizer, num_epochs=15):  # Updated to 15 epochs
    # Track best accuracy
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        all_train_preds = []
        all_train_labels = []

        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Collect predictions and labels for F1 calculation
            all_train_preds.extend(predicted.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())

            # Log batch statistics (every 100 batches)
            if i % 100 == 99:
                batch_acc = 100. * correct / total
                batch_loss = running_loss / total
                print(f'Batch {i+1}, Loss: {batch_loss:.4f}, Acc: {batch_acc:.2f}%')

                wandb.log({
                    "train_batch_loss": batch_loss,
                    "train_batch_acc": batch_acc,
                    "epoch": epoch + i/len(trainloader)
                })

        # Calculate epoch statistics
        train_loss = running_loss / len(trainloader.dataset)
        train_acc = 100. * correct / total

        # Calculate F1 score for the training epoch
        train_f1_macro = f1_score(all_train_labels, all_train_preds, average='macro')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, F1-macro: {train_f1_macro:.4f}')

        # Evaluation phase
        model.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        class_correct = list(0. for _ in range(num_classes))
        class_total = list(0. for _ in range(num_classes))
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # Statistics
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                # Per-class accuracy
                c = (predicted == labels).squeeze()
                for i in range(labels.size(0)):
                    label = labels[i].item()
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

                # Store for confusion matrix and F1 calculation
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Calculate test statistics
        test_loss = test_loss / len(testloader.dataset)
        test_acc = 100. * correct / total

        # Calculate F1 score for test data
        test_f1_macro = f1_score(all_labels, all_preds, average='macro')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%, F1-macro: {test_f1_macro:.4f}')

        # Per-class accuracy
        for i in range(num_classes):
            class_acc = 100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0

        # Log epoch statistics to W&B
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "train_f1_macro": train_f1_macro,
            "test_loss": test_loss,
            "test_acc": test_acc,
            "test_f1_macro": test_f1_macro
        })

        # Log per-class accuracy
        class_acc_dict = {f"class_acc_{class_names[i]}": 100 * class_correct[i] / class_total[i]
                         if class_total[i] > 0 else 0 for i in range(num_classes)}
        wandb.log(class_acc_dict)

        # Log confusion matrix
        wandb.log({
            "confusion_matrix": wandb.plot.confusion_matrix(
                probs=None,
                y_true=all_labels,
                preds=all_preds,
                class_names=class_names
            )
        })

        # Save model if it's the best so far
        if test_acc > best_acc:
            best_acc = test_acc
            best_f1 = test_f1_macro
            torch.save(model.state_dict(), f'resnet18_stanforddogs_epoch_{epoch+1}.pth')
            wandb.save(f'resnet18_stanforddogs_epoch_{epoch+1}.pth')

            # Log best model metrics to W&B summary
            wandb.run.summary["best_accuracy"] = best_acc
            wandb.run.summary["best_f1_macro"] = best_f1
            wandb.run.summary["best_epoch"] = epoch + 1

    print(f'Best test accuracy: {best_acc:.2f}%')
    return model

# Train the model
model = train_model(model, trainloader, testloader, criterion, optimizer)

# Finish the W&B run
wandb.finish()


Epoch 1/15
----------
Batch 100, Loss: 0.2251, Acc: 92.44%
Batch 200, Loss: 0.2321, Acc: 92.33%
Batch 300, Loss: 0.2286, Acc: 92.49%
Batch 400, Loss: 0.2304, Acc: 92.54%
Batch 500, Loss: 0.2286, Acc: 92.61%
Train Loss: 0.2301, Train Acc: 92.51%, F1-macro: 0.9249
Test Loss: 1.1963, Test Acc: 70.97%, F1-macro: 0.7048
Epoch 2/15
----------
Batch 100, Loss: 0.1998, Acc: 93.28%
Batch 200, Loss: 0.2065, Acc: 93.31%
Batch 300, Loss: 0.2058, Acc: 93.44%
Batch 400, Loss: 0.2103, Acc: 93.30%
Batch 500, Loss: 0.2142, Acc: 93.21%
Train Loss: 0.2152, Train Acc: 93.17%, F1-macro: 0.9316
Test Loss: 1.2186, Test Acc: 70.70%, F1-macro: 0.7008
Epoch 3/15
----------
Batch 100, Loss: 0.1886, Acc: 93.94%
Batch 200, Loss: 0.1985, Acc: 93.75%
Batch 300, Loss: 0.1984, Acc: 93.75%
Batch 400, Loss: 0.2041, Acc: 93.45%
Batch 500, Loss: 0.2096, Acc: 93.28%
Train Loss: 0.2102, Train Acc: 93.25%, F1-macro: 0.9325
Test Loss: 1.2532, Test Acc: 70.41%, F1-macro: 0.7000
Epoch 4/15
----------
Batch 100, Loss: 0.2047, Ac

0,1
class_acc_n02085620-Chihuahua,▅█▅▅▅▄▄▁▅▂▆▁▅▅▃
class_acc_n02085782-Japanese_spaniel,▁▇▅▅█▇▅▇▇█▇▇▇██
class_acc_n02085936-Maltese_dog,▅█▆▆▄▂▁▃▆▃▅▂▅▄▅
class_acc_n02086079-Pekinese,▇▃▁▄▄▅▄▅█▄▅▄▄▄▂
class_acc_n02086240-Shih-Tzu,█▄█▆▆▆▁▅▇▆▆▄▅█▆
class_acc_n02086646-Blenheim_spaniel,█▆▆▄▄█▆█▃▃▆▆▁▆▃
class_acc_n02086910-papillon,▇▆▅▆▂▅▄▅▆▅█▅▅▁▅
class_acc_n02087046-toy_terrier,▅▆▇▁▇▇▇▆▅▇█▅█▄▇
class_acc_n02087394-Rhodesian_ridgeback,▄▇█▆▅▇▇▃▇▄▁▃▂▅▆
class_acc_n02088094-Afghan_hound,▁▆█▃▅▂▆▆▄▅▄▇▄▆▇

0,1
best_accuracy,71.18562
best_epoch,8.0
best_f1_macro,0.70629
class_acc_n02085620-Chihuahua,51.6129
class_acc_n02085782-Japanese_spaniel,96.42857
class_acc_n02085936-Maltese_dog,65.95745
class_acc_n02086079-Pekinese,62.5
class_acc_n02086240-Shih-Tzu,58.97436
class_acc_n02086646-Blenheim_spaniel,82.92683
class_acc_n02086910-papillon,82.97872


In [None]:
# made data augmentation, used adamW optimizer, no of epochs =15, lr scheduler
from sklearn.metrics import f1_score
import torch.optim as optim
import torch.nn as nn
import wandb
from torchvision import transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Data Augmentation
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Ensure the model is on the correct device
model = model.to(device)

# Set up loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.fc.parameters(), lr=0.005, weight_decay=1e-4)

# Learning Rate Scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

# Early Stopping Variables
best_loss = float("inf")
patience = 5
patience_counter = 0
wandb.init(project="Assignment5", entity="usf-magma")
# Log hyperparameters to W&B
wandb.config.update({
    "optimizer": "AdamW",
    "learning_rate": 0.005,
    "criterion": "CrossEntropyLoss",
    "epochs": 15  # Increased epochs for better convergence
})

# Number of classes
num_classes = len(class_labels)
class_names = list(class_labels.keys())

# Training loop
def train_model(model, trainloader, testloader, criterion, optimizer, num_epochs=15):
    best_acc = 0.0
    global best_loss, patience_counter

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        all_train_preds = []
        all_train_labels = []

        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            all_train_preds.extend(predicted.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())

        train_loss = running_loss / len(trainloader.dataset)
        train_acc = 100. * correct / total
        train_f1_macro = f1_score(all_train_labels, all_train_preds, average='macro')

        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, F1-macro: {train_f1_macro:.4f}')

        # Evaluation phase
        model.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        all_preds = []
        all_labels = []
        misclassified_samples = []

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                # Capture misclassified samples
                for i in range(len(labels)):
                    if predicted[i] != labels[i]:
                        misclassified_samples.append((inputs[i].cpu(), labels[i].cpu(), predicted[i].cpu()))

        test_loss = test_loss / len(testloader.dataset)
        test_acc = 100. * correct / total
        test_f1_macro = f1_score(all_labels, all_preds, average='macro')

        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%, F1-macro: {test_f1_macro:.4f}')

        # Log statistics to W&B
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "train_f1_macro": train_f1_macro,
            "test_loss": test_loss,
            "test_acc": test_acc,
            "test_f1_macro": test_f1_macro
        })

        # Log misclassified samples
        for img, true_label, pred_label in misclassified_samples[:10]:  # Log only first 10
            wandb.log({
                "Misclassified Sample": [wandb.Image(img, caption=f"True: {class_names[true_label]} | Pred: {class_names[pred_label]}")]
            })

        # Update learning rate scheduler
        scheduler.step(test_loss)

        # Early stopping
        if test_loss < best_loss:
            best_loss = test_loss
            patience_counter = 0
            torch.save(model.state_dict(), f'best_model.pth')
            wandb.save('best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered.")
                break

    print(f'Best test accuracy: {best_acc:.2f}%')
    return model

# Train the model
model = train_model(model, trainloader, testloader, criterion, optimizer)

# Finish the W&B run
wandb.finish()


Epoch 1/15
----------
Train Loss: 0.4948, Train Acc: 90.31%, F1-macro: 0.9028
Test Loss: 4.1914, Test Acc: 68.22%, F1-macro: 0.6771
Epoch 2/15
----------
Train Loss: 0.4743, Train Acc: 90.82%, F1-macro: 0.9078
Test Loss: 4.2828, Test Acc: 67.83%, F1-macro: 0.6764
Epoch 3/15
----------
Train Loss: 0.4762, Train Acc: 90.58%, F1-macro: 0.9058
Test Loss: 4.3441, Test Acc: 67.40%, F1-macro: 0.6712
Epoch 4/15
----------


In [10]:
# First, add scikit-learn for metrics calculation
from sklearn.metrics import f1_score
import torch.optim as optim
import torch.nn as nn
import wandb

# Ensure the model is on the correct device
model = model.to(device)

# Set up loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Log hyperparameters to W&B
wandb.config.update({
    "optimizer": "Adam",
    "learning_rate": 0.001,
    "criterion": "CrossEntropyLoss",
    "epochs": 5  # We'll train for just 5 epochs for this example
})

# Number of classes
num_classes = len(class_labels)
class_names = list(class_labels.keys())

# Training loop
def train_model(model, trainloader, criterion, optimizer, num_epochs=5):
    # Track best accuracy
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        all_train_preds = []
        all_train_labels = []

        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Collect predictions and labels for F1 calculation
            all_train_preds.extend(predicted.cpu().numpy())
            all_train_labels.extend(labels.cpu().numpy())

            # Log batch statistics (every 100 batches)
            if i % 100 == 99:
                batch_acc = 100. * correct / total
                batch_loss = running_loss / total
                print(f'Batch {i+1}, Loss: {batch_loss:.4f}, Acc: {batch_acc:.2f}%')

                wandb.log({
                    "train_batch_loss": batch_loss,
                    "train_batch_acc": batch_acc,
                    "epoch": epoch + i/len(trainloader)
                })

        # Calculate epoch statistics
        train_loss = running_loss / len(trainloader.dataset)
        train_acc = 100. * correct / total

        # Calculate F1 score for the training epoch
        train_f1_macro = f1_score(all_train_labels, all_train_preds, average='macro')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, F1-macro: {train_f1_macro:.4f}')

        # Evaluation phase
        model.eval()
        test_loss = 0.0
        correct = 0
        total = 0
        class_correct = list(0. for _ in range(num_classes))
        class_total = list(0. for _ in range(num_classes))
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # Statistics
                test_loss += loss.item() * inputs.size(0)
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                # Per-class accuracy
                c = (predicted == labels).squeeze()
                for i in range(labels.size(0)):
                    label = labels[i].item()
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

                # Store for confusion matrix and F1 calculation
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Calculate test statistics
        test_loss = test_loss / len(testloader.dataset)
        test_acc = 100. * correct / total

        # Calculate F1 score for test data
        test_f1_macro = f1_score(all_labels, all_preds, average='macro')
        print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%, F1-macro: {test_f1_macro:.4f}')

        # Per-class accuracy
        for i in range(num_classes):
            class_acc = 100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0
            print(f'Accuracy of {class_names[i]}: {class_acc:.2f}%')

        # Log epoch statistics to W&B
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_acc": train_acc,
            "train_f1_macro": train_f1_macro,
            "test_loss": test_loss,
            "test_acc": test_acc,
            "test_f1_macro": test_f1_macro
        })

        # Log per-class accuracy
        class_acc_dict = {f"class_acc_{class_names[i]}": 100 * class_correct[i] / class_total[i]
                         if class_total[i] > 0 else 0 for i in range(num_classes)}
        wandb.log(class_acc_dict)

        # Log confusion matrix
        wandb.log({
            "confusion_matrix": wandb.plot.confusion_matrix(
                probs=None,
                y_true=all_labels,
                preds=all_preds,
                class_names=class_names
            )
        })

        # Save model if it's the best so far
        if test_acc > best_acc:
            best_acc = test_acc
            best_f1 = test_f1_macro
            torch.save(model.state_dict(), f'resnet18_stanforddogs_epoch_{epoch+1}.pth')
            wandb.save(f'resnet18_stanforddogs_epoch_{epoch+1}.pth')

            # Log best model metrics to W&B summary
            wandb.run.summary["best_accuracy"] = best_acc
            wandb.run.summary["best_f1_macro"] = best_f1
            wandb.run.summary["best_epoch"] = epoch + 1

    print(f'Best test accuracy: {best_acc:.2f}%')
    return model

# Train the model (without passing testloader)
model = train_model(model, trainloader, criterion, optimizer)

# Save the trained model
torch.save(model.state_dict(), 'resnet18_stanforddogs_final.pth')



Epoch 1/5
----------
Batch 100, Loss: 3.7894, Acc: 22.41%
Batch 200, Loss: 3.0475, Acc: 34.66%
Batch 300, Loss: 2.6039, Acc: 43.03%
Batch 400, Loss: 2.3158, Acc: 48.02%
Batch 500, Loss: 2.1128, Acc: 51.82%
Train Loss: 2.0895, Train Acc: 52.33%, F1-macro: 0.5157
Test Loss: 1.1756, Test Acc: 67.69%, F1-macro: 0.6641
Accuracy of n02086240-Shih-Tzu: 17.95%
Accuracy of n02092002-Scottish_deerhound: 70.21%
Accuracy of n02085782-Japanese_spaniel: 96.43%
Accuracy of n02090622-borzoi: 45.16%
Accuracy of n02088238-basset: 56.00%
Accuracy of n02090721-Irish_wolfhound: 81.40%
Accuracy of n02093859-Kerry_blue_terrier: 64.52%
Accuracy of n02091467-Norwegian_elkhound: 91.67%
Accuracy of n02091134-whippet: 26.67%
Accuracy of n02088364-beagle: 80.95%
Accuracy of n02093256-Staffordshire_bullterrier: 58.06%
Accuracy of n02086910-papillon: 55.32%
Accuracy of n02094258-Norwich_terrier: 77.78%
Accuracy of n02088094-Afghan_hound: 90.38%
Accuracy of n02086646-Blenheim_spaniel: 87.80%
Accuracy of n02093428-Ame