In [None]:
# Import Libraries
import gc
import os
import random
import shutil
import urllib.request
import zipfile

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import cv2
from tqdm import tqdm
import wandb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


In [None]:
# Constants and Configurations
# Target labels, listed alphabetically. The list length sets the size of the
# classifier head built in initialize_model().
# NOTE(review): presumably this order matches the class-folder order on disk
# (ImageFolder assigns indices from sorted folder names) -- confirm.
CLASS_NAMES = [
    "Amphibia",
    "Animalia",
    "Arachnida",
    "Aves",
    "Fungi",
    "Insecta",
    "Mammalia",
    "Mollusca",
    "Plantae",
    "Reptilia",
]

# Spatial size every image is resized to before it is fed to the network.
IMAGE_DIM = (224, 224)

# Mini-batch size shared by the train/validation/test loaders.
BATCH_SIZE = 64

# Number of passes over the training split.
NUM_EPOCHS = 10

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# Dataset Preparation 
def download_dataset(
    url="https://storage.googleapis.com/wandb_datasets/nature_12K.zip",
    dataset_dir="inaturalist_12K",
    archive_path="nature_12K.zip",
):
    """Download and unpack the dataset archive unless it is already present.

    The previous implementation shelled out to ``wget``/``unzip``/``rm``, which
    only works inside IPython on machines that have those binaries, and it
    re-downloaded the archive on every run. This version uses the standard
    library only and is safe to re-run (Restart Kernel -> Run All).

    Args:
        url: Location of the zip archive to fetch.
        dataset_dir: Folder the archive is expected to produce. If it already
            exists the function returns immediately without touching the
            network. The archive is extracted into this folder's parent, so
            the archive's top-level folder name must match (the default
            matches the paths that ``main()`` reads from).
        archive_path: Temporary location for the downloaded zip file; it is
            removed afterwards, whether or not extraction succeeded.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the archive cannot be downloaded.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Idempotence guard: an existing folder is treated as "already downloaded".
    # A partially extracted folder from an interrupted run must be deleted
    # manually before calling this again.
    if os.path.isdir(dataset_dir):
        return

    extract_root = os.path.dirname(os.path.abspath(dataset_dir))
    try:
        # Stream the response to disk instead of holding the archive in memory.
        with urllib.request.urlopen(url) as response, open(archive_path, "wb") as archive_file:
            shutil.copyfileobj(response, archive_file)
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(extract_root)
    finally:
        # Mirror the original `rm`: never leave the zip file behind.
        if os.path.exists(archive_path):
            os.remove(archive_path)

def create_data_loaders(train_dir, test_dir, batch_size, val_fraction=0.2, seed=42, num_workers=2):
    """Build the train, validation and test ``DataLoader`` objects.

    The images under ``train_dir`` are split per class so that every class
    contributes the same fraction of its samples to the validation set.

    Args:
        train_dir: Root of an ``ImageFolder``-style directory (one sub-folder
            per class) used for both the training and validation splits.
        test_dir: Root of an ``ImageFolder``-style directory used for testing.
        batch_size: Mini-batch size for all three loaders.
        val_fraction: Fraction of each class held out for validation.
        seed: ``random_state`` passed to ``train_test_split`` so the split is
            reproducible.
        num_workers: Number of worker processes per loader.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    transform = transforms.Compose([
        transforms.Resize(IMAGE_DIM),
        transforms.ToTensor(),
        # ImageNet channel statistics: the loaders feed a frozen backbone that
        # was pretrained on ImageNet, so inputs must be normalised the same way
        # the pretrained weights expect (previously 0.5 / 0.5 was used).
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Load training data and group sample indices by class label. Grouping is
    # driven by the labels actually present in the folder rather than by a
    # hard-coded class count, so an unexpected extra class cannot KeyError.
    train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
    indices_by_class = {}
    for idx, (_, label) in enumerate(train_dataset.samples):
        indices_by_class.setdefault(label, []).append(idx)

    # Per-class split keeps the class proportions equal in both subsets.
    train_indices, val_indices = [], []
    for label in sorted(indices_by_class):
        trn, val = train_test_split(
            indices_by_class[label], test_size=val_fraction, random_state=seed
        )
        train_indices.extend(trn)
        val_indices.extend(val)

    train_loader = DataLoader(
        Subset(train_dataset, train_indices),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
    # Evaluation does not benefit from shuffling; a fixed order keeps
    # validation passes deterministic.
    val_loader = DataLoader(
        Subset(train_dataset, val_indices),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

    # Test dataset
    test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers)

    return train_loader, val_loader, test_loader


In [None]:
# Model Setup
def initialize_model():
    """Create a ResNet-50 with ImageNet weights and a fresh classifier head.

    Every pretrained parameter is frozen; the replacement ``fc`` layer is
    created afterwards, so it is the only part left trainable. The network is
    moved to ``DEVICE`` before being returned.

    Returns:
        The prepared model, with ``len(CLASS_NAMES)`` output units.
    """
    backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

    # Turn off gradients for all existing (pretrained) parameters in one call.
    backbone.requires_grad_(False)

    # Swap the final fully connected layer for one sized to our label set.
    backbone.fc = nn.Linear(backbone.fc.in_features, len(CLASS_NAMES))

    backbone = backbone.to(DEVICE)
    return backbone


In [None]:
# Training Utilities 
def train_epoch(model, dataloader, criterion, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Args:
        model: Network to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer whose ``step()`` is called once per batch.

    Returns:
        None. The model parameters are updated in place.
    """
    model.train()
    progress = tqdm(dataloader, desc="Training")
    for batch_inputs, batch_labels in progress:
        batch_inputs = batch_inputs.to(DEVICE)
        batch_labels = batch_labels.to(DEVICE)

        # Clear gradients left over from the previous batch before backprop.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

def evaluate(model, dataloader, criterion, mode="Validation", device=None):
    """Compute the mean loss and accuracy of ``model`` over ``dataloader``.

    Both metrics are averaged over the samples actually seen. The earlier
    version averaged the per-batch mean losses (over-weighting a short final
    batch) and divided the correct count by ``len(dataloader.dataset)``, which
    is wrong whenever the loader does not visit every dataset item exactly once.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``. It is
            assumed to return the batch *mean* (the default reduction of
            ``nn.CrossEntropyLoss``), which is re-weighted by the batch size.
        mode: Label used as the prefix of the printed summary line.
        device: Device the batches are moved to. Defaults to the module-level
            ``DEVICE`` when omitted.

    Returns:
        Tuple ``(avg_loss, accuracy)`` with accuracy expressed in percent.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    if device is None:
        device = DEVICE

    model.eval()
    loss_sum, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_count = labels.size(0)
            # Undo the per-batch mean so every sample carries equal weight.
            loss_sum += criterion(outputs, labels).item() * batch_count
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            seen += batch_count

    if seen == 0:
        raise ValueError(f"{mode} dataloader yielded no samples to evaluate")

    accuracy = 100 * correct / seen
    avg_loss = loss_sum / seen
    print(f"{mode} Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    return avg_loss, accuracy


In [None]:
# Main Execution
def main():
    """Run the full pipeline: fetch data, fine-tune the model, log metrics.

    Steps: seed the random number generators, start a Weights & Biases run,
    download the dataset, build the model/loss/optimizer and the data loaders,
    train for ``NUM_EPOCHS`` epochs with a validation pass after each one, and
    finally evaluate on the test loader. The W&B run is always closed, and is
    marked as failed if any step raises.
    """
    # Seed every RNG in play so weight initialisation and batch shuffling are
    # repeatable from a fresh kernel (same value as the data-split seed).
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    wandb.init(project="Deep_Learning_Assignment_2")
    exit_code = 1  # assume failure until the pipeline completes
    try:
        download_dataset()

        # Initialize components
        model = initialize_model()
        criterion = nn.CrossEntropyLoss()
        # Frozen parameters never receive gradients, so only the trainable
        # ones are handed to the optimizer.
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = optim.NAdam(trainable_params, lr=1e-4, weight_decay=0.005)

        # Create data loaders
        train_loader, val_loader, test_loader = create_data_loaders(
            train_dir='inaturalist_12K/train',
            test_dir='inaturalist_12K/val',
            batch_size=BATCH_SIZE
        )

        # Training loop
        for epoch in range(NUM_EPOCHS):
            train_epoch(model, train_loader, criterion, optimizer)
            val_loss, val_acc = evaluate(model, val_loader, criterion)
            wandb.log({"epoch": epoch + 1, "val_loss": val_loss, "val_accuracy": val_acc})

        # Final evaluation
        test_loss, test_acc = evaluate(model, test_loader, criterion, "Test")
        wandb.log({"test_loss": test_loss, "test_accuracy": test_acc})
        exit_code = 0
    finally:
        # Close the run even when a step above raised, so it is not left open.
        wandb.finish(exit_code=exit_code)

# Entry-point guard. The dunder names need two underscores on each side; the
# single-underscore spelling raised NameError, so main() was never invoked.
if __name__ == "__main__":
    main()