In [None]:
import os
import json
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm
from datetime import datetime
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader

<h2> Image Transformations </h2>

In [None]:
# Per-channel ImageNet statistics used to normalise the input tensors.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: resize to 256x256, then a random 224x224 crop and a
# random horizontal flip, followed by tensor conversion and normalisation.
# Extra augmentations that are currently switched off:
#   transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4)
#   transforms.RandomRotation(degrees=15)
train_transforms = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.RandomCrop(size=224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Validation pipeline: same resize and normalisation, but a fixed centre crop
# and no random augmentation.
val_transforms = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

<h2> Dataset Creation </h2>

In [None]:
# Dataset locations, given relative to the notebook's working directory.
TRAIN_DIR = "../../datasets/tiny-imagenet-200/train"
VAL_DIR = "../../datasets/tiny-imagenet-200/val"

# Build the datasets with ImageFolder, each using its own transform pipeline.
# NOTE(review): ImageFolder takes class labels from sub-directory names; confirm
# that VAL_DIR has been arranged one-folder-per-class like TRAIN_DIR, otherwise
# validation labels will not line up with training labels.
train_dataset = ImageFolder(root=TRAIN_DIR, transform=train_transforms)
val_dataset = ImageFolder(root=VAL_DIR, transform=val_transforms)
# test_dataset = ImageFolder(root=TEST_DIR, transform=val_transforms)
# Mini-batch size shared by the train and validation loaders.
batch_size = 64


In [None]:
# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True
)

# Validation batches keep the dataset order.
val_loader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True
)

<h2> Dataset Testing </h2>

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Sanity check: fetch one batch from the training loader, print the shapes of
# the image and label tensors, and stop after that first batch.
for images, labels in train_loader:
    print(f"Image shape: {images.shape}") 
    print(f"Label: {labels.shape}")
    break
    

<h2> AlexNet Model </h2>

In [None]:
class Alexnet(nn.Module):
    """AlexNet-style convolutional classifier for 3-channel images.

    Five convolutional layers (with ReLU and three max-pool stages) feed a
    three-layer fully connected head with dropout. The first linear layer
    expects 9216 (= 256 * 6 * 6) flattened features, which is what the
    convolutional stack produces for 224x224 inputs.

    Args:
        num_classes: Number of output logits produced by the last linear
            layer. Defaults to 200.
    """

    def __init__(self, num_classes=200):
        super().__init__()

        # Parameter-free layers: a single instance of each is reused at every
        # position where it appears in the Sequential containers below.
        self.maxpool = nn.MaxPool2d(kernel_size=3,stride=2,padding=0)
        self.relu = nn.ReLU(inplace=True)

        self.conv1 = nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=2)
        self.conv2 = nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,stride=1,padding=2)
        self.conv3 = nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,stride=1,padding=1)
        self.conv4 = nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,stride=1,padding=1)
        self.conv5 = nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1)

        feature_extractor_layers = [self.conv1, self.relu, self.maxpool, self.conv2, self.relu,
                                    self.maxpool, self.conv3, self.relu, self.conv4, self.relu,
                                    self.conv5, self.relu, self.maxpool]

        self.feature_extractor = nn.Sequential(*feature_extractor_layers)

        self.dropout = nn.Dropout(p=0.5)

        self.fc1 = nn.Linear(in_features=9216, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        self.fc3 = nn.Linear(in_features=4096, out_features=num_classes)

        classifier_layers = [self.dropout, self.fc1, self.relu, self.dropout, self.fc2,
                             self.relu, self.fc3]
        self.classifier = nn.Sequential(*classifier_layers)


    def forward(self,images):
        """Compute class logits for a batch of images.

        Args:
            images: Input batch passed straight to the first convolution
                (3 channels; 224x224 spatial size matches the 9216-feature
                classifier input).

        Returns:
            Tensor of raw, un-normalised scores with one row per input image
            and one column per class.
        """
        out = self.feature_extractor(images)
        # Keep the batch dimension and flatten everything else for the linear head.
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        # The logits must be returned; without this the model yields None.
        return out



In [None]:
# Device selection is evaluated again here: GPU when CUDA is available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Upper bound on the number of training epochs and the initial Adam learning rate.
num_of_epochs = 50
learning_rate = 0.001


# Model, loss, optimiser and learning-rate scheduler used by the training loop.
model = Alexnet().to(device=device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # type: ignore 
# mode='min': the scheduler watches a quantity that should decrease (train_model
# feeds it the average validation loss); factor=0.1 multiplies the LR by 0.1
# once that quantity has not improved for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3,factor=0.1)
# Destinations for the rolling "latest" checkpoint and the best-so-far checkpoint.
checkpoint_path = r"../checkpoints/current_model.pth"
best_model_path = r"../checkpoints/best_model.pth"

In [None]:
def log_one_epoch_summary(epoch,num_of_epochs, train_loss,train_accuracy, val_loss, val_accuracy):
    """Print the train/validation loss and accuracy of one epoch.

    The summary is a header line followed by one line for the training
    metrics and one for the validation metrics, all formatted to four
    decimal places and surrounded by blank lines.
    """
    header = f"\nEpoch [{epoch}/{num_of_epochs}] Summary:"
    train_line = f"  Train Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.4f}"
    val_line = f"  Val   Loss: {val_loss:.4f} | Val   Acc: {val_accuracy:.4f}\n"

    for line in (header, train_line, val_line):
        print(line)

def save_checkpoint(epoch, model, optimizer, scheduler, history, patience_counter, checkpoint_path):
    """Write the full training state to ``checkpoint_path`` with ``torch.save``.

    The saved dictionary holds the epoch number, the state dicts of the model,
    optimizer and scheduler, the metric ``history`` and the early-stopping
    ``patience_counter``.

    Args:
        epoch: Number of the epoch that has just finished.
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        scheduler: LR scheduler whose ``state_dict()`` is stored.
        history: Metric history object, stored as-is.
        patience_counter: Current early-stopping counter, stored as-is.
        checkpoint_path: Destination file. Missing parent directories are
            created when this is a string or path-like object.

    Side effects:
        Switches ``model`` to evaluation mode via ``model.eval()``.
    """
    # torch.save does not create missing folders, so make sure the target
    # directory exists instead of failing after an epoch of training.
    if isinstance(checkpoint_path, (str, os.PathLike)):
        checkpoint_dir = os.path.dirname(os.fspath(checkpoint_path))
        if checkpoint_dir:
            os.makedirs(checkpoint_dir, exist_ok=True)

    # Kept from the original implementation: callers may rely on the model
    # being in eval mode after a checkpoint has been written.
    model.eval()
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'history':history,
        'patience_counter':patience_counter
    }, checkpoint_path)

def load_checkpoint(device, model, optimizer, scheduler, checkpoint_path):
    """Restore training state from a checkpoint file.

    Loads the file with ``torch.load`` (tensors mapped to ``device``) and
    restores the state dicts of ``model``, ``optimizer`` and ``scheduler``
    in place.

    Args:
        device: Target passed to ``torch.load`` as ``map_location``.
        model: Module that receives ``model_state_dict``.
        optimizer: Optimizer that receives ``optimizer_state_dict``.
        scheduler: Scheduler that receives ``scheduler_state_dict``.
        checkpoint_path: File to read.

    Returns:
        Tuple ``(start_epoch, history, patience_counter)`` where
        ``start_epoch`` is the stored epoch plus one.
    """
    print("🔁 Resuming from checkpoint...")
    checkpoint = torch.load(checkpoint_path, map_location=device)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

    start_epoch = checkpoint['epoch'] + 1
    history = checkpoint['history']
    patience_counter = checkpoint['patience_counter']

    # history['val_loss'] is a list with one entry per epoch; formatting the
    # list itself with ':.4f' raises TypeError, so report the latest entry.
    val_losses = history['val_loss']
    if isinstance(val_losses, (list, tuple)):
        last_val_loss = val_losses[-1] if val_losses else None
    else:
        last_val_loss = val_losses
    val_loss_text = "n/a" if last_val_loss is None else f"{last_val_loss:.4f}"

    print(f"✅ Loaded checkpoint from epoch {checkpoint['epoch']} with val loss {val_loss_text}")
    return start_epoch,history,patience_counter

In [None]:
def train_one_epoch(device, epoch, num_of_epochs, train_loader, model, criterion, optimizer, clip_value):
    """Run one optimisation pass over ``train_loader``.

    Every batch is moved to ``device``, pushed through ``model``, scored with
    ``criterion`` and used for one optimizer step; gradients are clipped to a
    total norm of ``clip_value`` before the step. The batch loss is written
    through ``tqdm.write`` on every 10th batch, starting with the first.

    Returns:
        Tuple ``(avg_train_loss, train_accuracy)``: the sum of the per-batch
        losses divided by the number of batches, and the number of correct
        argmax predictions divided by the number of samples seen.
    """
    running_loss, num_correct, num_seen = 0, 0, 0

    model.train()
    num_batches = len(train_loader)
    progress = tqdm(enumerate(train_loader), total=num_batches, desc=f"Epoch {epoch} [Train]")

    for step, (images, labels) in progress:
        images, labels = images.to(device), labels.to(device)

        # Forward pass and loss.
        logits = model(images)
        loss = criterion(logits, labels)

        # Backward pass with gradient-norm clipping, then the parameter update.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step()

        # Running statistics for the epoch-level metrics.
        batch_loss = loss.item()
        running_loss += batch_loss
        num_correct += (logits.argmax(1) == labels).sum().item()
        num_seen += labels.size(0)

        if step % 10 == 0:
            tqdm.write(f"[Train] Epoch [{epoch}/{num_of_epochs}], Step [{step+1}/{num_batches}], Loss: {batch_loss:.4f}")

    # Epoch-level averages.
    return running_loss / num_batches, num_correct / num_seen


In [None]:
def evaluate_one_epoch(device, epoch, num_of_epochs, val_loader, model, criterion):
    """Score ``model`` on ``val_loader`` without updating any weights.

    The model is put in evaluation mode and the whole pass runs under
    ``torch.no_grad()``. The batch loss is written through ``tqdm.write`` on
    every 10th batch, starting with the first.

    Returns:
        Tuple ``(avg_val_loss, val_accuracy)``: the sum of the per-batch
        losses divided by the number of batches, and the number of correct
        argmax predictions divided by the number of samples seen.
    """
    model.eval()
    running_loss, num_correct, num_seen = 0, 0, 0

    with torch.no_grad():
        num_batches = len(val_loader)
        progress = tqdm(enumerate(val_loader), total=num_batches, desc=f"Epoch {epoch} [VAL]")

        for step, (images, labels) in progress:
            images, labels = images.to(device), labels.to(device)

            logits = model(images)
            loss = criterion(logits, labels)

            # Running statistics for the epoch-level metrics.
            batch_loss = loss.item()
            running_loss += batch_loss
            num_correct += (logits.argmax(1) == labels).sum().item()
            num_seen += labels.size(0)

            if step % 10 == 0:
                tqdm.write(f"[Val] Epoch [{epoch}/{num_of_epochs}], Step [{step+1}/{num_batches}], Loss: {batch_loss:.4f}")

    # Epoch-level averages.
    return running_loss / len(val_loader), num_correct / num_seen

In [None]:
def train_model(model, train_loader,val_loader, criterion, optimizer,scheduler,num_of_epochs,
                 device, clip_value=10,checkpoint_path=r"../checkpoints/current_model.pth",
                  best_model_path=r"../checkpoints/best_model.pth", resume=False,
                  early_stop_patience=8):
    """Train ``model`` with per-epoch validation, checkpointing and early stopping.

    Each epoch runs ``train_one_epoch`` and ``evaluate_one_epoch``, steps
    ``scheduler`` with the average validation loss, appends the four metrics
    to the history, writes the rolling checkpoint, and writes the best-model
    checkpoint whenever the validation loss improves.

    Args:
        model, criterion, optimizer, scheduler: Training objects, passed on to
            the per-epoch helpers and the checkpoint functions.
        train_loader, val_loader: Batch iterables for training / validation.
        num_of_epochs: Last epoch number to run (epochs are numbered from 1).
        device: Device the batches are moved to.
        clip_value: Gradient-norm clipping threshold for ``train_one_epoch``.
        checkpoint_path: File that receives the state after every epoch.
        best_model_path: File that receives the state of the best epoch so far.
        resume: When true and ``checkpoint_path`` exists, continue from it.
        early_stop_patience: Number of consecutive epochs without a validation
            loss improvement after which training stops.

    Returns:
        Dict with the lists ``train_loss``, ``val_loss``, ``train_acc`` and
        ``val_acc`` (one entry per completed epoch).
    """
    start_epoch = 1
    best_val_loss = float('inf')

    history = {
        "train_loss": [], "val_loss": [],
        "train_acc": [], "val_acc": []
    }

    patience_counter = 0

    # Create the checkpoint folders up front so a missing directory cannot
    # abort the run at the first save, after a full epoch of training.
    for target_path in (checkpoint_path, best_model_path):
        target_dir = os.path.dirname(target_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

    # Resume from checkpoint if available
    if resume and os.path.exists(checkpoint_path):
        start_epoch,history, patience_counter = load_checkpoint(device, model, optimizer, scheduler, checkpoint_path)
        # The checkpoint does not store the best loss; recover it from the
        # history so a worse resumed epoch cannot overwrite the best model.
        best_val_loss = min(history["val_loss"], default=float('inf'))

    for epoch in range(start_epoch, num_of_epochs+1):

        avg_train_loss, train_accuracy = train_one_epoch(device, epoch, num_of_epochs, train_loader, model, criterion, optimizer, clip_value)
        avg_val_loss, val_accuracy = evaluate_one_epoch(device, epoch, num_of_epochs, val_loader, model, criterion)

        scheduler.step(avg_val_loss)

        history["train_loss"].append(avg_train_loss)
        history["val_loss"].append(avg_val_loss)
        history["train_acc"].append(train_accuracy)
        history["val_acc"].append(val_accuracy)

        # Update the early-stopping state BEFORE saving, so the checkpoint
        # stores the counter that belongs to this epoch.
        improved = avg_val_loss < best_val_loss
        if improved:
            best_val_loss = avg_val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        save_checkpoint(epoch, model, optimizer, scheduler, history, patience_counter, checkpoint_path)

        if improved:
            save_checkpoint(epoch, model, optimizer, scheduler, history, patience_counter, best_model_path)
            print("🌟 New best model saved.")

        # Log before the early-stop check so the final epoch is reported too.
        log_one_epoch_summary(epoch, num_of_epochs, avg_train_loss, train_accuracy,avg_val_loss, val_accuracy)

        # '>=' makes the stop happen after exactly `early_stop_patience`
        # non-improving epochs, matching the message below.
        if not improved and patience_counter >= early_stop_patience:
            print(f"⏹ Early stopping at epoch {epoch}. No improvement for {early_stop_patience} epochs.")
            break

    return history


In [None]:
# Start a fresh training run; with resume=False an existing checkpoint is not loaded.
history = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_of_epochs=num_of_epochs,
    device=device,
    clip_value=10,
    checkpoint_path=checkpoint_path,
    best_model_path=best_model_path,
    resume=False,
)

In [None]:
def log_experiment(log_file, hyperparams, metrics):
    """Append one experiment record to a JSON log file.

    The file holds a JSON list; each record is a dict with the current local
    time as ``timestamp`` plus the given ``hyperparameters`` and ``metrics``.
    A missing or empty file starts a new list, and missing parent directories
    are created.

    Args:
        log_file: Path of the JSON log file.
        hyperparams: JSON-serialisable description of the run configuration.
        metrics: JSON-serialisable results of the run.
    """
    log_entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "hyperparameters": hyperparams,
        "metrics": metrics
    }

    # open(..., "w") does not create folders; without this a missing output
    # directory would lose the results of a finished training run.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # An existing but empty file is not valid JSON; treat it like a new log.
    if os.path.exists(log_file) and os.path.getsize(log_file) > 0:
        with open(log_file, "r") as f:
            logs = json.load(f)
    else:
        logs = []

    logs.append(log_entry)

    with open(log_file, "w") as f:
        json.dump(logs, f, indent=4)

In [None]:
# Output files for the experiment log (JSON is the source, CSV is derived from it).
json_log_file = "../outputs/experiment_log.json"
csv_log_file = "../outputs/experiment_log.csv"

# Configuration of this run, recorded next to the metrics.
hyperparams = {
    "model": "AlexNet",
    "optimizer": "Adam",
    "learning_rate": learning_rate,
    "batch_size": batch_size,
    "epochs": num_of_epochs,
    "scheduler": "ReduceLROnPlateau",
    "clip_value": 10,
    "early_stop_patience": 8,
    "transform": "Resize(256)->Crop->Flip->Norm",
    "Weight Initialization": None,
}

# Metrics of the last completed epoch, i.e. the final entry of each history list.
final_metrics = {
    f"final_{metric_name}": history[metric_name][-1]
    for metric_name in ("train_loss", "val_loss", "train_acc", "val_acc")
}

log_experiment(json_log_file, hyperparams, final_metrics)

In [None]:
def convert_json_to_csv(json_path, csv_path):
    """Flatten the JSON experiment log into a CSV table.

    Every log entry becomes one row made of its ``timestamp`` followed by the
    key/value pairs of its ``hyperparameters`` and ``metrics`` dicts. The
    table is written to ``csv_path`` without an index column.
    """
    with open(json_path, 'r') as f:
        entries = json.load(f)

    # One flat dict per entry: hyperparameters and metrics share a single row.
    rows = [
        {"timestamp": item["timestamp"], **item["hyperparameters"], **item["metrics"]}
        for item in entries
    ]

    table = pd.DataFrame(rows)
    table.to_csv(csv_path, index=False)
    print(f"✅ Log converted to CSV: {csv_path}")

# Rebuild the CSV copy of the experiment log from the JSON file written above.
convert_json_to_csv(json_log_file, csv_log_file)