## Model comparison: ConvNeXt-Tiny vs. CSPDarknet53 vs. ResNet18
----------------------------


## Loading the dataset


In [6]:
import torchvision.datasets as datasets
from torchvision.datasets import Food101
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Normalize every image with the ImageNet channel statistics.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
# NOTE(review): the saved output of the CSPDarknet53 cell shows a CUDA
# out-of-memory error on a ~5.8 GiB GPU at this batch size; lower it
# (e.g. 32 or 16) if that recurs.
BATCH_SIZE = 64

# Preprocessing shared by the train and test splits.
transform = transforms.Compose([
    # An int size scales the shorter side to 224; CIFAR images are square,
    # so the result is 224x224.
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# Load (and download on first use) CIFAR-100. The notebook builds its models
# with NUM_CLASSES = 100 and saves checkpoints named "*_CIFAR100", so the
# 100-class dataset is the one that matches; CIFAR-10 would leave 90 of the
# output units without any training examples.
train_dataset = datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform
)
test_dataset = datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform
)

# Shuffle only the training split; evaluation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import time
from tqdm import tqdm #

# Number of output classes used when building each model's classification head.
NUM_CLASSES = 100
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(DEVICE)

def train_on_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: network to train; it is switched to training mode first.
        dataloader: iterable of ``(inputs, labels)`` batches that also exposes
            a ``.dataset`` attribute whose length is used as the divisor.
        criterion: callable producing the loss from ``(outputs, labels)``.
        optimizer: optimizer that is zeroed and stepped once per batch.
        device: device the batch tensors are moved to before the forward pass.

    Returns:
        The sum of per-batch losses, each weighted by its batch size, divided
        by ``len(dataloader.dataset)``. The same value is also printed.
    """
    model.train()
    weighted_loss_sum = 0.0

    for batch_inputs, batch_labels in tqdm(dataloader, desc="Training"):
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight each batch loss by its size so that dividing by the dataset
        # length gives a per-sample average (assumes criterion returns a
        # batch mean).
        weighted_loss_sum += batch_loss.item() * batch_inputs.size(0)

    epoch_loss = weighted_loss_sum / len(dataloader.dataset)
    print(f"Training Loss: {epoch_loss:.4f}")
    return epoch_loss

cuda:0


In [8]:

def test_accuracy(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0
    
    # Disable gradient tracking for efficiency during inference
    with torch.no_grad(): 
        for inputs, labels in tqdm(dataloader, desc="Testing"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model(inputs)
            
            # Get the predicted class index (max score)
            _, predicted = torch.max(outputs.data, 1)
            
            # Update counts
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

In [9]:
def save_model_weights(model, model_name):
    """Write ``model.state_dict()`` to ``<normalised name>.pth`` in the working directory.

    The file stem is ``model_name`` lower-cased with every underscore replaced
    by a hyphen. A confirmation line naming the file is printed afterwards.
    """
    stem = model_name.lower().replace('_', '-')
    filename = stem + ".pth"
    weights = model.state_dict()
    torch.save(weights, filename)
    print(f"--- Weights saved to {filename} ---")

In [10]:
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

# Optimisation hyper-parameters; the training cells further down read these
# module-level names.
EPOCHS = 10
LEARNING_RATE = 4e-3
WEIGHT_DECAY = 5e-2

# ConvNeXt-Tiny initialised from the torchvision IMAGENET1K_V1 weights.
model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.IMAGENET1K_V1)

# num_ftrs = model.classifier[-1].in_features
# model.classifier[-1] = nn.Linear(num_ftrs, NUM_CLASSES) 
# model.to(DEVICE)
# optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
# criterion = nn.CrossEntropyLoss()
# history = {'train_loss': [], 'test_acc': []}
# best_acc = 0.0

# for epoch in range(1, EPOCHS + 1):
#     train_loss = train_on_epoch(model, train_loader, criterion, optimizer, DEVICE)
#     test_acc = test_accuracy(model, test_loader, DEVICE)
#     history['train_loss'].append(train_loss)
#     history['test_acc'].append(test_acc)

#     print(f"Epoch {epoch}/{EPOCHS} | Loss: {train_loss:.4f} | Acc: {test_acc:.2f}%")

# print(f"\nTraining complete. Best Accuracy for {best_acc:.2f}%")
# saved_model_name = "ConvNeXt_Tiny_CIFAR100"
# save_model_weights(model, saved_model_name)

In [11]:
import timm
# CSPDarknet53 from timm, randomly initialised (pretrained=False) with a
# NUM_CLASSES-way classification head.
model = timm.create_model(
    'cspdarknet53',
    pretrained=False,
    num_classes=NUM_CLASSES,
    in_chans=3  # Standard 3 color channels
)
model.to(DEVICE)
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
criterion = nn.CrossEntropyLoss()
# Per-epoch metrics, appended in epoch order.
history = {'train_loss': [], 'test_acc': []}
best_acc = 0.0

for epoch in range(1, EPOCHS + 1):
    train_loss = train_on_epoch(model, train_loader, criterion, optimizer, DEVICE)
    test_acc = test_accuracy(model, test_loader, DEVICE)
    history['train_loss'].append(train_loss)
    history['test_acc'].append(test_acc)
    # Track the best test accuracy seen so far; without this the summary
    # below always reports the 0.00% initial value.
    best_acc = max(best_acc, test_acc)

    print(f"Epoch {epoch}/{EPOCHS} | Loss: {train_loss:.4f} | Acc: {test_acc:.2f}%")

print(f"\nTraining complete. Best Accuracy for {best_acc:.2f}%")
# Weights saved here are those of the final epoch, not necessarily the best one.
saved_model_name = "CSPDarkNet53_CIFAR100"
save_model_weights(model, saved_model_name)

  from .autonotebook import tqdm as notebook_tqdm
Training:   0%|          | 0/782 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacity of 5.80 GiB of which 24.75 MiB is free. Including non-PyTorch memory, this process has 5.76 GiB memory in use. Of the allocated memory 5.61 GiB is allocated by PyTorch, and 32.25 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
from torchvision.models import resnet18
# ResNet-18 with random initialisation (no pretrained weights).
model = resnet18(weights=None)

# torchvision's ResNet exposes its classification head as `fc` (a single
# nn.Linear); it has no `classifier` attribute, so indexing
# `model.classifier[-1]` raises AttributeError. Swap `fc` for a
# NUM_CLASSES-way layer instead.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)
model.to(DEVICE)
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
criterion = nn.CrossEntropyLoss()
# Per-epoch metrics, appended in epoch order.
history = {'train_loss': [], 'test_acc': []}
best_acc = 0.0

for epoch in range(1, EPOCHS + 1):
    train_loss = train_on_epoch(model, train_loader, criterion, optimizer, DEVICE)
    test_acc = test_accuracy(model, test_loader, DEVICE)
    history['train_loss'].append(train_loss)
    history['test_acc'].append(test_acc)
    # Track the best test accuracy seen so far; without this the summary
    # below always reports the 0.00% initial value.
    best_acc = max(best_acc, test_acc)

    print(f"Epoch {epoch}/{EPOCHS} | Loss: {train_loss:.4f} | Acc: {test_acc:.2f}%")

print(f"\nTraining complete. Best Accuracy for {best_acc:.2f}%")
# Weights saved here are those of the final epoch, not necessarily the best one.
saved_model_name = "ResNet18_CIFAR100"
save_model_weights(model, saved_model_name)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def draw_comparison_graphs(all_results):
    """Show two comparison figures for a set of trained models.

    Figure 1 is a twin-axis bar chart (best test accuracy next to parameter
    count); figure 2 plots each model's test accuracy per epoch.

    Args:
        all_results: mapping of model name to a dict with the keys
            ``'best_accuracy'``, ``'parameters_m'`` and ``'history'``, where
            ``'history'`` is itself a dict holding a ``'test_acc'`` sequence.

    Returns:
        None. When ``all_results`` is empty, an error message is printed and
        nothing is drawn.
    """
    if not all_results:
        print("Error: The 'all_results' dictionary is empty. Please ensure all models finished training.")
        return

    names = list(all_results)
    accuracies = [entry['best_accuracy'] for entry in all_results.values()]
    parameters = [entry['parameters_m'] for entry in all_results.values()]

    # --- Plot 1: Bar Chart (Accuracy vs. Parameters) ---
    positions = np.arange(len(names))
    width = 0.35
    half_width = width / 2

    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Left axis: accuracy bars, drawn half a bar-width left of each tick.
    ax1.bar(positions - half_width, accuracies, width, label='Best Test Accuracy (%)', color='mediumblue')
    ax1.set_ylabel('Accuracy (%)', color='mediumblue')
    ax1.set_ylim(min(accuracies) * 0.9, max(accuracies) * 1.05)
    ax1.tick_params(axis='y', labelcolor='mediumblue')

    # Right axis (sharing x): parameter-count bars, drawn half a bar-width right.
    ax2 = ax1.twinx()
    ax2.bar(positions + half_width, parameters, width, label='Parameters (M)', color='darkorange')
    ax2.set_ylabel('Parameters (M)', color='darkorange')
    ax2.tick_params(axis='y', labelcolor='darkorange')

    # Tick labels and title
    ax1.set_xticks(positions)
    ax1.set_xticklabels(names)
    ax1.set_title('Model Comparison: Accuracy vs. Parameters')

    # One legend combining the entries of both axes.
    acc_handles, acc_labels = ax1.get_legend_handles_labels()
    param_handles, param_labels = ax2.get_legend_handles_labels()
    ax1.legend(acc_handles + param_handles, acc_labels + param_labels, loc='upper left')

    plt.grid(axis='y', linestyle='--')
    plt.show()

    # --- Plot 2: Learning Curves (Accuracy Progression) ---
    plt.figure(figsize=(10, 6))

    # One line per model; each curve is plotted against its own epoch index,
    # so histories of different lengths are fine.
    for name, entry in all_results.items():
        curve = entry['history']['test_acc']
        plt.plot(curve, label=f'{name} (Best: {entry["best_accuracy"]:.2f}%)')

    plt.title('Test Accuracy Progression Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Test Accuracy (%)')
    plt.legend()
    plt.grid(True)
    plt.show()

# --- EXECUTION ---
# You must ensure the all_results dictionary is defined and populated in previous cells.
# Example: 
# all_results = {
#     'ResNet18': {'best_accuracy': 80.5, 'parameters_m': 11.7, 'history': {'test_acc': [10, 20, 30, ..., 80.5]}},
#     'CSPDarknet53': {'best_accuracy': 85.1, 'parameters_m': 27.6, 'history': {'test_acc': [15, 25, 35, ..., 85.1]}},
#     'ConvNeXt_Tiny': {'best_accuracy': 87.2, 'parameters_m': 28.6, 'history': {'test_acc': [12, 22, 32, ..., 87.2]}}
# }
# For demonstration purposes, I will execute a sample plot using dummy data:

# Creating dummy data simulating the results from your training loops:
# Placeholder results (not measured values) shaped like the structure
# draw_comparison_graphs reads: best accuracy, parameter count in millions,
# and a per-epoch test-accuracy history for each model.
dummy_all_results = {
    'ResNet18': {
        'best_accuracy': 45.2,
        'parameters_m': 11.7,
        'history': {
            'test_acc': [10.5, 25.1, 32.8, 38.9, 42.5, 45.2, 44.9, 45.1, 44.0, 43.5],
        },
    },
    'CSPDarknet53': {
        'best_accuracy': 48.7,
        'parameters_m': 27.6,
        'history': {
            'test_acc': [12.2, 28.3, 36.5, 42.1, 46.5, 48.7, 47.9, 48.5, 48.0, 47.1],
        },
    },
    'ConvNeXt_Tiny': {
        'best_accuracy': 51.9,
        'parameters_m': 28.6,
        'history': {
            'test_acc': [11.0, 24.5, 35.0, 43.1, 48.0, 50.1, 51.5, 51.9, 51.0, 50.8],
        },
    },
}

draw_comparison_graphs(dummy_all_results)