In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 1. Data Loading and Preprocessing
def get_tinyimagenet_data(root='./tiny-imagenet-200'):
    train_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = datasets.ImageFolder(root+'/train', transform=train_transform)
    val_dataset = datasets.ImageFolder(root+'/val', transform=val_transform)
    
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
    
    return train_loader, val_loader

# 2. Model Setup
def get_model():
    model = torchvision.models.resnet18(pretrained=True)
    
    # Replace final layer
    model.fc = nn.Linear(model.fc.in_features, 200)
    
    # Verify gradient requirements
    for name, param in model.named_parameters():
        if 'fc' in name:
            param.requires_grad = True
        else:
            param.requires_grad = False
    
    return model.to(device)

# 3. Training Function
def train_model(model, train_loader, val_loader, epochs=10):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), 
                         lr=0.01, momentum=0.9)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    
    best_acc = 0.0
    
    for epoch in range(epochs):
        model.train()
        train_loss, correct, total = 0.0, 0, 0
        
        for images, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
        
        train_acc = 100. * correct / total
        val_acc = evaluate_model(model, val_loader)
        
        print(f"Epoch {epoch+1}: "
              f"Train Loss: {train_loss/len(train_loader):.4f}, "
              f"Train Acc: {train_acc:.2f}%, "
              f"Val Acc: {val_acc:.2f}%")
        
        scheduler.step()
        
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
    
    print(f"\nBest Validation Accuracy: {best_acc:.2f}%")

# 4. Evaluation Function
def evaluate_model(model, dataloader):
    model.eval()
    correct, total = 0, 0
    
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    return 100 * correct / total

# 5. Main Execution
if __name__ == "__main__":
    # Load data
    train_loader, val_loader = get_tinyimagenet_data()
    
    # Initialize model
    model = get_model()
    print("Model ready - training only fc layer")
    
    # Train
    print("\nStarting training...")
    train_model(model, train_loader, val_loader, epochs=10)
    
    # Final evaluation
    print("\nEvaluating best model...")
    model.load_state_dict(torch.load('best_model.pth'))
    val_acc = evaluate_model(model, val_loader)
    print(f"\nFinal Validation Accuracy: {val_acc:.2f}%")

Model ready - training only fc layer

Starting training...


Epoch 1/10: 100%|██████████| 1563/1563 [00:59<00:00, 26.26it/s]


Epoch 1: Train Loss: 2.3313, Train Acc: 47.46%, Val Acc: 0.75%


Epoch 2/10: 100%|██████████| 1563/1563 [00:41<00:00, 37.62it/s]


Epoch 2: Train Loss: 1.7119, Train Acc: 57.56%, Val Acc: 0.57%


Epoch 3/10: 100%|██████████| 1563/1563 [00:50<00:00, 30.66it/s]


Epoch 3: Train Loss: 1.6235, Train Acc: 59.39%, Val Acc: 0.57%


Epoch 4/10: 100%|██████████| 1563/1563 [00:40<00:00, 38.41it/s]


Epoch 4: Train Loss: 1.5731, Train Acc: 60.41%, Val Acc: 0.62%


Epoch 5/10:  50%|█████     | 788/1563 [00:20<00:20, 37.62it/s]


KeyboardInterrupt: 