In [70]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [71]:
import torch 
import torchvision
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

In [127]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64

# Training transforms with augmentation
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # 50% chance to flip
    transforms.RandomCrop(32, padding=4),  # Pad then randomly crop
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Random color adjustments
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # CIFAR-10 mean/std
])

# Test transforms (no augmentation)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

training_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform_train,  # Use augmentation for training
    download=True
)

testing_data = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    transform=transform_test  # No augmentation for testing
)

training_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
testing_loader = DataLoader(testing_data, batch_size=batch_size, shuffle=False)

x, y = next(iter(training_loader))
print(x.shape)
print(y.shape)
print(y)

print(len(testing_loader))

torch.Size([64, 3, 32, 32])
torch.Size([64])
tensor([1, 7, 6, 4, 8, 5, 6, 5, 3, 5, 4, 1, 7, 5, 7, 9, 6, 0, 7, 2, 9, 9, 7, 9,
        5, 6, 4, 2, 1, 8, 0, 1, 8, 8, 2, 5, 8, 4, 6, 8, 0, 4, 4, 5, 5, 2, 5, 3,
        4, 9, 9, 3, 1, 6, 3, 1, 9, 8, 4, 3, 3, 9, 0, 0])
157


In [134]:
class Classifier(nn.Module):
  def __init__(self, H, W, output_size):
    super().__init__()
    self.layer = nn.Sequential(
        nn.Conv2d(3, 48, 3, padding=1),
        nn.Conv2d(48, 48, 3, padding=1),
        nn.BatchNorm2d(48),
        nn.LeakyReLU(),
        nn.MaxPool2d(2),
        nn.Conv2d(48, 64, 3, padding=1),
        nn.BatchNorm2d(64),
        nn.LeakyReLU(),
        nn.MaxPool2d(2),
        nn.Dropout2d(0.1),
        nn.Conv2d(64, 128, 3, padding=1),
        nn.BatchNorm2d(128),
        nn.LeakyReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(0.2),
        nn.Flatten(),
        nn.Dropout(0.3),
        nn.Linear(128 * H//8 * W//8, 100),
        nn.LeakyReLU(),
        nn.Dropout(0.5),
        nn.Linear(100, output_size),
    )

  def forward(self, x):
    out = self.layer(x)
    return out

model = Classifier(32, 32, 10).to(device)

In [135]:
n_epochs = 50
learning_rate = 0.001
patience = 5  # Number of epochs to wait before early stopping

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0001)

best_val_loss = float('inf')
patience_counter = 0
best_model_state = None

for epoch in range(n_epochs):
    # Training phase
    model.train()
    train_loss = 0.0
    for i, (x, y) in enumerate(training_loader):
        x, y = x.to(device), y.to(device)
        
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x, y in testing_loader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            val_loss += loss.item()
    
    avg_train_loss = train_loss / len(training_loader)
    avg_val_loss = val_loss / len(testing_loader)
    
    print(f'Epoch {epoch+1}: Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
    
    # Early stopping check
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        patience_counter = 0
        best_model_state = model.state_dict().copy()
    else:
        patience_counter += 1
        
    if patience_counter >= patience:
        print('Early stopping!')
        break

# Load best model
model.load_state_dict(best_model_state)

Epoch 1: Train Loss: 1.7500, Val Loss: 1.3373
Epoch 2: Train Loss: 1.4223, Val Loss: 1.0653
Epoch 3: Train Loss: 1.2661, Val Loss: 0.9675
Epoch 4: Train Loss: 1.1661, Val Loss: 0.8629
Epoch 5: Train Loss: 1.0949, Val Loss: 0.8529
Epoch 6: Train Loss: 1.0285, Val Loss: 0.7720
Epoch 7: Train Loss: 0.9831, Val Loss: 0.7348
Epoch 8: Train Loss: 0.9465, Val Loss: 0.7102
Epoch 9: Train Loss: 0.9108, Val Loss: 0.6734
Epoch 10: Train Loss: 0.8828, Val Loss: 0.6565
Epoch 11: Train Loss: 0.8521, Val Loss: 0.6385
Epoch 12: Train Loss: 0.8367, Val Loss: 0.6564
Epoch 13: Train Loss: 0.8142, Val Loss: 0.6181
Epoch 14: Train Loss: 0.8074, Val Loss: 0.5902
Epoch 15: Train Loss: 0.7867, Val Loss: 0.5892
Epoch 16: Train Loss: 0.7747, Val Loss: 0.5724
Epoch 17: Train Loss: 0.7625, Val Loss: 0.5646
Epoch 18: Train Loss: 0.7505, Val Loss: 0.5824
Epoch 19: Train Loss: 0.7380, Val Loss: 0.5495
Epoch 20: Train Loss: 0.7302, Val Loss: 0.5426
Epoch 21: Train Loss: 0.7234, Val Loss: 0.5267
Epoch 22: Train Loss: 

<All keys matched successfully>

In [137]:
def evaluate_accuracy(model, data_loader, device):
    model.eval()  # Set to evaluation mode
    correct = 0
    total = 0
    
    with torch.no_grad():  # No need to track gradients
        for x, y in data_loader:
            x = x.to(device)
            y = y.to(device)
            
            outputs = model(x)
            _, predicted = torch.max(outputs.data, 1)  # Get predicted class
            
            total += y.size(0)
            correct += (predicted == y).sum().item()
    
    return 100 * correct / total  # Return accuracy percentage

# Check accuracy on training set
train_accuracy = evaluate_accuracy(model, training_loader, device)
print(f'Training Accuracy: {train_accuracy:.2f}%')

# Check accuracy on test set
test_accuracy = evaluate_accuracy(model, testing_loader, device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

Training Accuracy: 84.99%
Test Accuracy: 84.51%
