In [1]:
import torch
from torch import optim
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from models.basic import BasicModel, SimpleCIFARModel_LeakyReLU, SimpleCIFARModel, DeepCIFARModel
import os
import pickle


# Seed for the train/validation partition. Fixing it keeps the held-out images
# identical across kernel restarts, so a reloaded checkpoint is never scored on
# samples it was trained on in an earlier session.
SPLIT_SEED = 42

# ToTensor yields values in [0, 1]; Normalize with mean 0.5 / std 0.5 per
# channel then maps each RGB channel to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# download=False: the CIFAR-10 files must already be present under ./data.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=0)

# Hold out 20% of the official training set for validation.
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
train_subset, val_subset = random_split(
    trainset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training subset is shuffled; validation order stays fixed.
trainloader = DataLoader(train_subset, batch_size=64, shuffle=True)
valloader = DataLoader(val_subset, batch_size=64, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Make one pass over `loader` and return `(mean_loss, accuracy)`.

    When `optimizer` is given the model is put in train mode and updated after
    every batch; otherwise it is put in eval mode and run without gradients.
    Batches are moved to the notebook-level `device`.

    The loss is averaged per sample (each batch weighted by its size), so a
    short final batch does not skew the epoch average. This assumes
    `criterion` returns the mean over the batch, which is the default
    reduction of `nn.CrossEntropyLoss`.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            # Predictions come from the pre-step forward pass computed above.
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    return loss_sum / total, correct / total


def train(model, optimizer, criterion, epochs=15, save_dir="results/"):
    """Train `model` on the global `trainloader`, validating on `valloader`.

    Args:
        model: network to optimise; expected to already live on `device`.
        optimizer: optimizer built over `model`'s parameters.
        criterion: loss called as `criterion(outputs, labels)`.
        epochs: number of full passes over the training loader.
        save_dir: directory (created if missing) that receives
            `best_model.pth` and `training_results.pkl`.

    Returns:
        `(train_losses, val_losses, train_accs, val_accs)`, each a list with
        one float per epoch.

    Side effects:
        Writes the model's `state_dict` to `best_model.pth` every time the
        validation accuracy improves, prints one progress line per epoch, and
        pickles the four history lists to `training_results.pkl` at the end.
    """
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    os.makedirs(save_dir, exist_ok=True)
    best_model_path = os.path.join(save_dir, "best_model.pth")
    # Start below any reachable accuracy so the first epoch always produces a
    # checkpoint, even if validation accuracy is exactly 0.
    best_val_acc = float("-inf")

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(model, trainloader, criterion, optimizer)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        val_loss, val_acc = _run_epoch(model, valloader, criterion)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), best_model_path)

        print(f"Epoch [{epoch+1}/{epochs}], Train Accuracy: {train_acc:.4f}, Val Accuracy: {val_acc:.4f}")

    results = {
        "train_losses": train_losses,
        "val_losses": val_losses,
        "train_accs": train_accs,
        "val_accs": val_accs
    }

    with open(os.path.join(save_dir, "training_results.pkl"), "wb") as f:
        pickle.dump(results, f)

    return train_losses, val_losses, train_accs, val_accs

def evaluate(model):
    """Report the top-1 accuracy of `model` on the global `testloader`.

    The model is switched to eval mode and run without gradient tracking;
    batches are moved to the notebook-level `device`. Prints the accuracy as
    a percentage and returns it as a fraction in [0, 1].
    """
    model.eval()
    hits = 0
    seen = 0

    with torch.no_grad():
        for batch_images, batch_labels in testloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            # Index of the highest-scoring class for every sample in the batch.
            guesses = model(batch_images).max(dim=1).indices

            seen += batch_labels.size(0)
            hits += (guesses == batch_labels).sum().item()

    accuracy = hits / seen
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    return accuracy

In [3]:
# Candidate objectives, selected by index in the experiment cells:
#   0 -> plain cross-entropy, 1 -> cross-entropy with label smoothing 0.1.
loss_fns = [nn.CrossEntropyLoss(), nn.CrossEntropyLoss(label_smoothing=0.1)]

# Optimizer classes (not instances); each experiment instantiates one with
# its own model parameters and hyper-parameters.
#   0 -> SGD, 1 -> Adam, 2 -> AdamW.
optims = [optim.SGD, optim.Adam, optim.AdamW]


In [17]:
# Experiment: DeepCIFARModel + plain cross-entropy (loss_fns[0]) + Adam
# (optims[1]) with lr 1e-3 and weight decay 1e-4, trained for 15 epochs.
# Checkpoint and history are written to results/DeepModel-Adam.
model5 = DeepCIFARModel().to(device)
criterion = loss_fns[0]
optimizer = optims[1](model5.parameters(), lr=1e-3, weight_decay=1e-4)

train(
    model5,
    optimizer,
    criterion,
    epochs=15,
    save_dir="results/DeepModel-Adam",
)

# Scores the weights as they stand after the final epoch (not the best checkpoint).
evaluate(model5)

Epoch [1/15], Train Accuracy: 0.5393, Val Accuracy: 0.6471
Epoch [2/15], Train Accuracy: 0.7041, Val Accuracy: 0.7325
Epoch [3/15], Train Accuracy: 0.7579, Val Accuracy: 0.7731
Epoch [4/15], Train Accuracy: 0.7858, Val Accuracy: 0.7954
Epoch [5/15], Train Accuracy: 0.8128, Val Accuracy: 0.8045
Epoch [6/15], Train Accuracy: 0.8280, Val Accuracy: 0.8196
Epoch [7/15], Train Accuracy: 0.8457, Val Accuracy: 0.8332
Epoch [8/15], Train Accuracy: 0.8545, Val Accuracy: 0.8321
Epoch [9/15], Train Accuracy: 0.8668, Val Accuracy: 0.8236
Epoch [10/15], Train Accuracy: 0.8770, Val Accuracy: 0.8335
Epoch [11/15], Train Accuracy: 0.8841, Val Accuracy: 0.8423
Epoch [12/15], Train Accuracy: 0.8904, Val Accuracy: 0.8522
Epoch [13/15], Train Accuracy: 0.8974, Val Accuracy: 0.8634
Epoch [14/15], Train Accuracy: 0.9059, Val Accuracy: 0.8517
Epoch [15/15], Train Accuracy: 0.9085, Val Accuracy: 0.8578
Test Accuracy: 85.05%


0.8505

In [None]:
# Rebuild the architecture and restore the weights that train() saved at the
# epoch with the highest validation accuracy, then score them on the test set.
model = DeepCIFARModel().to(device)
save_path = "results/DeepModel-Adam/best_model.pth"
# map_location remaps tensors saved during a CUDA run onto the current device,
# so the checkpoint also loads on a CPU-only machine.
model.load_state_dict(torch.load(save_path, map_location=device))

evaluate(model)