In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import numpy as np
import matplotlib.pyplot as plt

# Choose the compute device. The second GPU ("cuda:1") is preferred when it
# exists; on a host with a single visible GPU that index is invalid, so fall
# back to "cuda:0", and to the CPU when CUDA is unavailable altogether.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Load CIFAR-10 datasets

from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader

# Seed torch's global RNG so the permutation that defines the train/validation
# split below is reproducible.
torch.manual_seed(100)


# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, then tensor conversion and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])

# Evaluation pipeline: identical normalisation, no augmentation.
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2023, 0.1994, 0.2010)),
])


# Split a random permutation of the training-set indices into 40,000 training
# samples and 10,000 validation samples. The two slices must be disjoint so
# that validation accuracy is measured on samples the model never trains on.
full_len = 50000
val_len = 10000
indices = torch.randperm(full_len)
train_indices = indices[:full_len - val_len].tolist()
val_indices   = indices[full_len - val_len:].tolist()
assert set(train_indices).isdisjoint(val_indices), "train/val splits overlap"


# Two views of the same CIFAR-10 training split: one applying augmentation
# (used for training) and one without it (used for validation).
train_full = CIFAR10(root='./data', train=True,  download=True, transform=train_transform)
val_full   = CIFAR10(root='./data', train=True,  download=True, transform=eval_transform)

train_dataset = Subset(train_full, train_indices)
val_dataset   = Subset(val_full,   val_indices)


# Official CIFAR-10 test split, evaluated without augmentation.
test_dataset  = CIFAR10(root='./data', train=False, download=True, transform=eval_transform)


# Pinned host memory is only requested when a CUDA device is available.
use_cuda = torch.cuda.is_available()
common_kwargs = dict(num_workers=8, pin_memory=use_cuda)


# Only the training loader shuffles; evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True,  **common_kwargs)
val_loader   = DataLoader(val_dataset,   batch_size=1000, shuffle=False, **common_kwargs)
test_loader  = DataLoader(test_dataset,  batch_size=10000, shuffle=False, **common_kwargs)

print(f"Train size: {len(train_dataset)}, Val size: {len(val_dataset)}, Test size: {len(test_dataset)}")



In [None]:
class ResNet18_CIFAR(nn.Module):
    """torchvision ResNet-18 with its stem and classifier head replaced.

    The network is created without pretrained weights. Its first convolution
    becomes a 3x3, stride-1, padding-1 layer without bias, the stem max-pool
    is replaced by an identity, and the final fully connected layer is
    replaced by one producing ``num_classes`` outputs.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        backbone = models.resnet18(weights=None)

        # Swap the stem: new first convolution and no max-pool.
        backbone.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=64,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        backbone.maxpool = nn.Identity()

        # Swap the head, reusing the input width of the stock classifier.
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)

        self.model = backbone

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        logits = self.model(x)
        return logits

In [None]:
def cross_entropy_loss_with_tau(logits, targets, tau=1.0):
    """Negative log-likelihood of ``targets`` under temperature-scaled logits.

    ``logits`` is divided by ``tau``, passed through a log-softmax over
    dimension 1, and the result is fed to ``F.nll_loss`` with its default
    reduction.
    """
    tempered_log_probs = F.log_softmax(logits.div(tau), dim=1)
    return F.nll_loss(tempered_log_probs, targets)

In [None]:
def evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The model is put into eval mode (and left in it). With gradients disabled,
    every ``(inputs, labels)`` batch is moved to ``device`` and the predicted
    class is taken as the argmax over dimension 1 of the model output.
    """
    model.eval()
    n_hits = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = torch.argmax(model(inputs), dim=1)
            n_hits += (predicted == labels).sum().item()
            n_seen += labels.shape[0]
    return n_hits / n_seen


In [None]:
# Choose the compute device. The second GPU ("cuda:1") is preferred when it
# exists; on a host with a single visible GPU that index is invalid, so fall
# back to "cuda:0", and to the CPU when CUDA is unavailable altogether.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")
print(device)

model = ResNet18_CIFAR().to(device)

# SGD with momentum and L2 weight decay.
optimizer = torch.optim.SGD(model.parameters(),
                            lr=0.1,             # initial learning rate
                            momentum=0.9,       # momentum coefficient
                            weight_decay=5e-4)  # L2 penalty

# Cosine decay of the learning rate, stepped once per epoch.
# NOTE(review): T_max is 200 while the loop below runs 201 epochs, so with the
# scheduler's default minimum LR of 0 the final epoch (index 200) runs at
# lr == 0 -- confirm this is intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

# CUDA mixed precision is only meaningful on a CUDA device; disabling it
# explicitly on CPU keeps autocast/GradScaler as silent no-ops.
use_amp = device.type == "cuda"
scaler = GradScaler(enabled=use_amp)

em_epochs = 201
train_losses = []    # mean training loss per epoch
val_accuracies = []  # validation accuracy per epoch

for epoch in range(em_epochs):
    model.train()  # evaluate_accuracy() leaves the model in eval mode
    total_loss = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        with autocast(enabled=use_amp):
            output = model(x)
            loss = cross_entropy_loss_with_tau(output, y, tau=1.0)
        # Scale the loss before backward; the scaler then performs the
        # optimizer step and updates its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    scheduler.step()
    avg_loss = total_loss / len(train_loader)
    val_acc = evaluate_accuracy(model, val_loader, device)
    train_losses.append(avg_loss)
    val_accuracies.append(val_acc)

    # Report every 10th epoch and the final one.
    if epoch % 10 == 0 or epoch == em_epochs - 1:
        print(f"Epoch {epoch}: Loss = {avg_loss:.10f}, Val Acc = {val_acc:.10f}")



In [None]:

# Accuracy of the trained model on the held-out CIFAR-10 test split.
test_acc = evaluate_accuracy(model, test_loader, device)
print(f"\n Test Accuracy: {test_acc:.6f}")



import numpy as np

# Convert each per-epoch history to a NumPy array and write it out as text,
# one value per line with 10 decimal places.
train_losses = np.array(train_losses)
np.savetxt("MLP_train_losses.txt", train_losses, fmt="%.10f")

val_accuracies = np.array(val_accuracies)
np.savetxt("MLP_val_accuracies.txt", val_accuracies, fmt="%.10f")

