In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time

In [2]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=2, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, 227, 227)
            dummy_output = self.features(dummy_input)
            self.flattened_size = dummy_output.view(1, -1).shape[1]

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(self.flattened_size, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [3]:
transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True,num_workers=4,pin_memory=True)

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False,num_workers=4,pin_memory=True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cuda


In [5]:
model = AlexNet(num_classes=10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9,weight_decay=0.0005)

In [6]:
def train(model, device, train_loader, criterion, optimizer, epoch, scaler=None):

    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    start_time = time.time()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        if scaler:
            with torch.cuda.amp.autocast():
                output = model(data)
                loss = criterion(output, target)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(output, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

        if batch_idx % 10 == 0:
            elapsed = time.time() - start_time
            batches_done = batch_idx + 1
            batches_total = len(train_loader)
            eta = elapsed / batches_done * (batches_total - batches_done)
            print(f"[Epoch {epoch}] Batch {batch_idx}/{len(train_loader)} "
                  f"Loss: {loss.item():.4f} Acc: {100*correct/total:.2f}% "
                  f"ETA: {eta:.1f}s")

    avg_loss = running_loss / len(train_loader)
    avg_acc = 100 * correct / total
    print(f"====> Epoch {epoch} Finished: Avg Loss: {avg_loss:.4f}, Avg Acc: {avg_acc:.2f}%")
    return avg_loss, avg_acc

In [7]:
def evaluate(model, device, loader):
    model.eval()
    total = 0
    correct = 0
    start_time = time.time()

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            if batch_idx % 5 == 0:
                elapsed = time.time() - start_time
                print(f"[Eval] Batch {batch_idx}/{len(loader)} "
                      f"Acc so far: {100*correct/total:.2f}% "
                      f"Time elapsed: {elapsed:.1f}s")

    accuracy = 100 * correct / total
    print(f"====> Evaluation Finished: Total Accuracy: {accuracy:.2f}%")
    return accuracy

In [8]:
scalar = torch.cuda.amp.GradScaler()
epochs = 10

for epoch in range(epochs):
    train_loss,train_accuracy = train(model, device, train_loader, criterion, optimizer, epoch,scaler=scalar)
    test_accuracy = evaluate(model, device, test_loader)

    print(f'Epoch {epoch}: Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy:.2f}%')

  scalar = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


[Epoch 0] Batch 0/782 Loss: 2.3051 Acc: 1.56% ETA: 17734.4s
[Epoch 0] Batch 10/782 Loss: 2.3055 Acc: 8.52% ETA: 4792.7s


KeyboardInterrupt: 