In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

import os

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [2]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=2):
        super(ResidualBlock, self).__init__()

        self.conv_block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=stride, padding=2, bias=False), # kernel_size=5, padding=2
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=5, stride=1, padding=2, bias=False), # kernel_size=5, padding=2
            nn.BatchNorm2d(out_channels)
        )

        self.relu = nn.ReLU(inplace=True)

        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels)
        ) if stride != 1 or in_channels != out_channels else None

    def forward(self, x):
        residual = x
        out = self.conv_block(x)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


In [3]:
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.initial = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        self.layer1 = self.make_layer(block, 64, layers[0])
        self.layer2 = self.make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self.make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self.make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.dropout = nn.Dropout(0.5)

        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        layers = []
        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.initial(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        x = self.dropout(x)

        x = self.fc(x)
        return x

In [4]:
def find_max_index(predictions, target):
    predicted_index = predictions.index(max(predictions))
    return predicted_index == target

def train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, scheduler, epochs, save_path):
    for epoch in range(epochs):
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        scheduler.step()

        with torch.no_grad():
            model.eval()
            test_loss = 0.0
            acc = 0
            total = 0

            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)
                output_list = output.cpu().detach().numpy().tolist()
                target_list = target.cpu().detach().numpy().tolist()

                for i in range(len(data)):
                    if find_max_index(output_list[i], target_list[i]):
                        acc += 1

                total += len(data)
                test_loss += criterion(output, target).item()

            average_loss = test_loss / len(test_loader)
            accuracy = acc / total

            print(f'Epoch: {epoch+1} | Train loss: {loss.item():.4f} | Test loss: {average_loss:.4f} | Accuracy: {accuracy:.4f}')

            # Save model
            state = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, os.path.join(save_path, f'model_epoch_{epoch+1}.pth'))


In [5]:
# Hyperparameters
batch = 64
lr = 0.001
epochs = 1000

save_path = './saved_models'
os.makedirs(save_path, exist_ok=True)

In [6]:
# Data loading and preprocessing
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),

    transforms.ToTensor(),
    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch, shuffle=True)

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 59388573.81it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Model, loss function, and optimizer
model = ResNet(ResidualBlock, [3, 4, 6, 3]).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

# Training and evaluation
train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, scheduler, epochs, save_path)

  return F.conv2d(input, weight, bias, self.stride,


Epoch: 1 | Train loss: 1.9255 | Test loss: 1.8910 | Accuracy: 0.3018
Epoch: 2 | Train loss: 1.6203 | Test loss: 1.7043 | Accuracy: 0.3727
Epoch: 3 | Train loss: 1.5068 | Test loss: 1.6221 | Accuracy: 0.4007
Epoch: 4 | Train loss: 1.6157 | Test loss: 1.5443 | Accuracy: 0.4202
Epoch: 5 | Train loss: 1.4284 | Test loss: 1.4807 | Accuracy: 0.4503
Epoch: 6 | Train loss: 1.7260 | Test loss: 1.4578 | Accuracy: 0.4675
Epoch: 7 | Train loss: 1.6158 | Test loss: 1.3695 | Accuracy: 0.5148
Epoch: 8 | Train loss: 1.6283 | Test loss: 1.3484 | Accuracy: 0.5062
Epoch: 9 | Train loss: 1.7086 | Test loss: 1.3140 | Accuracy: 0.5238
Epoch: 10 | Train loss: 1.8252 | Test loss: 1.4260 | Accuracy: 0.4833
Epoch: 11 | Train loss: 1.4153 | Test loss: 1.2323 | Accuracy: 0.5628
Epoch: 12 | Train loss: 1.6437 | Test loss: 1.1952 | Accuracy: 0.5794
Epoch: 13 | Train loss: 1.4820 | Test loss: 1.1207 | Accuracy: 0.5991
Epoch: 14 | Train loss: 1.3104 | Test loss: 1.1287 | Accuracy: 0.5993
Epoch: 15 | Train loss: 1.520