Torch Model:

In [1]:
from torchvision import transforms
import torch
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4471], std=[0.2023, 0.1994, 0.2010]) # CIFAR-10 stats
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4471], std=[0.2023, 0.1994, 0.2010]) # CIFAR-10 stats
])

In [2]:
# load cifar10 for torch
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
from torch import nn
# using distillation
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = self.relu(out)
        return out

    
class ResNet6(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet6, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = ResidualBlock(16, 16, stride=1)  # 32x32 → 32x32
        self.layer2 = ResidualBlock(16, 32, stride=2)  # 32x32 → 16x16
        self.relu = nn.ReLU()
        self.fc = nn.Linear(32 * 16 * 16, num_classes)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        return self.fc(out)
    

In [4]:
from torch.optim import Adam
criterion = nn.CrossEntropyLoss()

def train(model, dataloader, device, ):
    for batch_idx, (data, target) in enumerate(dataloader):
        model.train()
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        
        if batch_idx % 10 == 0:
            print(f"\rBatch: { batch_idx}/{len(dataloader)}, Loss: {loss.item():.4f}                           ", end = '')
    train_loss, train_acc = test(model, dataloader, device)
    return train_loss, train_acc

def test(model, dataloader, device):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            
            test_loss += criterion(output, target).item()  # sum up batch loss

            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(dataloader.dataset)

    return test_loss, 100. * correct / len(dataloader.dataset)


In [5]:
import time
# Training 


model_gpu = ResNet6(num_classes=10)
model_gpu = model_gpu.to('cuda')
optimizer = Adam(model_gpu.parameters(), lr=0.001)

device = 'cuda'
start_time = time.time()
for epoch in range(1):
    train_loss, train_acc = train(model_gpu, train_dataloader, device)
    test_loss, test_acc = test(model_gpu, test_dataloader, device)

end_time = time.time()

gpu_time_torch = end_time - start_time
print("Time with GPU:", gpu_time_torch)


model_cpu = ResNet6(num_classes=10)
optimizer = Adam(model_gpu.parameters(), lr=0.001)

device = 'cpu'
start_time = time.time()
for epoch in range(1):
    train_loss, train_acc = train(model_cpu, train_dataloader, device)
    test_loss, test_acc = test(model_cpu, test_dataloader, device)
end_time = time.time()

cpu_time_torch = end_time - start_time
print("Time with CPU:", cpu_time_torch)

Batch: 1560/1563, Loss: 1.1564                           Time with GPU: 51.175527572631836
Batch: 1560/1563, Loss: 2.5904                           Time with CPU: 78.76960921287537
