In [7]:
import os
import time

import matplotlib as plt
import torch
import torch.nn as nn
import torch.nn.functional as f
from torch.optim import SGD, lr_scheduler
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10, CIFAR100
from torchvision.models.resnet import BasicBlock, Bottleneck
from torchvision.transforms import ToTensor, Compose, RandomCrop, RandomHorizontalFlip, Normalize
from tqdm import tqdm

from model import CustomResNet, ConvBlock

# Experiment identity; both names are combined into the checkpoint file name.
student_name = 'resnet56_backprop'
dataset_name = 'cifar10'

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Training schedule.
batch_size = 128
epoch = 360                             # total number of training epochs
milestones = [60, 120, 180, 240, 300]   # epochs at which MultiStepLR decays the LR
gamma = 0.2                             # multiplicative LR decay applied at each milestone

In [8]:
# Training-time preprocessing: convert to tensor, apply random crop (with 4 px
# padding) and random horizontal flip as augmentation, then normalize each
# channel with the fixed mean/std below.
transform = Compose([
    ToTensor(),
    RandomCrop(size=[32, 32], padding=4),
    RandomHorizontalFlip(p=0.5),
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))
])

# Evaluation-time preprocessing: no augmentation, same normalization.
transform_test = Compose([
    ToTensor(),
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))
])

train_dataset = CIFAR10(root='./data', train=True, transform=transform, download=True)
test_dataset = CIFAR10(root='./data', train=False, transform=transform_test, download=True)

# Define the DataLoaders. The batch size comes from the configuration cell so
# that changing `batch_size` there actually takes effect here.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Sanity-check the dataset sizes.
print(f'Train dataset size: {len(train_dataset)}')
print(f'Validation dataset size: {len(test_dataset)}')

Files already downloaded and verified
Files already downloaded and verified
Train dataset size: 50000
Validation dataset size: 10000


In [9]:
# Instantiate the network (three stages of nine ConvBlocks, 10 output classes)
# and move it to the selected device.
# NOTE(review): the run is named 'resnet56_backprop', but the printed ConvBlocks
# hold a single conv each -- confirm the depth matches what is intended.
model = CustomResNet(block=ConvBlock, layers=[9, 9, 9], num_classes=10)
model = model.to(device)

# Report the total element count over all parameter tensors, then the layout.
total_params = sum(map(torch.numel, model.parameters()))
print(f'Total number of parameters: {total_params}')
print(model)

Total number of parameters: 415546
CustomResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layers): Sequential(
    (0): ConvBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): ConvBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (2): ConvBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplac

In [10]:
# Classification loss used for training.
criterion = nn.CrossEntropyLoss()

# SGD with Nesterov momentum and L2 weight decay.
optimizer = SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
    nesterov=True,
)

# Multiply the learning rate by `gamma` each time an epoch in `milestones` is reached.
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

In [11]:
# Per-epoch metrics. The dict is stored inside every checkpoint so the curves
# can be recovered from the saved file.
history = dict(train_loss=[], test_acc=[], train_time=[], test_time=[])

# Create the checkpoint directory up front; otherwise a fresh checkout would
# crash on the first torch.save, after a whole epoch of training.
checkpoint_path = f'./result/{dataset_name}_{student_name}.pt'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

for ep in range(epoch):
    # ---- train step ----
    train_loss = 0.0
    model.train()
    s_time = time.time()
    for image, target in train_loader:
        image = image.to(device)
        # One-hot float targets over 10 classes are passed to the criterion.
        target = f.one_hot(target, 10).float().to(device)

        pred = model(image)
        loss = criterion(pred, target)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    e_time = time.time()
    # Mean of the per-batch losses over the epoch.
    history['train_loss'].append(train_loss/len(train_loader))
    history['train_time'].append(e_time - s_time)

    # ---- test step ----
    test_acc = 0.0
    model.eval()
    s_time = time.time()
    # Gradients are never used during evaluation; disabling autograd avoids
    # building a graph for every test batch (less memory, faster inference).
    with torch.no_grad():
        for image, target in test_loader:
            image = image.to(device)
            target = target.to(device)

            pred = model(image)
            # Count predictions whose arg-max class equals the label index.
            test_acc += torch.sum(torch.argmax(pred, dim=1) == target).item()
    e_time = time.time()
    # Fraction of correctly classified test samples.
    history['test_acc'].append(test_acc/len(test_dataset))
    history['test_time'].append(e_time - s_time)
    print(f'epoch={ep:3d}, train_loss={train_loss/len(train_loader):.4f}, test_acc={test_acc/len(test_dataset):.3f}')

    # Overwrite the single checkpoint file with the latest state after each epoch.
    checkpoint = dict(
        model=model.state_dict(),
        optimizer=optimizer.state_dict(),
        history=history,
        epoch=ep
    )
    torch.save(checkpoint, checkpoint_path)
    # Advance the LR schedule once per epoch, after the optimizer updates.
    scheduler.step()

epoch=  0, train_loss=1.8141, test_acc=0.388
epoch=  1, train_loss=1.5649, test_acc=0.399
epoch=  2, train_loss=1.3506, test_acc=0.512
epoch=  3, train_loss=1.1581, test_acc=0.539
epoch=  4, train_loss=0.9983, test_acc=0.619
epoch=  5, train_loss=0.8915, test_acc=0.627
epoch=  6, train_loss=0.8168, test_acc=0.654
epoch=  7, train_loss=0.7593, test_acc=0.694
epoch=  8, train_loss=0.7205, test_acc=0.716
epoch=  9, train_loss=0.6818, test_acc=0.743
epoch= 10, train_loss=0.6543, test_acc=0.728
epoch= 11, train_loss=0.6323, test_acc=0.749
epoch= 12, train_loss=0.6049, test_acc=0.734
epoch= 13, train_loss=0.5889, test_acc=0.713
epoch= 14, train_loss=0.5735, test_acc=0.709
epoch= 15, train_loss=0.5605, test_acc=0.776
epoch= 16, train_loss=0.5447, test_acc=0.692
epoch= 17, train_loss=0.5291, test_acc=0.614
epoch= 18, train_loss=0.5231, test_acc=0.763
epoch= 19, train_loss=0.5113, test_acc=0.785
epoch= 20, train_loss=0.5001, test_acc=0.732
epoch= 21, train_loss=0.4912, test_acc=0.795
epoch= 22,

In [12]:
# Scratch check of slice semantics: [:-1] drops the last element.
# NOTE(review): leftover experiment, unrelated to training -- consider deleting this cell.
a = list(range(1, 5))
a[:-1]

[1, 2, 3]