In [1]:
!pip install torch ptflops



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torchvision.transforms import ToTensor

from ptflops import get_model_complexity_info

In [3]:
# Hyperparameters
learning_rate = 0.001
epochs = 10
batch_size = 64

# Seed PyTorch's global RNG so weight initialization and training-set shuffling
# start from the same state on every fresh run of the notebook.
torch.manual_seed(0)

criterion = torch.nn.CrossEntropyLoss()

# Fashion-MNIST train/test splits, downloaded into MNIST_data/ and converted to tensors.
train_dataset = dsets.FashionMNIST(root='MNIST_data/', train=True, download=True, transform=ToTensor())
test_dataset = dsets.FashionMNIST(root='MNIST_data/', train=False, download=True, transform=ToTensor())

# Only the training batches are shuffled; evaluation order does not need to be random.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


In [4]:
# Residual block implementation
class BottleNeck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a shortcut.

    The last 1x1 convolution widens the output to ``out_channels * expansion``
    channels. ``stride`` is applied by the first 1x1 convolution and, when the
    shortcut needs a projection, by the shortcut's 1x1 convolution as well.

    Args:
        in_channels: number of channels of the block input.
        out_channels: width of the two inner convolutions.
        stride: stride of the first convolution and of the projection shortcut.
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        widened = out_channels * self.expansion
        self.relu = F.relu

        # Main path (every convolution is bias-free and followed by BatchNorm).
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        # Shortcut: an empty Sequential (pass-through) when input and output
        # shapes already agree, otherwise a 1x1 projection with BatchNorm.
        if stride == 1 and in_channels == widened:
            self.identity = nn.Sequential()
        else:
            self.identity = nn.Sequential(
                nn.Conv2d(in_channels, widened, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(widened),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y += self.identity(x)
        return self.relu(y)

class ResNet(nn.Module):
    """ResNet-style classifier for single-channel images.

    Layout: 7x7 stride-2 convolution -> BatchNorm -> ReLU -> 3x3 max-pool ->
    four stages built from ``block`` -> global average pool -> linear head.

    Args:
        block: block class exposing an ``expansion`` attribute and accepting
            ``(in_channels, out_channels, stride)``.
        num_blocks: sequence of four per-stage block counts.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes):
        super().__init__()
        self.in_channels = 64

        # Stem: 7x7 convolution followed by 3x3 max-pooling.
        self.conv = nn.Conv2d(1, self.in_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn = nn.BatchNorm2d(self.in_channels)
        self.relu = F.relu
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages layer1..layer4 as (base width, stride of the stage's first block).
        stage_plan = [(64, 1), (128, 2), (256, 2), (512, 2)]
        for idx, (width, first_stride) in enumerate(stage_plan):
            stage = self.make_layer(block, width, num_blocks[idx], stride=first_stride)
            setattr(self, "layer{}".format(idx + 1), stage)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage and advance ``self.in_channels`` to its output width.

        The first block receives ``stride``; each of the remaining
        ``num_blocks - 1`` blocks uses stride 1.
        """
        stage_blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage_blocks.append(block(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        features = self.maxpool(self.relu(self.bn(self.conv(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = self.avgpool(features)
        # Flatten everything except the batch dimension before the linear head.
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [5]:
class BottleNeck2(nn.Module):
    """Bottleneck block whose 3x3 convolution is grouped.

    Layout: 1x1 conv -> grouped 3x3 conv (``groups=cardinality``) -> 1x1 conv
    widening to ``out_channels * expansion`` channels, plus a shortcut.
    ``stride`` is applied by the grouped 3x3 convolution and, when the shortcut
    needs a projection, by the shortcut's 1x1 convolution.

    Args:
        in_channels: number of channels of the block input.
        out_channels: width of the two inner convolutions.
        cardinality: number of groups of the 3x3 convolution.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """

    expansion = 2

    def __init__(self, in_channels, out_channels, cardinality, stride=1):
        super().__init__()
        widened = out_channels * self.expansion
        self.relu = F.relu

        # Main path (bias-free convolutions, each followed by BatchNorm).
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=cardinality,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        # Shortcut: pass-through when shapes already agree, else a 1x1 projection.
        if stride == 1 and in_channels == widened:
            self.identity = nn.Sequential()
        else:
            self.identity = nn.Sequential(
                nn.Conv2d(in_channels, widened, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(widened),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y += self.identity(x)
        return self.relu(y)
class ResNeXt(nn.Module):
    """ResNeXt-style classifier for single-channel images.

    Layout: 7x7 stride-2 convolution -> BatchNorm -> ReLU -> 3x3 max-pool ->
    four stages built from ``block`` -> global average pool -> linear head.
    The grouped-convolution width starts at ``cardinality * width`` and is
    multiplied by ``block.expansion`` after every stage.

    Args:
        block: block class exposing an ``expansion`` attribute and accepting
            ``(in_channels, out_channels, cardinality, stride)``.
        num_blocks: sequence of four per-stage block counts.
        cardinality: number of groups handed to every block.
        width: channels per group in the first stage.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, block, num_blocks, cardinality = 32, width = 4, num_classes=10):
        super().__init__()
        self.in_channels = 64
        self.group_conv_width = cardinality * width
        self.relu = F.relu

        # Stem (this convolution keeps nn.Conv2d's default bias).
        self.conv = nn.Conv2d(1, self.in_channels, kernel_size=7, stride=2, padding = 3)
        self.bn = nn.BatchNorm2d(self.in_channels)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stages layer1..layer4; only the first stage keeps the spatial resolution.
        for idx, first_stride in enumerate((1, 2, 2, 2)):
            stage = self.make_layer(block, cardinality, num_blocks[idx], stride=first_stride)
            setattr(self, "layer{}".format(idx + 1), stage)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        # make_layer has already advanced group_conv_width past the last stage.
        self.linear = nn.Linear(self.group_conv_width, num_classes)

    def make_layer(self, block, cardinality, num_blocks, stride):
        """Build one stage, then scale ``group_conv_width`` by ``block.expansion``.

        The first block receives ``stride``; the remaining blocks use stride 1.
        ``self.in_channels`` is updated after every block that is created.
        """
        stage_width = self.group_conv_width
        stage_blocks = []
        for idx in range(num_blocks):
            block_stride = stride if idx == 0 else 1
            stage_blocks.append(block(self.in_channels, stage_width, cardinality, block_stride))
            self.in_channels = block.expansion * stage_width
        self.group_conv_width = stage_width * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        features = self.maxpool(self.relu(self.bn(self.conv(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = self.avgpool(features)
        # Flatten everything except the batch dimension before the linear head.
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

In [6]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [7]:
def Train(model, trainloader):
    """Train ``model`` on the batches produced by ``trainloader``.

    Makes ``epochs`` passes over ``trainloader``; every batch is moved to the
    device the model's parameters live on, and the mean batch loss is printed
    after each epoch.

    Args:
        model: ``nn.Module`` to optimize. It is switched to training mode.
        trainloader: iterable of ``(data, target)`` batches supporting ``len()``.

    Notes:
        ``epochs``, ``criterion`` and ``optimizer`` are read from the notebook's
        global namespace, so ``optimizer`` must already have been created over
        ``model``'s parameters before this function is called.
    """
    # An earlier evaluation may have left the model in eval mode.
    model.train()

    # Follow the model's own device rather than assuming CUDA is present.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    num_batches = len(trainloader)
    for epoch in range(epochs):
        avg_loss = 0.0
        for data, target in trainloader:
            if model_device is not None:
                data = data.to(model_device)
                target = target.to(model_device)
            optimizer.zero_grad()
            pred = model(data)
            loss = criterion(pred, target)
            loss.backward()
            optimizer.step()
            # .item() yields a plain float, so the running average does not
            # keep graph-attached tensors alive across batches.
            avg_loss += loss.item() / num_batches
        print('[Epoch: {:>4}] loss = {:>.9}'.format(epoch + 1, avg_loss))

In [8]:
def Test(model, testloader):
    """Evaluate ``model`` and return its top-1 and top-5 accuracy in percent.

    Args:
        model: ``nn.Module`` returning per-class scores of shape
            ``(batch, num_classes)``. It is switched to eval mode.
        testloader: iterable of ``(data, target)`` batches.

    Returns:
        ``(top_1, top_5)`` as percentages. When the model has fewer than five
        classes, "top-5" considers all of them. ``(0.0, 0.0)`` is returned if
        ``testloader`` yields no samples.
    """
    model.eval()

    # Follow the model's own device rather than assuming CUDA is present.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    top_1 = 0
    top_5 = 0
    total = 0
    with torch.no_grad():
        for data, target in testloader:
            if model_device is not None:
                data = data.to(model_device)
                target = target.to(model_device)
            scores = model(data)
            total += target.size(0)

            _, predicted = torch.max(scores, 1)
            top_1 += (predicted == target).sum().item()

            # topk raises if k exceeds the number of classes, so clamp it.
            k = min(5, scores.size(1))
            _, predicted_top5 = torch.topk(scores, k, dim=1)
            top_5 += torch.any(predicted_top5 == target.view(-1, 1), dim=1).sum().item()

    if total == 0:
        return 0.0, 0.0

    top_1 = 100 * top_1 / total
    top_5 = 100 * top_5 / total
    return top_1, top_5

In [13]:
# ResNet-50: BottleNeck blocks stacked 3-4-6-3 over the four stages.
modelNet = ResNet(BottleNeck, [3, 4, 6, 3], num_classes=10).to(device)
# Train() reads this module-level `optimizer`, so it has to exist before the call.
optimizer = optim.Adam(modelNet.parameters(), lr=learning_rate)
Train(modelNet, train_loader)

# Model complexity as reported by ptflops for a single 1x28x28 input.
flops, params = get_model_complexity_info(
    modelNet,
    (1, 28, 28),
    print_per_layer_stat=False,
)
print(f"FLOPs: {flops}")
print(f"Params: {params}")

# Accuracy on the held-out test split.
top_1, top_5 = Test(modelNet, test_loader)
print("Accuracy Top-1 : {:.2f}%".format(top_1))
print("Accuracy Top-5 : {:.2f}%".format(top_5))

[Epoch:    1] loss = 0.617725849
[Epoch:    2] loss = 0.518900931
[Epoch:    3] loss = 0.346054912
[Epoch:    4] loss = 0.429742575
[Epoch:    5] loss = 0.517321348
[Epoch:    6] loss = 0.555085421
[Epoch:    7] loss = 0.39370656
[Epoch:    8] loss = 0.328702599
[Epoch:    9] loss = 0.2954337
[Epoch:   10] loss = 0.277595162
FLOPs: 74.14 MMac
Params: 23.52 M
Accuracy Top-1 : 87.94%
Accuracy Top-5 : 99.78%


In [15]:
# ResNet-101: BottleNeck blocks stacked 3-4-23-3 over the four stages.
modelNet = ResNet(BottleNeck, [3, 4, 23, 3], num_classes=10).to(device)
# Train() reads this module-level `optimizer`, so it has to exist before the call.
optimizer = optim.Adam(modelNet.parameters(), lr=learning_rate)
Train(modelNet, train_loader)

# Model complexity as reported by ptflops for a single 1x28x28 input.
flops, params = get_model_complexity_info(
    modelNet,
    (1, 28, 28),
    print_per_layer_stat=False,
)
print(f"FLOPs: {flops}")
print(f"Params: {params}")

# Accuracy on the held-out test split.
top_1, top_5 = Test(modelNet, test_loader)
print("Accuracy Top-1 : {:.2f}%".format(top_1))
print("Accuracy Top-5 : {:.2f}%".format(top_5))

[Epoch:    1] loss = 0.639609516
[Epoch:    2] loss = 0.425476164
[Epoch:    3] loss = 0.418670535
[Epoch:    4] loss = 0.365265638
[Epoch:    5] loss = 0.335841924
[Epoch:    6] loss = 0.339536011
[Epoch:    7] loss = 0.32011947
[Epoch:    8] loss = 0.311674684
[Epoch:    9] loss = 0.255258232
[Epoch:   10] loss = 0.238968685
FLOPs: 150.1 MMac
Params: 42.51 M
Accuracy Top-1 : 88.71%
Accuracy Top-5 : 99.77%


In [18]:
# ResNeXt-50: BottleNeck2 blocks stacked 3-4-6-3, with ResNeXt's default cardinality and width.
modelNet = ResNeXt(BottleNeck2, [3, 4, 6, 3]).to(device)
# Train() reads this module-level `optimizer`, so it has to exist before the call.
optimizer = optim.Adam(modelNet.parameters(), lr=learning_rate)
Train(modelNet, train_loader)

# Model complexity as reported by ptflops for a single 1x28x28 input.
flops, params = get_model_complexity_info(
    modelNet,
    (1, 28, 28),
    print_per_layer_stat=False,
)
print(f"FLOPs: {flops}")
print(f"Params: {params}")

# Accuracy on the held-out test split.
top_1, top_5 = Test(modelNet, test_loader)
print("Accuracy Top-1 : {:.2f}%".format(top_1))
print("Accuracy Top-5 : {:.2f}%".format(top_5))

[Epoch:    1] loss = 0.526321411
[Epoch:    2] loss = 0.469188482
[Epoch:    3] loss = 0.364597559
[Epoch:    4] loss = 0.318433136
[Epoch:    5] loss = 0.262901515
[Epoch:    6] loss = 0.24865073
[Epoch:    7] loss = 0.275252879
[Epoch:    8] loss = 0.251390129
[Epoch:    9] loss = 0.230906859
[Epoch:   10] loss = 0.222429499
FLOPs: 81.06 MMac
Params: 22.99 M
Accuracy Top-1 : 88.02%
Accuracy Top-5 : 99.85%


In [17]:
# ResNeXt-101: BottleNeck2 blocks stacked 3-4-23-3, with ResNeXt's default cardinality and width.
modelNet = ResNeXt(BottleNeck2, [3, 4, 23, 3]).to(device)
# Train() reads this module-level `optimizer`, so it has to exist before the call.
optimizer = optim.Adam(modelNet.parameters(), lr=learning_rate)
Train(modelNet, train_loader)

# Model complexity as reported by ptflops for a single 1x28x28 input.
flops, params = get_model_complexity_info(
    modelNet,
    (1, 28, 28),
    print_per_layer_stat=False,
)
print(f"FLOPs: {flops}")
print(f"Params: {params}")

# Accuracy on the held-out test split.
top_1, top_5 = Test(modelNet, test_loader)
print("Accuracy Top-1 : {:.2f}%".format(top_1))
print("Accuracy Top-5 : {:.2f}%".format(top_5))

[Epoch:    1] loss = 0.544577003
[Epoch:    2] loss = 0.346579909
[Epoch:    3] loss = 0.606976986
[Epoch:    4] loss = 0.393099189
[Epoch:    5] loss = 0.324199021
[Epoch:    6] loss = 0.531326652
[Epoch:    7] loss = 0.361041188
[Epoch:    8] loss = 0.304178119
[Epoch:    9] loss = 0.331313461
[Epoch:   10] loss = 0.282746792
FLOPs: 157.66 MMac
Params: 42.14 M
Accuracy Top-1 : 89.61%
Accuracy Top-5 : 99.88%


In [11]:
# NOTE(review): `torch` is already imported in the notebook's import cell; it is
# re-imported here, presumably so this cell can be run on its own.
import torch
# Utility cell: clears PyTorch's CUDA cache (see the torch.cuda.empty_cache docs for
# what is released). Its execution count (11) shows it was run out of document order.
torch.cuda.empty_cache()