In [13]:
# import

import numpy as np
from datetime import datetime
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
import torchinfo

# Run on the GPU whenever CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
from pytz import timezone

In [3]:
# Hyper-parameter settings
# (RANDOM_SEED, LEARNING_RATE, BATCH_SIZE and EPOCHS are not used by the model
#  cells below -- presumably consumed by training cells; verify there.)
RANDOM_SEED = 4242
LEARNING_RATE = 1e-2
BATCH_SIZE = 32
EPOCHS = 90
IMG_SIZE = 227  # height and width of the input passed to torchinfo.summary
NUM_CLASSES = 1000  # output width of the final fully connected layer (fc8)

# Regularisation settings
DROPOUT = 0.5  # passed to nn.Dropout
LRN_K = 2.0  # k of nn.LocalResponseNorm
LRN_ALPHA = 1e-4  # alpha of nn.LocalResponseNorm
LRN_BETA = 0.75  # beta of nn.LocalResponseNorm
LRN_N = 5  # size (number of neighbouring channels) of nn.LocalResponseNorm

In [5]:
# AlexNet, implemented to stay as close to the original paper as possible
class AlexNet(nn.Module):
    """Two-tower AlexNet that emulates the paper's two-GPU split on one device.

    Each ``*_a`` / ``*_b`` pair of layers is one half ("tower") of a stage.
    The towers run independently for Conv 1, Conv 2, Conv 4 and Conv 5, and
    see the channel-wise concatenation of both towers' activations before
    Conv 3, FC 6, FC 7 and FC 8.

    FC 6 is built for ``128 * 6 * 6 * 2`` input features, so the input image
    must be sized such that each tower's ``pool5`` output is 6 x 6.

    ``forward`` returns raw logits with ``NUM_CLASSES`` entries per sample;
    no softmax is applied.
    """

    def __init__(self):
        super().__init__()

        # Parameter-free layers, shared by both towers.
        # NOTE(review): PyTorch documents LocalResponseNorm's denominator as
        # (k + alpha / size * sum(a^2)) ** beta, i.e. alpha is divided by the
        # window size. If LRN_ALPHA is meant to be the paper's alpha, it may
        # need to be passed as LRN_ALPHA * LRN_N -- confirm.
        self.lrn = nn.LocalResponseNorm(
            size=LRN_N, alpha=LRN_ALPHA, beta=LRN_BETA, k=LRN_K
        )
        self.dropout = nn.Dropout(p=DROPOUT)

        # Layers are created in stage order (a before b) so that seeded
        # weight initialisation consumes the RNG in a fixed sequence.

        # Conv 1: 3 -> 48 channels per tower, 11x11 kernels, stride 4
        self.conv1_a = nn.Conv2d(3, 48, kernel_size=11, stride=4)
        self.conv1_b = nn.Conv2d(3, 48, kernel_size=11, stride=4)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Conv 2: 48 -> 128 channels per tower, 5x5 kernels
        self.conv2_a = nn.Conv2d(48, 128, kernel_size=5, padding=2)
        self.conv2_b = nn.Conv2d(48, 128, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        # Conv 3: each tower reads the 2 * 128 merged channels
        merged_channels = 2 * 128
        self.conv3_a = nn.Conv2d(merged_channels, 192, kernel_size=3, padding=1)
        self.conv3_b = nn.Conv2d(merged_channels, 192, kernel_size=3, padding=1)

        # Conv 4: 192 -> 192 channels per tower
        self.conv4_a = nn.Conv2d(192, 192, kernel_size=3, padding=1)
        self.conv4_b = nn.Conv2d(192, 192, kernel_size=3, padding=1)

        # Conv 5: 192 -> 128 channels per tower
        self.conv5_a = nn.Conv2d(192, 128, kernel_size=3, padding=1)
        self.conv5_b = nn.Conv2d(192, 128, kernel_size=3, padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=3, stride=2)

        # FC 6: input is both towers' 128 x 6 x 6 maps, flattened
        flat_features = 2 * (128 * 6 * 6)
        self.fc6_a = nn.Linear(flat_features, 2048)
        self.fc6_b = nn.Linear(flat_features, 2048)

        # FC 7: input is both towers' 2048 FC 6 outputs
        hidden_features = 2 * 2048
        self.fc7_a = nn.Linear(hidden_features, 2048)
        self.fc7_b = nn.Linear(hidden_features, 2048)

        # FC 8: single classifier over both towers' FC 7 outputs
        self.fc8 = nn.Linear(hidden_features, NUM_CLASSES)

    def forward(self, x):
        """Return the logits for the image batch ``x``."""
        relu = F.relu

        # Conv 1: conv -> ReLU -> LRN per tower, then max-pool
        tower_a = self.lrn(relu(self.conv1_a(x)))
        tower_b = self.lrn(relu(self.conv1_b(x)))
        tower_a = self.pool1(tower_a)
        tower_b = self.pool1(tower_b)

        # Conv 2: same pattern, each tower still sees only its own channels
        tower_a = self.lrn(relu(self.conv2_a(tower_a)))
        tower_b = self.lrn(relu(self.conv2_b(tower_b)))
        tower_a = self.pool2(tower_a)
        tower_b = self.pool2(tower_b)

        # Conv 3: merge both towers along the channel axis, then let each
        # tower compute on the merged tensor
        merged = torch.cat([tower_a, tower_b], dim=1)
        tower_a = relu(self.conv3_a(merged))
        tower_b = relu(self.conv3_b(merged))

        # Conv 4
        tower_a = relu(self.conv4_a(tower_a))
        tower_b = relu(self.conv4_b(tower_b))

        # Conv 5, followed by the last max-pool
        tower_a = relu(self.conv5_a(tower_a))
        tower_b = relu(self.conv5_b(tower_b))
        tower_a = self.pool5(tower_a)
        tower_b = self.pool5(tower_b)

        # FC 6: merge the towers, flatten to (batch, features), then let each
        # tower compute on the merged vector
        merged = torch.cat([tower_a, tower_b], dim=1)
        merged = merged.view(merged.shape[0], -1)
        tower_a = self.dropout(relu(self.fc6_a(merged)))
        tower_b = self.dropout(relu(self.fc6_b(merged)))

        # FC 7: merge again, then let each tower compute
        merged = torch.cat([tower_a, tower_b], dim=1)
        tower_a = self.dropout(relu(self.fc7_a(merged)))
        tower_b = self.dropout(relu(self.fc7_b(merged)))

        # FC 8: merge the towers for the final classification layer
        merged = torch.cat([tower_a, tower_b], dim=1)
        return self.fc8(merged)


# Layer-by-layer summary for a single 3-channel IMG_SIZE x IMG_SIZE input.
# Kept as the cell's trailing expression so the notebook displays the result.
torchinfo.summary(
    model=AlexNet(),
    input_size=(1, 3, IMG_SIZE, IMG_SIZE),
    row_settings=("depth", "var_names"),
    col_names=("input_size", "output_size", "num_params", "kernel_size"),
)

Layer (type (var_name):depth-idx)        Input Shape               Output Shape              Param #                   Kernel Shape
AlexNet (AlexNet)                        [1, 3, 227, 227]          [1, 1000]                 --                        --
├─Conv2d (conv1_a): 1-1                  [1, 3, 227, 227]          [1, 48, 55, 55]           17,472                    [11, 11]
├─LocalResponseNorm (lrn): 1-2           [1, 48, 55, 55]           [1, 48, 55, 55]           --                        --
├─Conv2d (conv1_b): 1-3                  [1, 3, 227, 227]          [1, 48, 55, 55]           17,472                    [11, 11]
├─LocalResponseNorm (lrn): 1-4           [1, 48, 55, 55]           [1, 48, 55, 55]           --                        --
├─MaxPool2d (pool1): 1-5                 [1, 48, 55, 55]           [1, 48, 27, 27]           --                        3
├─MaxPool2d (pool1): 1-6                 [1, 48, 55, 55]           [1, 48, 27, 27]           --                        3
├─Co

In [None]:
# # Practice: splitting AlexNet across two GPUs
# class AlexNet(nn.Module):
#     def __init__(self):
#         super(AlexNet, self).__init__()
        
#         self.conv1_a = nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=0).to('cuda:0')
#         self.conv1_b = nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=0).to('cuda:1')
#         self.pool1_a = nn.MaxPool2d(kernel_size=4).to('cuda:0')
#         self.pool1_b = nn.MaxPool2d(kernel_size=4).to('cuda:1')
        
#         self.fc2 = nn.Linear(16224, NUM_CLASSES).to('cuda:0')

#     def forward(self, x):

#         x_a = x.to('cuda:0')
#         x_b = x.to('cuda:1')
#         x_a = F.relu(self.conv1_a(x_a))
#         x_b = F.relu(self.conv1_b(x_b))
#         x_a = self.pool1_a(x_a)
#         x_b = self.pool1_b(x_b)
        
#         print("2nd ", x_a.device)
#         print("2nd ", x_b.device)
        
#         x_b = x_b.to('cuda:0')

#         x = torch.cat((x_a, x_b), dim=1)
#         x = x.view(x.size(0), -1)
#         logits = self.fc2(x)

#         return logits

In [None]:
# AlexNet laid out across two GPUs, for machines that actually provide two devices
class AlexNet(nn.Module):
    """AlexNet whose two towers live on two separate devices.

    Every ``*_a`` layer is placed on ``device_a`` and every ``*_b`` layer on
    ``device_b``. The towers run independently for Conv 1, Conv 2, Conv 4 and
    Conv 5. Before Conv 3, FC 6 and FC 7 each tower receives a copy of the
    other tower's activations and works on the channel-wise concatenation of
    both. FC 8 runs once, on ``device_a``, over both towers' FC 7 outputs.

    FC 6 is built for ``128 * 6 * 6 * 2`` input features, so the input image
    must be sized such that each tower's last pooling output is 6 x 6.

    Args:
        device_a: Device for the ``*_a`` layers and ``fc8``; the returned
            logits live on this device. Defaults to ``'cuda:0'``.
        device_b: Device for the ``*_b`` layers. Defaults to ``'cuda:1'``.
        verbose: When true, ``forward`` prints the device of the activations
            after every stage (placement tracing). Off by default so that a
            training loop is not flooded with output on every batch.
    """

    def __init__(self, device_a='cuda:0', device_b='cuda:1', verbose=False):
        super().__init__()
        self.device_a = torch.device(device_a)
        self.device_b = torch.device(device_b)
        self.verbose = verbose

        # Parameter-free layers: they hold no tensors, so they need no device
        # placement. The per-tower attribute names are kept for compatibility.
        self.lrn = nn.LocalResponseNorm(LRN_N, alpha=LRN_ALPHA, beta=LRN_BETA, k=LRN_K)
        self.dropout_a = nn.Dropout(DROPOUT)
        self.dropout_b = nn.Dropout(DROPOUT)
        self.pool_a = nn.MaxPool2d(kernel_size=3, stride=2)
        self.pool_b = nn.MaxPool2d(kernel_size=3, stride=2)

        # Layers are created in stage order (a before b) so that seeded
        # weight initialisation consumes the RNG in a fixed sequence.

        # Conv 1
        self.conv1_a = nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=0).to(self.device_a)
        self.conv1_b = nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=0).to(self.device_b)

        # Conv 2
        self.conv2_a = nn.Conv2d(48, 128, kernel_size=5, stride=1, padding=2).to(self.device_a)
        self.conv2_b = nn.Conv2d(48, 128, kernel_size=5, stride=1, padding=2).to(self.device_b)

        # Conv 3 (reads the merged 2 * 128 channels)
        self.conv3_a = nn.Conv2d(128 * 2, 192, kernel_size=3, stride=1, padding=1).to(self.device_a)
        self.conv3_b = nn.Conv2d(128 * 2, 192, kernel_size=3, stride=1, padding=1).to(self.device_b)

        # Conv 4
        self.conv4_a = nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1).to(self.device_a)
        self.conv4_b = nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1).to(self.device_b)

        # Conv 5
        self.conv5_a = nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1).to(self.device_a)
        self.conv5_b = nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1).to(self.device_b)

        # FC 6 (reads both towers' flattened 128 x 6 x 6 maps)
        self.fc6_a = nn.Linear(128 * 6 * 6 * 2, 2048).to(self.device_a)
        self.fc6_b = nn.Linear(128 * 6 * 6 * 2, 2048).to(self.device_b)

        # FC 7 (reads both towers' FC 6 outputs)
        self.fc7_a = nn.Linear(2048 * 2, 2048).to(self.device_a)
        self.fc7_b = nn.Linear(2048 * 2, 2048).to(self.device_b)

        # FC 8 (single classifier, lives on device_a)
        self.fc8 = nn.Linear(2048 * 2, NUM_CLASSES).to(self.device_a)

    def _exchange(self, x_a, x_b):
        """Copy each tower's activations to the other device and concatenate.

        Returns a pair ``(on_a, on_b)``; both hold ``[x_a, x_b]`` joined along
        dim 1, the first on ``device_a`` and the second on ``device_b``.
        """
        from_b = x_b.to(self.device_a)
        from_a = x_a.to(self.device_b)
        return torch.cat((x_a, from_b), dim=1), torch.cat((from_a, x_b), dim=1)

    def _trace(self, stage, *tensors):
        """Print the device of each tensor, prefixed by ``stage``, if verbose."""
        if self.verbose:
            for tensor in tensors:
                print(f"{stage} ", tensor.device)

    def forward(self, x):
        """Return the logits (on ``device_a``) for the image batch ``x``."""
        # Each tower gets its own copy of the input on its own device.
        x_a = x.to(self.device_a)
        x_b = x.to(self.device_b)

        # Conv 1
        x_a = self.lrn(F.relu(self.conv1_a(x_a)))
        x_b = self.lrn(F.relu(self.conv1_b(x_b)))
        x_a = self.pool_a(x_a)
        x_b = self.pool_b(x_b)
        self._trace("Conv1", x_a, x_b)

        # Conv 2
        x_a = self.lrn(F.relu(self.conv2_a(x_a)))
        x_b = self.lrn(F.relu(self.conv2_b(x_b)))
        x_a = self.pool_a(x_a)
        x_b = self.pool_b(x_b)
        self._trace("Conv2", x_a, x_b)

        # Conv 3: merge the data of both devices, then compute on each
        x_a, x_b = self._exchange(x_a, x_b)
        x_a = F.relu(self.conv3_a(x_a))
        x_b = F.relu(self.conv3_b(x_b))
        self._trace("Conv3", x_a, x_b)

        # Conv 4
        x_a = F.relu(self.conv4_a(x_a))
        x_b = F.relu(self.conv4_b(x_b))
        self._trace("Conv4", x_a, x_b)

        # Conv 5
        x_a = F.relu(self.conv5_a(x_a))
        x_b = F.relu(self.conv5_b(x_b))
        x_a = self.pool_a(x_a)
        x_b = self.pool_b(x_b)
        self._trace("Conv5", x_a, x_b)

        # FC 6: flatten each tower, merge across devices, then compute on each
        x_a = x_a.view(x_a.size(0), -1)
        x_b = x_b.view(x_b.size(0), -1)
        x_a, x_b = self._exchange(x_a, x_b)
        x_a = self.dropout_a(F.relu(self.fc6_a(x_a)))
        x_b = self.dropout_b(F.relu(self.fc6_b(x_b)))
        self._trace("FC6", x_a, x_b)

        # FC 7: merge across devices, then compute on each
        x_a, x_b = self._exchange(x_a, x_b)
        x_a = self.dropout_a(F.relu(self.fc7_a(x_a)))
        x_b = self.dropout_b(F.relu(self.fc7_b(x_b)))
        self._trace("FC7", x_a, x_b)

        # FC 8: gather both towers on device_a for the final layer
        x = torch.cat((x_a, x_b.to(self.device_a)), dim=1)
        logits = self.fc8(x)
        self._trace("FC8", x)

        return logits