# Paper
- ImageNet Classification with Deep Convolutional Neural Networks(NIPS 2012)
- 논문 리뷰 : https://sonstory.tistory.com/41

In [1]:
import torch

import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import pandas as pd
import time

from torch.cuda import is_available
from tqdm import tqdm
from torchvision.datasets import CIFAR10
from torchsummary import torchsummary
from torch.utils.data import DataLoader, random_split, Dataset

%matplotlib inline

In [2]:
device='cuda' if is_available() else 'cpu'
print(device)

cuda


# Dataset Load

In [3]:
# Data Load
data_transformer = transforms.Compose([transforms.ToTensor()])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transformer)
test_data = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transformer)

Files already downloaded and verified
Files already downloaded and verified


# Calculate Means and Stds

In [4]:
# 데이터 정규화를 위한 평균, 표준편차 값
meanRGB = [np.mean(x.numpy(), axis=(1,2)) for x, _ in trainset]
stdRGB = [np.std(x.numpy(), axis=(1,2)) for x, _ in trainset]

meanR = np.mean([m[0] for m in meanRGB])
meanG = np.mean([m[1] for m in meanRGB])
meanB = np.mean([m[2] for m in meanRGB])

stdR = np.mean([s[0] for s in stdRGB])
stdG = np.mean([s[1] for s in stdRGB])
stdB = np.mean([s[2] for s in stdRGB])

print(meanR, meanG, meanB)
print(stdR, stdG, stdB)

0.49139965 0.48215845 0.4465309
0.20220213 0.19931543 0.20086348


# Image Transformation

In [11]:
train_transformer = transforms.Compose([
                transforms.Resize(256),
                transforms.RandomResizedCrop(227),
                transforms.RandomHorizontalFlip(), # default=0.5
                transforms.ToTensor(),
                transforms.Normalize([meanR, meanG, meanB], [stdR, stdG, stdB]) # 정규화
])

test_transformer = transforms.Compose([
                transforms.Resize(256),
                transforms.RandomResizedCrop(227),
                transforms.ToTensor(),
                transforms.Normalize([meanR, meanG, meanB], [stdR, stdG, stdB])
])

논문에선 tranformation 이후 이미지의 크기를 224로 지정하지만 이는 오타이며, 227이 맞다

# Train and Valid Split
https://github.com/JJuOn/pytorch-implementation/blob/main/2.AlexNet.ipynb

In [6]:
class CustomSubset(Dataset):
    def __init__(self,Subset,transform=None):
        super(CustomSubset,self).__init__()
        self.Subset=Subset
        self.indices=Subset.indices
        self.transform=transform

    def __len__(self):
        return len(self.Subset)

    def __getitem__(self,idx):
        img,label=self.Subset[idx]
        if self.transform is not None:
            img=self.transform(img)
        return img,label

In [12]:
# train, valid dataset
train_data_len = int(len(trainset)*0.8)
valid_data_len = len(trainset) - train_data_len
train_data, valid_data = random_split(trainset, [train_data_len, valid_data_len])

In [13]:
print(len(train_data), len(valid_data), len(test_data))

40000 10000 10000


In [14]:
trainset.transform=None
train_data = CustomSubset(train_data, train_transformer)
valid_data = CustomSubset(valid_data, test_transformer)

test_data.transform = test_transformer

In [15]:
print(train_data[0][0].size(),valid_data[0][0].size(),test_data[0][0].size())

torch.Size([3, 227, 227]) torch.Size([3, 227, 227]) torch.Size([3, 227, 227])


train, valid, test 데이터셋 모두 transform이 잘 적용된 모습

# DataLoader

In [16]:
batch_size = 32

train_dl=DataLoader(train_data,batch_size=batch_size,shuffle=True)
val_dl=DataLoader(valid_data,batch_size=batch_size,shuffle=False)
test_dl=DataLoader(test_data,batch_size=batch_size,shuffle=False)

In [17]:
for x,y in train_dl:
    print(x.shape)
    print(y.shape)
    break

for x,y in val_dl:
    print(x.shape)
    print(y.shape)
    break
    
for x,y in test_dl:
    print(x.shape)
    print(y.shape)
    break

torch.Size([32, 3, 227, 227])
torch.Size([32])
torch.Size([32, 3, 227, 227])
torch.Size([32])
torch.Size([32, 3, 227, 227])
torch.Size([32])


# Define a Model

In [18]:
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()# Image input_size=(3, 227, 227)
        # Convolutional layer
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=(11, 11), stride=4, padding=0), 
            nn.ReLU(), 
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=(5, 5), stride=1, padding=2),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2), 
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=(3, 3), stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2), 
        )
        # FC Layer
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256*6*6, out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=num_classes),
        )
    
    def init_weight(self):
        for layer in self.net:
            if isinstance(layer, nn.Conv2d):
                nn.init.normal_(layer.weight, mean=0, std=0.01)
                nn.init.constant_(layer.bias, 0)
        # conv 2,4,5, fc layer -> bias 1
        nn.init.constant_(self.net[4].bias, 1)
        nn.init.constant_(self.net[10].bias, 1)
        nn.init.constant_(self.net[12].bias, 1)
        nn.init.constant_(self.classifier[1].bias, 1)
        nn.init.constant_(self.classifier[4].bias, 1)
        nn.init.constant_(self.classifier[6].bias, 1)

    def forward(self,x):
        x = self.net(x)
        x = x.view(-1, 256 * 6* 6)
        x = self.classifier(x)
        return x

In [19]:
model = AlexNet(num_classes=10).to(device)

torchsummary.summary(model, input_size=(3,227,227), device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 55, 55]          34,944
              ReLU-2           [-1, 96, 55, 55]               0
 LocalResponseNorm-3           [-1, 96, 55, 55]               0
         MaxPool2d-4           [-1, 96, 27, 27]               0
            Conv2d-5          [-1, 256, 27, 27]         614,656
              ReLU-6          [-1, 256, 27, 27]               0
 LocalResponseNorm-7          [-1, 256, 27, 27]               0
         MaxPool2d-8          [-1, 256, 13, 13]               0
            Conv2d-9          [-1, 384, 13, 13]         885,120
             ReLU-10          [-1, 384, 13, 13]               0
           Conv2d-11          [-1, 384, 13, 13]       1,327,488
             ReLU-12          [-1, 384, 13, 13]               0
           Conv2d-13          [-1, 256, 13, 13]         884,992
             ReLU-14          [-1, 256,

논문에선 class의 개수가 1000개였지만 CIFAR10의 class 개수는 10이므로 10으로 num_classes 설정

In [20]:
print(model)

AlexNet(
  (net): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=2)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): Re

In [21]:
# 가중치 초기화 확인
for p in model.parameters():
    print(p)
    break

Parameter containing:
tensor([[[[ 4.2146e-02,  4.9867e-02,  8.8425e-03,  ..., -3.5595e-02,
           -4.3241e-02, -5.2277e-02],
          [ 5.4961e-03,  2.0416e-02, -2.7046e-02,  ..., -4.0930e-02,
           -1.7460e-02,  4.0744e-02],
          [-1.7974e-02, -7.7522e-03,  3.3285e-02,  ...,  4.4507e-04,
            4.6305e-02,  1.8806e-02],
          ...,
          [-3.2933e-02, -3.8534e-02,  3.5855e-02,  ...,  4.4763e-02,
            4.2090e-02,  9.6921e-03],
          [ 4.4572e-02, -9.0005e-03,  2.4313e-02,  ..., -3.1259e-02,
            2.0522e-03,  2.9743e-02],
          [-1.0890e-03,  2.6189e-02, -3.5394e-02,  ..., -7.9948e-03,
            3.5168e-02, -1.3899e-02]],

         [[-1.7538e-02, -1.5827e-02,  1.1224e-03,  ...,  1.4050e-02,
           -3.7949e-02, -3.6977e-02],
          [ 2.4076e-02,  4.4211e-02,  4.9671e-02,  ..., -2.1470e-02,
            2.8966e-02, -2.6277e-03],
          [-3.1357e-02, -3.7113e-02, -4.1873e-03,  ..., -3.9697e-02,
            2.4054e-02,  4.1462e-02]

# Optimizer & Loss function

In [29]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
criterion = nn.CrossEntropyLoss()

# Train and Validation

In [30]:
def model_train(model, data_loader, criterion, optimizer, device):
    model.train()
    
    running_size = 0
    running_loss = 0
    corr = 0
    
    prograss_bar = tqdm(data_loader)
    
    for batch_idx, (img, lbl) in enumerate(prograss_bar, start=1):
        img, lbl = img.to(device), lbl.to(device)
        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, lbl)
        loss.backward()
        optimizer.step()
        _, pred = output.max(dim=1)
        corr += pred.eq(lbl).sum().item()
        running_loss += loss.item() * img.size(0)
        running_size += img.size(0)
        prograss_bar.set_description(f'[Training] loss: {running_loss / running_size:.4f}, accuracy: {corr / running_size:.4f}')
        
    acc = corr / len(data_loader.dataset)
    
    return running_loss / len(data_loader.dataset), acc

In [31]:
def model_evaluate(model, data_loader, criterion, device):
    model.eval()
    
    with torch.no_grad():
        corr = 0
        running_loss = 0
        
        for img, lbl in data_loader:
            img, lbl = img.to(device), lbl.to(device)
            output = model(img)
            _, pred = output.max(dim=1)
            corr += torch.sum(pred.eq(lbl)).item()
            running_loss += criterion(output, lbl).item() * img.size(0)
            
        acc = corr / len(data_loader.dataset)
  
        return running_loss / len(data_loader.dataset), acc

In [32]:
min_loss = np.inf

for epoch in range(30):
    # Model Training
    train_loss, train_acc = model_train(model, train_dl, criterion, optimizer, device)
    val_loss, val_acc = model_evaluate(model, val_dl, criterion, device)   
    
    if val_loss < min_loss:
        print(f'[INFO] val_loss has been improved from {min_loss:.5f} to {val_loss:.5f}. Saving Model!')
        min_loss = val_loss
        torch.save(model.state_dict(), './model/AlexNet.pth')

    print(f'epoch {epoch+1:02d}, loss: {train_loss:.5f}, acc: {train_acc:.5f}, val_loss: {val_loss:.5f}, val_accuracy: {val_acc:.5f}')

[Training] loss: 2.3024, accuracy: 0.1017: 100%|███████████████████████████████████| 1250/1250 [01:13<00:00, 16.94it/s]


[INFO] val_loss has been improved from inf to 2.30149. Saving Model!
epoch 01, loss: 2.30238, acc: 0.10172, val_loss: 2.30149, val_accuracy: 0.10380


[Training] loss: 2.2633, accuracy: 0.1374: 100%|███████████████████████████████████| 1250/1250 [02:21<00:00,  8.81it/s]


[INFO] val_loss has been improved from 2.30149 to 2.13862. Saving Model!
epoch 02, loss: 2.26331, acc: 0.13737, val_loss: 2.13862, val_accuracy: 0.20770


[Training] loss: 2.0508, accuracy: 0.2227: 100%|███████████████████████████████████| 1250/1250 [03:58<00:00,  5.24it/s]


[INFO] val_loss has been improved from 2.13862 to 1.98977. Saving Model!
epoch 03, loss: 2.05082, acc: 0.22270, val_loss: 1.98977, val_accuracy: 0.24130


[Training] loss: 1.8975, accuracy: 0.2819: 100%|███████████████████████████████████| 1250/1250 [03:33<00:00,  5.85it/s]


[INFO] val_loss has been improved from 1.98977 to 1.82526. Saving Model!
epoch 04, loss: 1.89755, acc: 0.28190, val_loss: 1.82526, val_accuracy: 0.32030


[Training] loss: 1.8023, accuracy: 0.3210: 100%|███████████████████████████████████| 1250/1250 [03:25<00:00,  6.08it/s]


[INFO] val_loss has been improved from 1.82526 to 1.75214. Saving Model!
epoch 05, loss: 1.80229, acc: 0.32095, val_loss: 1.75214, val_accuracy: 0.34990


[Training] loss: 1.7449, accuracy: 0.3462: 100%|███████████████████████████████████| 1250/1250 [02:05<00:00,  9.95it/s]


[INFO] val_loss has been improved from 1.75214 to 1.66182. Saving Model!
epoch 06, loss: 1.74488, acc: 0.34623, val_loss: 1.66182, val_accuracy: 0.38560


[Training] loss: 1.6862, accuracy: 0.3700: 100%|███████████████████████████████████| 1250/1250 [02:00<00:00, 10.34it/s]


[INFO] val_loss has been improved from 1.66182 to 1.65720. Saving Model!
epoch 07, loss: 1.68622, acc: 0.37005, val_loss: 1.65720, val_accuracy: 0.38330


[Training] loss: 1.6456, accuracy: 0.3874: 100%|███████████████████████████████████| 1250/1250 [02:06<00:00,  9.92it/s]


[INFO] val_loss has been improved from 1.65720 to 1.62408. Saving Model!
epoch 08, loss: 1.64563, acc: 0.38740, val_loss: 1.62408, val_accuracy: 0.40160


[Training] loss: 1.6103, accuracy: 0.4002: 100%|███████████████████████████████████| 1250/1250 [02:02<00:00, 10.23it/s]


[INFO] val_loss has been improved from 1.62408 to 1.55039. Saving Model!
epoch 09, loss: 1.61027, acc: 0.40018, val_loss: 1.55039, val_accuracy: 0.43710


[Training] loss: 1.5676, accuracy: 0.4216: 100%|███████████████████████████████████| 1250/1250 [02:01<00:00, 10.29it/s]


epoch 10, loss: 1.56764, acc: 0.42163, val_loss: 1.68114, val_accuracy: 0.40620


[Training] loss: 1.5186, accuracy: 0.4411: 100%|███████████████████████████████████| 1250/1250 [02:00<00:00, 10.40it/s]


[INFO] val_loss has been improved from 1.55039 to 1.48384. Saving Model!
epoch 11, loss: 1.51857, acc: 0.44110, val_loss: 1.48384, val_accuracy: 0.45660


[Training] loss: 1.4714, accuracy: 0.4613: 100%|███████████████████████████████████| 1250/1250 [02:09<00:00,  9.63it/s]


[INFO] val_loss has been improved from 1.48384 to 1.45250. Saving Model!
epoch 12, loss: 1.47142, acc: 0.46130, val_loss: 1.45250, val_accuracy: 0.47360


[Training] loss: 1.4365, accuracy: 0.4795: 100%|███████████████████████████████████| 1250/1250 [02:01<00:00, 10.26it/s]


[INFO] val_loss has been improved from 1.45250 to 1.43068. Saving Model!
epoch 13, loss: 1.43652, acc: 0.47947, val_loss: 1.43068, val_accuracy: 0.48420


[Training] loss: 1.3986, accuracy: 0.4912: 100%|███████████████████████████████████| 1250/1250 [02:03<00:00, 10.15it/s]


[INFO] val_loss has been improved from 1.43068 to 1.33891. Saving Model!
epoch 14, loss: 1.39864, acc: 0.49120, val_loss: 1.33891, val_accuracy: 0.51530


[Training] loss: 1.3664, accuracy: 0.5057: 100%|███████████████████████████████████| 1250/1250 [01:57<00:00, 10.64it/s]


[INFO] val_loss has been improved from 1.33891 to 1.31911. Saving Model!
epoch 15, loss: 1.36641, acc: 0.50567, val_loss: 1.31911, val_accuracy: 0.53080


[Training] loss: 1.3218, accuracy: 0.5253: 100%|███████████████████████████████████| 1250/1250 [02:00<00:00, 10.39it/s]


[INFO] val_loss has been improved from 1.31911 to 1.26387. Saving Model!
epoch 16, loss: 1.32182, acc: 0.52533, val_loss: 1.26387, val_accuracy: 0.54400


[Training] loss: 1.2856, accuracy: 0.5383: 100%|███████████████████████████████████| 1250/1250 [02:06<00:00,  9.89it/s]


epoch 17, loss: 1.28565, acc: 0.53830, val_loss: 1.26633, val_accuracy: 0.54230


[Training] loss: 1.2491, accuracy: 0.5523: 100%|███████████████████████████████████| 1250/1250 [01:59<00:00, 10.49it/s]


[INFO] val_loss has been improved from 1.26387 to 1.21660. Saving Model!
epoch 18, loss: 1.24910, acc: 0.55230, val_loss: 1.21660, val_accuracy: 0.56530


[Training] loss: 1.2228, accuracy: 0.5609: 100%|███████████████████████████████████| 1250/1250 [01:57<00:00, 10.66it/s]


[INFO] val_loss has been improved from 1.21660 to 1.14967. Saving Model!
epoch 19, loss: 1.22277, acc: 0.56088, val_loss: 1.14967, val_accuracy: 0.58720


[Training] loss: 1.1815, accuracy: 0.5777: 100%|███████████████████████████████████| 1250/1250 [01:59<00:00, 10.46it/s]


epoch 20, loss: 1.18152, acc: 0.57768, val_loss: 1.18706, val_accuracy: 0.57590


[Training] loss: 1.1631, accuracy: 0.5854: 100%|███████████████████████████████████| 1250/1250 [02:06<00:00,  9.89it/s]


[INFO] val_loss has been improved from 1.14967 to 1.11022. Saving Model!
epoch 21, loss: 1.16315, acc: 0.58537, val_loss: 1.11022, val_accuracy: 0.59350


[Training] loss: 1.1222, accuracy: 0.6001: 100%|███████████████████████████████████| 1250/1250 [02:05<00:00,  9.95it/s]


[INFO] val_loss has been improved from 1.11022 to 1.09565. Saving Model!
epoch 22, loss: 1.12221, acc: 0.60010, val_loss: 1.09565, val_accuracy: 0.60790


[Training] loss: 1.1091, accuracy: 0.6053: 100%|███████████████████████████████████| 1250/1250 [02:15<00:00,  9.20it/s]


[INFO] val_loss has been improved from 1.09565 to 1.08279. Saving Model!
epoch 23, loss: 1.10909, acc: 0.60530, val_loss: 1.08279, val_accuracy: 0.61510


[Training] loss: 1.0764, accuracy: 0.6149: 100%|███████████████████████████████████| 1250/1250 [02:40<00:00,  7.78it/s]


[INFO] val_loss has been improved from 1.08279 to 1.06557. Saving Model!
epoch 24, loss: 1.07639, acc: 0.61493, val_loss: 1.06557, val_accuracy: 0.61980


[Training] loss: 1.0602, accuracy: 0.6229: 100%|███████████████████████████████████| 1250/1250 [02:09<00:00,  9.68it/s]


epoch 25, loss: 1.06023, acc: 0.62285, val_loss: 1.12915, val_accuracy: 0.59790


[Training] loss: 1.0467, accuracy: 0.6303: 100%|███████████████████████████████████| 1250/1250 [02:21<00:00,  8.81it/s]


[INFO] val_loss has been improved from 1.06557 to 1.02332. Saving Model!
epoch 26, loss: 1.04671, acc: 0.63033, val_loss: 1.02332, val_accuracy: 0.63510


[Training] loss: 1.0196, accuracy: 0.6399: 100%|███████████████████████████████████| 1250/1250 [01:58<00:00, 10.51it/s]


[INFO] val_loss has been improved from 1.02332 to 0.98454. Saving Model!
epoch 27, loss: 1.01963, acc: 0.63990, val_loss: 0.98454, val_accuracy: 0.65470


[Training] loss: 1.0045, accuracy: 0.6425: 100%|███████████████████████████████████| 1250/1250 [01:53<00:00, 10.99it/s]


[INFO] val_loss has been improved from 0.98454 to 0.97538. Saving Model!
epoch 28, loss: 1.00446, acc: 0.64250, val_loss: 0.97538, val_accuracy: 0.65360


[Training] loss: 0.9816, accuracy: 0.6530: 100%|███████████████████████████████████| 1250/1250 [02:23<00:00,  8.68it/s]


[INFO] val_loss has been improved from 0.97538 to 0.96035. Saving Model!
epoch 29, loss: 0.98159, acc: 0.65297, val_loss: 0.96035, val_accuracy: 0.65700


[Training] loss: 0.9575, accuracy: 0.6631: 100%|███████████████████████████████████| 1250/1250 [02:28<00:00,  8.43it/s]


epoch 30, loss: 0.95749, acc: 0.66310, val_loss: 0.97508, val_accuracy: 0.65380


# Test

In [33]:
model.load_state_dict(torch.load('./model/AlexNet.pth'))
model = model.to(device)

In [34]:
final_loss, final_acc = model_evaluate(model, test_dl, criterion, device)

print(f'evaluation loss: {final_loss:.5f}, evaluation accuracy: {final_acc:.5f}')

evaluation loss: 0.97061, evaluation accuracy: 0.65230


##### Reference
- https://teddylee777.github.io/pytorch/alexnet-implementation/
- https://github.com/JJuOn/pytorch-implementation/blob/main/2.AlexNet.ipynb
- https://deep-learning-study.tistory.com/518