In [1]:
%matplotlib inline

import torch
from torch.nn import *

import torchvision
from torchvision.transforms import *

import numpy as np
import matplotlib.pyplot as plt

In [45]:
from torchsummary import summary

class YoloLayer(Module):
    """A stack of ``YoloConv`` blocks optionally followed by 2x2 max pooling.

    Args:
        layer_params: Iterable of 5-tuples
            ``(in_channels, out_channels, kernel_size, stride, activation)``,
            one per convolution, applied in the order given.
        pool: When true, a ``MaxPool2d`` with a 2x2 window and stride 2 runs
            after the convolutions; otherwise the pooling stage is an
            ``Identity`` and the tensor passes through unchanged.
    """

    def __init__(self, layer_params, pool=True):
        super().__init__()
        conv_blocks = [
            YoloConv(in_channels=c_in, out_channels=c_out,
                     kernel_size=k_size, stride=step, activation=act)
            for c_in, c_out, k_size, step, act in layer_params
        ]

        self.in_layers = Sequential(*conv_blocks)
        self.pool = MaxPool2d(kernel_size=(2, 2), stride=2) if pool else Identity()

    def forward(self, x):
        """Apply the convolution stack, then the pooling stage."""
        return self.pool(self.in_layers(x))

class YoloConv(Module):
    """2-D convolution followed by an optional leaky-ReLU activation.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Either an int or a ``(height, width)`` pair.
        stride: Convolution stride.
        activation: ``'relu'`` (case-insensitive) selects
            ``LeakyReLU(negative_slope=0.1)``; any other string disables the
            activation (an ``Identity`` module is used).

    Note:
        Padding is ``kernel_size // 2`` taken from the first kernel dimension
        and applied to both spatial dimensions, exactly as before; for the
        square odd-sized kernels used in this file that keeps the spatial size
        unchanged at ``stride=1``.
    """

    def __init__(self, in_channels, out_channels, kernel_size=(3, 3), stride=1, activation='relu'):
        super().__init__()
        # Accept a bare int as well as a tuple so `kernel_size[0]` cannot fail.
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        self.conv = Conv2d(kernel_size=kernel_size, in_channels=in_channels, out_channels=out_channels,
                           stride=stride, padding=kernel_size[0] // 2)
        if activation.lower() == 'relu':
            self.activation = LeakyReLU(negative_slope=0.1, inplace=True)
        else:
            # Must be an *instance*: assigning the class itself would not
            # register a submodule and would build a new module on every call.
            self.activation = Identity()

    def forward(self, x):
        """Return ``activation(conv(x))``."""
        x = self.conv(x)
        # Use the returned tensor instead of relying on the in-place side
        # effect, so the layer stays correct for any activation module.
        x = self.activation(x)
        return x
    
class YoloFinalLayer(Module):
    """Two fully connected layers that turn a feature map into a prediction grid.

    The input is flattened to ``in_features`` values per sample, passed through
    ``Linear -> activation -> Linear`` and reshaped to
    ``(batch, out_channels, grid_size, grid_size)``.

    Args:
        activation: ``'relu'`` (case-insensitive) selects
            ``LeakyReLU(negative_slope=0.1)``; any other string means no
            activation between the two linear layers.
        in_features: Flattened size of one input sample. The default 50176
            equals 1024 * 7 * 7.
        hidden_features: Width of the hidden linear layer.
        out_channels: Channels of the output grid.
        grid_size: Side length of the square output grid. With the defaults
            the second linear layer has 30 * 7 * 7 = 1470 outputs.
    """

    def __init__(self, activation='relu', in_features=50176, hidden_features=4096,
                 out_channels=30, grid_size=7):
        super().__init__()
        self.in_features = in_features
        self.out_channels = out_channels
        self.grid_size = grid_size

        self.classifier0 = Linear(in_features=in_features, out_features=hidden_features)
        if activation.lower() == 'relu':
            self.act = LeakyReLU(negative_slope=0.1, inplace=True)
        else:
            self.act = Identity()
        self.classifier1 = Linear(in_features=hidden_features,
                                  out_features=out_channels * grid_size * grid_size)

    def forward(self, x):
        """Map ``x`` to a ``(batch, out_channels, grid_size, grid_size)`` tensor."""
        # reshape (not view) so non-contiguous inputs are handled as well.
        x = x.reshape(-1, self.in_features)
        x = self.classifier0(x)
        # Keep the returned tensor rather than depending on inplace=True.
        x = self.act(x)
        x = self.classifier1(x)
        x = x.view(-1, self.out_channels, self.grid_size, self.grid_size)
        return x
    
class YoloFeatureExtractor(Module):
    """Convolutional backbone made of five ``YoloLayer`` stages.

    The first four stages end with pooling (the ``YoloLayer`` default); the
    fifth is built with ``pool=False``. Channel counts go 3 -> 64 -> 192 ->
    512 -> 1024 -> 1024 across the stages.

    Args:
        activation: Activation name forwarded to every convolution spec.
    """

    def __init__(self, activation='relu'):
        super().__init__()

        def conv(c_in, c_out, k, stride=1):
            # One spec tuple: (in, out, kernel, stride, activation)
            return (c_in, c_out, (k, k), stride, activation)

        stage1 = YoloLayer([conv(3, 64, 7, stride=2)])
        stage2 = YoloLayer([conv(64, 192, 3)])
        stage3 = YoloLayer([
            conv(192, 128, 1), conv(128, 256, 3),
            conv(256, 256, 1), conv(256, 512, 3),
        ])
        # Four repeated 1x1-reduce / 3x3-expand pairs, then the widening pair.
        stage4 = YoloLayer(
            [conv(512, 256, 1), conv(256, 512, 3)] * 4
            + [conv(512, 512, 1), conv(512, 1024, 3)]
        )
        stage5 = YoloLayer([conv(1024, 512, 1), conv(512, 1024, 3)] * 2, pool=False)

        self.layers = Sequential(stage1, stage2, stage3, stage4, stage5)

    def forward(self, x):
        """Run ``x`` through all five stages in order."""
        return self.layers(x)
            
class YoloClassifier(Module):
    """Detection head: two unpooled ``YoloLayer`` stages and a ``YoloFinalLayer``.

    All four convolutions are 3x3 with 1024 input and output channels; the
    second one uses stride 2.

    Args:
        activation: Activation name forwarded to every convolution spec and to
            ``YoloFinalLayer``. Previously this argument was accepted but
            ignored (``'relu'`` was hard-coded); the default keeps the old
            behaviour.
    """

    def __init__(self, activation='relu'):
        super().__init__()
        layers = [
            YoloLayer([
                (1024, 1024, (3, 3), 1, activation),
                (1024, 1024, (3, 3), 2, activation),
            ], pool=False),
            YoloLayer([
                (1024, 1024, (3, 3), 1, activation),
                (1024, 1024, (3, 3), 1, activation),
            ], pool=False),
            YoloFinalLayer(activation)
        ]

        self.layers = Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the convolution stages and the final layer."""
        x = self.layers(x)
        return x
        

class YOLOv1(Module):
    """Complete network: ``YoloFeatureExtractor`` followed by ``YoloClassifier``.

    Both parts are constructed with the ``'relu'`` activation setting.
    """

    def __init__(self):
        super().__init__()

        activation_name = 'relu'
        self.features = YoloFeatureExtractor(activation_name)
        self.classifiers = YoloClassifier(activation_name)

    def forward(self, x):
        """Return ``classifiers(features(x))``."""
        return self.classifiers(self.features(x))
    
class YOLOv1Pretrainer(Module):
    """``YoloFeatureExtractor`` with a small classification head.

    The head is a 7x7 average pool, a flatten, and a linear layer mapping
    1024 features to 1000 outputs.
    """

    def __init__(self):
        super().__init__()

        self.features = YoloFeatureExtractor('relu')

        head_modules = [
            AvgPool2d(kernel_size=(7, 7)),
            Flatten(),
            Linear(in_features=1024, out_features=1000),
        ]
        self.classifiers = Sequential(*head_modules)

    def forward(self, x):
        """Return ``classifiers(features(x))``."""
        return self.classifiers(self.features(x))

# Alternative (disabled): summarize the full YOLOv1 network on 3x448x448 inputs.
# model = YOLOv1()
# summary(model, input_size=(3, 448, 448), batch_size=2, device='cpu')
# Build the pretraining network and print a per-layer summary, on CPU,
# for a batch of two 3x224x224 inputs.
model = YOLOv1Pretrainer()
summary(model, input_size=(3, 224, 224), batch_size=2, device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [2, 64, 112, 112]           9,472
         LeakyReLU-2          [2, 64, 112, 112]               0
          YoloConv-3          [2, 64, 112, 112]               0
         MaxPool2d-4            [2, 64, 56, 56]               0
         YoloLayer-5            [2, 64, 56, 56]               0
            Conv2d-6           [2, 192, 56, 56]         110,784
         LeakyReLU-7           [2, 192, 56, 56]               0
          YoloConv-8           [2, 192, 56, 56]               0
         MaxPool2d-9           [2, 192, 28, 28]               0
        YoloLayer-10           [2, 192, 28, 28]               0
           Conv2d-11           [2, 128, 28, 28]          24,704
        LeakyReLU-12           [2, 128, 28, 28]               0
         YoloConv-13           [2, 128, 28, 28]               0
           Conv2d-14           [2, 256,

# Prepare dataset
- We will prepare the ILSVRC2012 dataset.

In [47]:
class LabelReader(object):
    """Loads a synset-name -> label-name mapping from a text file.

    Every non-blank line of the file is expected to have the form
    ``<synset_name>|<label_name>``.

    Args:
        label_file_path: Path of the label file. If the path contains the
            substring ``'pretrained'`` an informational message is printed.
    """

    def __init__(self, label_file_path):
        self.label_file_path = label_file_path
        if 'pretrained' in label_file_path:
            print("INFO: Using Pretrained label list! (not custom one)")

    def load_label(self):
        """Read the label file into a dict.

        Returns:
            A dict mapping synset name to label name in file order (empty for
            an empty file), or ``None`` when the path is not an existing file.

        Raises:
            ValueError: If a non-blank line contains no ``|`` separator.
        """
        if not os.path.isfile(self.label_file_path):
            return None

        with open(self.label_file_path, 'r') as f:
            label_name_body = f.read().strip()

        # Drop blank lines so an empty file (or a stray empty line) yields no
        # entry instead of failing on tuple unpacking.
        label_name_lines = [line.strip() for line in label_name_body.split("\n") if line.strip()]

        label_map = {}
        for label_entry in tqdm(label_name_lines, desc='레이블 파일 읽기 작업'):
            # Split on the first separator only, so a label name may itself
            # contain '|'.
            synset_name, label_name = label_entry.split("|", 1)
            label_map[synset_name] = label_name

        print(f"레이블 파일 읽기 완료: 총 {len(label_map)}개 레이블 검색")
        return label_map

class ImageNet(torch.utils.data.Dataset):
    """Classification dataset with one sub-directory of ``*.JPEG`` files per label.

    Args:
        labels: Mapping whose keys are sub-directory names below ``root_dir``.
            The position of a key in iteration order is used as its integer
            class index.
        root_dir: Directory containing the per-label sub-directories.
        transform: Optional callable applied to each RGB PIL image.

    Attributes are filled by ``load_list``: ``img_path_list`` holds the image
    paths and ``img_class_list`` the class index of each path.
    """

    def __init__(self, labels, root_dir, transform=None):
        super(ImageNet, self).__init__()

        self.labels = labels
        self.transform = transform

        self.img_path_list = []
        self.img_class_list = []
        self.load_list(root_dir)

    def load_list(self, root_dir):
        """Append every ``*.JPEG`` path below ``root_dir/<label>`` with its class index.

        Raises:
            RuntimeError: If the path and class lists end up with different lengths.
        """
        for label_index, label in enumerate(tqdm(self.labels.keys(), desc='이미지 파일 리스트 읽기 작업')):
            # Escape the directory part so characters such as '[' in a path
            # are matched literally rather than as glob patterns.
            item_dir = glob.escape(os.path.join(root_dir, label))
            # glob returns files in arbitrary order; sort so that sample
            # indices are reproducible between runs and machines.
            file_list = sorted(glob.glob(os.path.join(item_dir, "*.JPEG")))
            self.img_path_list += file_list
            self.img_class_list += [label_index] * len(file_list)

        # Sanity check on the two parallel lists.
        if len(self.img_path_list) != len(self.img_class_list):
            raise RuntimeError(f"이미지 데이터 {len(self.img_path_list)}개와 클래스 데이터 {len(self.img_class_list)}개가 서로 다릅니다!")

        print(f"총 {len(self.img_path_list)}개 이미지 리스트 데이터 및 실효 레이블 {len(set(self.img_class_list))}개 로드 성공")

    def __len__(self):
        """Number of images found by ``load_list``."""
        return len(self.img_path_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is the RGB-converted PIL image (after ``transform`` if one
        was given); ``label`` is a 0-dim ``torch.int64`` tensor.
        """
        # The context manager closes the underlying file as soon as the
        # converted copy exists, instead of leaving the handle open until
        # garbage collection.
        with Image.open(self.img_path_list[idx]) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        # Build the integer label directly, without a float tensor round-trip.
        label = torch.tensor(self.img_class_list[idx], dtype=torch.int64)
        return image, label

In [48]:
import os

# Directory that is checked for the dataset below; this is a machine-specific
# absolute path and has to be adjusted for every environment.
DATASET_BASEDIR = '/media/jungin500/windows-10/Dataset/ILSVRC/Data/CLS-LOC/train/'
# NOTE(review): as far as visible here, a missing directory is only reported
# with a message; nothing is raised, so later cells would still run.
if not os.path.isdir(DATASET_BASEDIR):
    print("ERROR: Dataset path not exists!")