In [1]:
import os
from pathlib import Path
import numpy as np
import torch
from torch.utils.data import Dataset
import torchvision
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import re

import matplotlib.pyplot as plt



## dataset

In [6]:
root = Path('/home/data/Imagenet')
train_path = root / 'train'
val_path = root / 'val'
label_path = root / 'label'

def ImgNetData(train_path, val_path, distributed=True):
    train_transforms = transforms.Compose([
        transforms.RandomChoice([transforms.Resize(256), transforms.Resize(480)]),
        transforms.RandomCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        
        ### 이걸로 사용해볼 순 없나?
        # transforms.RandomResizedCrop((224,224)),
        # transforms.Resize((256,256)),
        # transforms.CenterCrop((224,224)),
        
        transforms.RandomHorizontalFlip(0.5),
    ])
    val_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(244),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    train_data = ImageFolder(root=train_path, transform=train_transforms)
    val_data = ImageFolder(root=val_path, transform=val_transforms)
    
    return train_data, val_data
    

In [11]:
synset_path = label_path / 'synset_words.txt'

def synset2word(synset_path=synset_path):
    label_dict = {}
    with open(synset_path, 'r') as f:
        synset_word = f.readlines()
        for i in range(len(synset_word)):
            synset = synset_word[i].split()[0]
            word = re.sub(r'[^a-zA-Z]', '', synset_word[i].split()[1])
            label_dict[synset] = word
            
    return label_dict

label_dict = synset2word()
label_dict

{'n01440764': 'tench',
 'n01443537': 'goldfish',
 'n01484850': 'great',
 'n01491361': 'tiger',
 'n01494475': 'hammerhead',
 'n01496331': 'electric',
 'n01498041': 'stingray',
 'n01514668': 'cock',
 'n01514859': 'hen',
 'n01518878': 'ostrich',
 'n01530575': 'brambling',
 'n01531178': 'goldfinch',
 'n01532829': 'house',
 'n01534433': 'junco',
 'n01537544': 'indigo',
 'n01558993': 'robin',
 'n01560419': 'bulbul',
 'n01580077': 'jay',
 'n01582220': 'magpie',
 'n01592084': 'chickadee',
 'n01601694': 'water',
 'n01608432': 'kite',
 'n01614925': 'bald',
 'n01616318': 'vulture',
 'n01622779': 'great',
 'n01629819': 'European',
 'n01630670': 'common',
 'n01631663': 'eft',
 'n01632458': 'spotted',
 'n01632777': 'axolotl',
 'n01641577': 'bullfrog',
 'n01644373': 'tree',
 'n01644900': 'tailed',
 'n01664065': 'loggerhead',
 'n01665541': 'leatherback',
 'n01667114': 'mud',
 'n01667778': 'terrapin',
 'n01669191': 'box',
 'n01675722': 'banded',
 'n01677366': 'common',
 'n01682714': 'American',
 'n0168

## Utils

In [None]:
import sys

class Logger(object):
    def __init__(self, local_rank=0, no_save=False):
        self.terminal = sys.stdout
        self.file = None
        self.local_rank = local_rank
        self.no_save = no_save
    def open(self, fp, mode=None):
        if mode is None: mode = 'w'
        if self.local_rank and not self.no_save == 0: self.file = open(fp, mode)
    def write(self, msg, is_terminal=1, is_file=1):
        if msg[-1] != "\n": msg = msg + "\n"
        if self.local_rank == 0:
            if '\r' in msg: is_file = 0
            if is_terminal == 1:
                self.terminal.write(msg)
                self.terminal.flush()
            if is_file == 1 and not self.no_save:
                self.file.write(msg)
                self.file.flush()
    def flush(self): 
        pass
    
def print_args(args, logger=None):
    if logger is not None:
        logger.write("#### configurations ####")
    for k, v in vars(args).items():
        if logger is not None:
            logger.write('{}: {}\n'.format(k, v))
        else:
            print('{}: {}'.format(k, v))
    if logger is not None:
        logger.write("########################")
      
import argparse
import json
def save_args(args, to_path):
    with open(to_path, "w") as f:
        json.dump(args.__dict__, f, indent=2)
def load_args(from_path):
    parser = argparse.ArgumentParser()
    args = parser.parse_args()
    with open(from_path, "r") as f:
        args.__dict__ = json.load(f)
    return args    

class AverageMeter (object):
    def __init__(self):
        self.reset ()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        
def Accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

## Model

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BottleNeck(nn.Module):
    expansion = 4
    def __init__(self, in_channels, out_channels, stride=1):
        super(BottleNeck, self).__init__()
        
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels * self.expansion),
        )
        self.expansion = 4
        self.shortcut = nn.Sequential()
        self.relu = nn.ReLU()
        
        # 하위 layer로 내려갈때 downsampling(1/2줄이기)하면서 feature 수가 달라짐 그러므로 skip connection을 할 수 있게 보정
        if stride != 1 or in_channels != out_channels * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * self.expansion)
            )
    def forward(self, x):
        out = self.residual_function(x)
        out = out + self.shortcut(x)
        return self.relu(out)

class ResNet(nn.Module):
    def __init__(self, block=BottleNeck, layers=[3,4,6,3], in_channels=3, num_classes=1000):
        self.num_classes = num_classes
        super().__init__()
        self.in_channels = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
            
        )        
        self.conv2 = self._make_layer(block, 64, layers[0])
        self.conv3 = self._make_layer(block, 128, layers[1], downsampling=True)
        self.conv4 = self._make_layer(block, 256, layers[2], downsampling=True)
        self.conv5 = self._make_layer(block, 512, layers[3], downsampling=True)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
        self.fc = nn.Linear(2048, num_classes)
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        
        return out
    
    def _make_layer(self, block, out_channels, num_blocks, downsampling=False):
        if downsampling is True:
            stride = 2
        else:
            stride = 1
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion

        return nn.Sequential(*layers)
    

import torchsummary
model = ResNet()
torchsummary.summary(model, (3,224,224), device='cpu')
# print(model)
    

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          16,384
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256,

In [23]:

import torch
import torch.nn as nn

from einops import rearrange, repeat, reduce
'''
rearrange doesn't change number of elements and covers different numpy functions (like transpose, reshape, stack, concatenate, squeeze and expand_dims)
rearrange : string으로 transpose, compose, decompose와 같은 shape 변화를 줄 수 있음

reduce combines same reordering syntax with reductions (mean, min, max, sum, prod, and any others)
reduce : string으로 mean, min, max, sum, prod같은 작업을 할 수 있음.

repeat additionally covers repeating and tiling
repeat : string으로 repeat할 수 있음
'''
from einops.layers.torch import Rearrange # torch에서 layer로 사용하게 함

def make_tuple(x):
    return x if isinstance(x, tuple) else (x, x)

class PreNorm(nn.Module): # 모든 블록 전에 먼저 Norm을 하고 attention 수행함.
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim) # 3(Data 갯수)X6(Feature 갯수) 기준으로 LayerNorm(데이터 샘플 단위) 3개 나옴 VS BatchNorm(특성 단위) 6개 나옴
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)
    

class Attention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
        super().__init__()
        inner_dim = dim_head * heads # multi head를 내부에서 한 번에 작업하기 위함
        project_out = not (heads == 1 and dim_head == dim) # multi head attention 인지 아닌지
        
        self.heads = heads
        self.scale = dim_head ** -0.5 # root(dim_head)로 나눠 줄 때 필요
        
        self.attend = nn.Softmax(dim = -1)
        self.dropout = nn.Dropout(dropout)
        
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) # input을 q, k, v로 만들기
        
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity() # single head일 때는 할 필요 없으므로
        
    def forward(self, x): # [batch_size, seq_len, dim]
        qkv = self.to_qkv(x).chunk(3, dim=-1) # chunk으로 세 덩어리로 나눔
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv) # [batch_size, heads, seq_len, dim_head] X 3
        
        energy = torch.matmul(q, k.transpose(-1, -2)) * self.scale 
        
        attn = self.attend(energy)
        attn = self.dropout(attn)
        
        out = torch.matmul(attn, v) # [batch_size, heads, seq_len, dim_head]
        out = rearrange(out, 'b h n d -> b n (h d)')
        
        return self.to_out(out) # [batch_size, seq_len, emb_dim]
    
class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
            ]))
    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x # skip connection 필요함
            x = ff(x) + x
        return x # [batch_size, seq_len, emb_dim]
 
class ViT(nn.Module):
    def __init__(self, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, dim_head=64, dropout=0., pool='cls', channels=3, emb_dropout=0.):
        super().__init__()
        image_height, image_width = make_tuple(image_size)
        patch_height, patch_width = make_tuple(patch_size)
        
        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image sizes must be divisible by the patch size'
        
        num_patchs = (image_height // patch_height) * (image_width // patch_width)
        patch_dim = channels * patch_width * patch_height
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
        
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_height, p2=patch_width), # [batch_size, patch_num, patch_dim]
            nn.Linear(patch_dim, dim) # [batch_size, patch_num, emb_dim]
        )
        
        self.pos_embedding = nn.Parameter(torch.rand(1, num_patchs + 1, dim))
        self.cls_token = nn.Parameter(torch.rand(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)
        
        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)
        
        self.pool = pool
        # self.to_latent = nn.Identity() BYOL을 사용할거 아니면 필요없음 lucidrain피셜
        
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim), # 모든 block 전에 LN사용함.
            nn.Linear(dim, num_classes)
        )
    
    def forward(self, x):
        x = self.to_patch_embedding(x)
        batch_size, num_patchs, emb_size = x.shape
        
        cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b=batch_size) # 맨 앞에 붙일 Class 토큰 만들기
        x = torch.cat((cls_tokens, x), dim=1) # 시퀀스 맨 앞에 붙이기
        x += self.pos_embedding[:, :(num_patchs+1)] # positional embedding 더하기
        x = self.dropout(x)
        
        x = self.transformer(x)
        
        # mlp head에 들어갈 때 맨 앞 cls 토큰만 가져갈지 전체를 평균내어 가져갈지 결정하기
        x = x[:, 0] if self.pool == 'cls' else reduce(x, 'b n d -> b d', 'mean') # [batch_size, emb_size]
        
        # x = self.to_latent(x)
        
        return self.mlp_head(x) # [batch_size, num_classes]
          
import torchsummary
model = ViT(224, 28, 1000, 1024, 6, 16, 2048, dropout=0.1, emb_dropout=0.1)
torchsummary.summary(model, (3,224,224), device='cpu')
# print(model)




----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Rearrange-1             [-1, 64, 2352]               0
            Linear-2             [-1, 64, 1024]       2,409,472
           Dropout-3             [-1, 65, 1024]               0
         LayerNorm-4             [-1, 65, 1024]           2,048
            Linear-5             [-1, 65, 3072]       3,145,728
           Softmax-6           [-1, 16, 65, 65]               0
           Dropout-7           [-1, 16, 65, 65]               0
            Linear-8             [-1, 65, 1024]       1,049,600
           Dropout-9             [-1, 65, 1024]               0
        Attention-10             [-1, 65, 1024]               0
          PreNorm-11             [-1, 65, 1024]               0
        LayerNorm-12             [-1, 65, 1024]           2,048
           Linear-13             [-1, 65, 2048]       2,099,200
             GELU-14             [-1, 6

In [35]:

# NLP Example
batch, sentence_length, embedding_dim = 1, 5, 3
embedding = torch.randn(batch, sentence_length, embedding_dim)
layer_norm = nn.LayerNorm(embedding_dim)
# Activate module
out = layer_norm(embedding)
print(embedding)
print("==================================")
print(out)

# Image Example
N, C, H, W = 20, 5, 10, 10
input = torch.randn(N, C, H, W)
# Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
# as shown in the image below
layer_norm = nn.LayerNorm([C, H, W])
output = layer_norm(input)

tensor([[[ 1.6741, -0.1814,  0.0143],
         [ 0.8440, -0.5426,  0.8293],
         [ 1.0445, -0.9468,  0.2765],
         [ 1.3212, -0.6894,  0.0998],
         [-1.1613,  1.6704, -1.1912]]])
tensor([[[ 1.4077, -0.8214, -0.5863],
         [ 0.7184, -1.4141,  0.6957],
         [ 1.1217, -1.3067,  0.1851],
         [ 1.3025, -1.1283, -0.1742],
         [-0.6959,  1.4142, -0.7182]]], grad_fn=<NativeLayerNormBackward0>)


In [43]:
from datetime import datetime
now = datetime.now()
('vit' + ' runed at ' + now.strftime('%Y-%m-%d %H:%M:%S'))

'vit runed at 2023-02-02 21:49:47'

In [4]:
import tarfile

with tarfile.open('./runs/vit/checkpoint.pth.tar', 'r') as f:
    print(f.getnames())

ReadError: file could not be opened successfully