In [1]:
!nvidia-smi

Thu Jun  9 16:22:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.142.00   Driver Version: 450.142.00   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   37C    P0    26W /  70W |   6767MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [2]:
import numpy as np
import random
import os
import math
import easydict  # hyper-parameter
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

from glob import glob
import pandas as pd
import cv2
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_optimizer as optim  # optimizer
from torch.utils.data import DataLoader, Dataset

from torch.autograd import Variable

import torchvision.models as models
from torchvision import transforms

## Config
Hyper-parameter

In [3]:
# Hyper-parameter container with attribute-style access (args.epochs, ...).
# NOTE(review): in the cells visible here `args` is only referenced from
# commented-out code; the training cells hard-code epochs=30, Adam(lr=0.003)
# and CosineAnnealingLR(T_max=10, eta_min=0.001). Confirm whether these values
# are meant to drive training.
args = easydict.EasyDict(
    {
     'epochs':30,
     # Optimizer
     'optimizer':'Lamb',
     
     'initial_lr':5e-6,
     'weight_decay':1e-3,

     # Scheduler (OnecycleLR)
     'scheduler':'cycle',
     'warm_epoch':5,
     'max_lr':1e-3,

     # Cosine Annealing
     'min_lr':5e-6,
     'tmax':145,
    })

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using {device} device")

Using cuda device


In [5]:
torch.cuda.is_available()

True

In [6]:
torch.cuda.empty_cache()

### data

In [7]:
def get_train_data(data_dir):
    """Collect image paths and per-directory labels below ``data_dir/image``.

    Every sub-directory of ``<data_dir>/image`` is treated as one product.
    Its ``*.jpg`` files and then its ``*.png`` files are appended to the image
    list, and the first five characters of the directory name are appended to
    the label list.

    Directories and files are visited in sorted order so that repeated runs
    produce the same ordering (``os.listdir`` and ``glob`` make no ordering
    guarantee).

    Args:
        data_dir: Directory that contains an ``image`` sub-directory.

    Returns:
        A tuple ``(img_path_list, label_list)``. ``img_path_list`` holds one
        entry per image, grouped by product directory. ``label_list`` holds
        ONE entry per product directory (not one per image), in the same
        directory order.
    """
    img_path_list = []
    label_list = []

    image_path = os.path.join(data_dir, 'image')

    for product_name in sorted(os.listdir(image_path)):
        product_path = os.path.join(image_path, product_name)
        # Plain files sitting next to the product directories are ignored.
        if not os.path.isdir(product_path):
            continue

        # Image paths: all jpg files first, then all png files.
        img_path_list.extend(sorted(glob(os.path.join(product_path, '*.jpg'))))
        img_path_list.extend(sorted(glob(os.path.join(product_path, '*.png'))))

        # Label: the first five characters of the directory name.
        label_list.append(product_name[:5])

    return img_path_list, label_list

In [8]:
img_list, label_list = get_train_data('/home/lab16/jupyter_home/Data/product_image/Training/')

In [9]:
def data_blanced(img, label, per_class=114):
    """Expand per-class labels to per-image labels using fixed-size chunks.

    The flat image list is cut into consecutive chunks of ``per_class`` items;
    the i-th chunk is paired with ``label[i]``.

    Args:
        img: Flat sequence of image paths, grouped class by class.
        label: Sequence with one label per class, in the same class order.
        per_class: Number of consecutive images assigned to each label.
            Defaults to 114, the value previously hard-coded here.

    Returns:
        A tuple ``(x, y)`` of equal-length lists: the selected images and the
        label for each of them. Images beyond ``len(label) * per_class`` are
        dropped; a class whose chunk is short or empty contributes fewer
        (or no) entries.

    Raises:
        ValueError: If ``per_class`` is smaller than 1.
    """
    if per_class < 1:
        raise ValueError(f"per_class must be >= 1, got {per_class}")

    x = []
    y = []

    for i, _label in enumerate(label):
        chunk = img[i * per_class:(i + 1) * per_class]
        x.extend(chunk)
        # One copy of the label for every image actually present in the chunk.
        y.extend([_label] * len(chunk))

    return x, y

In [10]:
x, y = data_blanced(img_list, label_list)

In [11]:
# Fit a label encoder on the per-image training labels and convert the
# resulting integer class indices to a tensor.
le = preprocessing.LabelEncoder()
targets = le.fit_transform(y)
targets = torch.as_tensor(targets)

In [12]:
len(targets)

8664

In [13]:
# One-hot encode the training class indices. No num_classes is passed, so the
# width is inferred by F.one_hot (the recorded run shows shape [8664, 76]).
one_hot_y = F.one_hot(targets)

In [14]:
one_hot_y.shape

torch.Size([8664, 76])

In [15]:
def get_valid_data(data_dir):
    """Collect validation image paths and per-directory labels.

    Every sub-directory of ``<data_dir>/image`` is treated as one product.
    Its ``*.jpg`` files and then its ``*.png`` files are appended to the image
    list, and the first five characters of the directory name are appended to
    the label list.

    Directories and files are visited in sorted order so that repeated runs
    produce the same ordering (``os.listdir`` and ``glob`` make no ordering
    guarantee).

    Args:
        data_dir: Directory that contains an ``image`` sub-directory.

    Returns:
        A tuple ``(img_valid_list, label_valid_list)``. The image list holds
        one entry per image, grouped by product directory. The label list
        holds ONE entry per product directory (not one per image), in the same
        directory order.
    """
    img_valid_list = []
    label_valid_list = []

    image_path = os.path.join(data_dir, 'image')

    for product_name in sorted(os.listdir(image_path)):
        product_path = os.path.join(image_path, product_name)
        # Plain files sitting next to the product directories are ignored.
        if not os.path.isdir(product_path):
            continue

        # Image paths: all jpg files first, then all png files.
        img_valid_list.extend(sorted(glob(os.path.join(product_path, '*.jpg'))))
        img_valid_list.extend(sorted(glob(os.path.join(product_path, '*.png'))))

        # Label: the first five characters of the directory name.
        label_valid_list.append(product_name[:5])

    return img_valid_list, label_valid_list

In [16]:
def valid_data_blanced(img, label, per_class=15):
    """Expand per-class validation labels to per-image labels.

    The flat image list is cut into consecutive chunks of ``per_class`` items;
    the i-th chunk is paired with ``label[i]``.

    Args:
        img: Flat sequence of image paths, grouped class by class.
        label: Sequence with one label per class, in the same class order.
        per_class: Number of consecutive images assigned to each label.
            Defaults to 15, the value previously hard-coded here.

    Returns:
        A tuple ``(x, y)`` of equal-length lists: the selected images and the
        label for each of them. Images beyond ``len(label) * per_class`` are
        dropped; a class whose chunk is short or empty contributes fewer
        (or no) entries.

    Raises:
        ValueError: If ``per_class`` is smaller than 1.
    """
    if per_class < 1:
        raise ValueError(f"per_class must be >= 1, got {per_class}")

    x = []
    y = []

    for i, _label in enumerate(label):
        chunk = img[i * per_class:(i + 1) * per_class]
        x.extend(chunk)
        # One copy of the label for every image actually present in the chunk.
        y.extend([_label] * len(chunk))

    return x, y

In [17]:
img_valid_list, label_valid_list = get_valid_data('/home/lab16/jupyter_home/Data/product_image/Validation/')

In [18]:
x_valid, y_valid = valid_data_blanced(img_valid_list, label_valid_list)

In [19]:
len(y_valid)

1140

In [20]:
# Encode the validation labels with the encoder already fitted on the training
# labels (`le`), so a given label string maps to the same class index in both
# splits. A separately fitted encoder only agrees with `le` when both splits
# contain exactly the same set of label strings; `le.transform` raises
# ValueError on a label it has not seen instead of silently re-indexing.
le2 = le  # kept as an alias for any later cell that still refers to `le2`
targets_y = le.transform(y_valid)
targets_y = torch.as_tensor(targets_y)
# Pin the one-hot width to the number of training classes rather than letting
# F.one_hot infer it from the largest index present in the validation labels.
one_hot_valid_y = F.one_hot(targets_y, num_classes=len(le.classes_))

In [21]:
one_hot_valid_y.shape

torch.Size([1140, 76])

In [22]:
class CustomDataset(Dataset):
    """Dataset that loads images from disk with OpenCV.

    Args:
        img_path_list: Sequence of image file paths.
        label_list: Sequence of labels indexed in parallel with
            ``img_path_list``; only read when ``train_mode`` is true.
        train_mode: When true, ``__getitem__`` returns ``(image, label)``;
            otherwise it returns only the image.
        transforms: Optional callable applied to the loaded image.
    """

    def __init__(self, img_path_list, label_list, train_mode=True, transforms=None):
        self.transforms = transforms
        self.train_mode = train_mode
        self.img_path_list = img_path_list
        self.label_list = label_list

    def __getitem__(self, index):
        """Load the image at ``index`` and, in train mode, its label.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file at that path.
        """
        img_path = self.img_path_list[index]

        # cv2.imread does not raise on a missing/corrupt file, it returns None.
        # Fail here with the offending path instead of deep inside a transform.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # OpenCV decodes to BGR channel order; convert to RGB, the order that
        # torchvision's pretrained backbones were trained with.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            image = self.transforms(image)

        if self.train_mode:
            return image, self.label_list[index]
        return image

    def __len__(self):
        """Number of samples, i.e. the number of image paths."""
        return len(self.img_path_list)

In [23]:
# Training-time preprocessing: convert the loaded image to a tensor, resize it
# to 256x256, then normalize each channel with mean 0.5 and std 0.5.
train_transform = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Resize((256, 256)),
                    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                    ])

# Evaluation-time preprocessing; currently identical to train_transform
# (no augmentation is applied in either pipeline).
test_transform = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Resize((256, 256)),
                    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                    ])

In [24]:
# Training set: image paths paired with one-hot label rows, shuffled each epoch.
train_dataset = CustomDataset(x, one_hot_y, train_mode=True, transforms=train_transform)
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle=True, num_workers=0, collate_fn=None)

# Validation set: built with train_mode=True so that labels are returned too.
# shuffle=False keeps the sample order aligned with one_hot_valid_y, which a
# later cell compares against the predictions by position.
vali_dataset = CustomDataset(x_valid, one_hot_valid_y, train_mode=True, transforms=test_transform)
vali_loader = DataLoader(vali_dataset, batch_size = 5, shuffle=False, num_workers=0, collate_fn=None)

### model

In [27]:
class ResNet50(torch.nn.Module):
    """Pretrained ResNet-50 backbone with a two-layer classification head.

    The backbone is every child module of torchvision's ``resnet50`` except
    the last one; its output is flattened to one vector per sample and passed
    through ``fc1`` (2048 -> 1000), ReLU and ``fc2`` (1000 -> 76).
    """

    def __init__(self):
        super(ResNet50, self).__init__()
        model = models.resnet50(pretrained=True)
        # Drop the final child module (the original classifier layer).
        modules = list(model.children())[:-1]
        self.feature_extract = nn.Sequential(*modules)
        self.fc1 = nn.Linear(2048, 1000)
        self.relu = nn.ReLU()
        # NOTE(review): this dropout layer is created but never applied in
        # forward(); confirm whether it was meant to sit between fc1 and fc2.
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(1000,76)

    def forward(self, x):
        """Return class logits of shape (batch, 76) for an image batch ``x``."""
        x = self.feature_extract(x)
        # Flatten everything after the batch dimension: (batch, 2048, 1, 1)
        # becomes (batch, 2048). Unlike torch.squeeze, this keeps the batch
        # dimension when the batch holds a single sample.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        out = self.fc2(x)
        return out

### training

In [28]:
def validation(model, vali_loader, criterion, device):
    """Compute the mean per-batch loss of ``model`` over ``vali_loader``.

    The model is switched to eval mode and gradients are disabled for the
    whole pass.

    Args:
        model: Module mapping an image batch to logits.
        vali_loader: Iterable yielding ``(img, label)`` batches.
        criterion: Loss callable invoked as ``criterion(logit, label)``.
        device: Device the batches are moved to.

    Returns:
        The mean of the per-batch loss values (``np.mean`` of the collected
        ``loss.item()`` values).
    """
    model.eval()
    vali_loss = []

    with torch.no_grad():
        for img, label in tqdm(vali_loader):
            img, label = img.float().to(device), label.float().to(device)

            # The logits are used as returned: squeezing them would drop the
            # batch dimension for a batch of one sample and no longer match
            # the shape of `label`.
            logit = model(img)
            loss = criterion(logit, label)

            vali_loss.append(loss.item())

    vali_mae_loss = np.mean(vali_loss)
    return vali_mae_loss

In [29]:
def train(model, optimizer, train_loader, vali_loader, scheduler, device,
          epochs=30,
          save_path='/home/lab16/jupyter_home/saved_models/resnet50_scheduler.pth'):
    """Train ``model`` and checkpoint the weights with the best validation loss.

    For every epoch: one pass over ``train_loader`` with cross-entropy loss,
    one ``scheduler.step()`` (when a scheduler is given), one validation pass,
    and a checkpoint save whenever the validation loss improves.

    Args:
        model: Module mapping an image batch to logits.
        optimizer: Optimizer over the model's parameters.
        train_loader: Iterable yielding ``(img, label)`` training batches.
        vali_loader: Iterable yielding ``(img, label)`` validation batches.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``.
        device: Device the model and batches are moved to.
        epochs: Number of epochs to run. Defaults to 30, the value previously
            hard-coded in this function.
        save_path: File the best ``state_dict`` is written to. Defaults to the
            path previously hard-coded in this function. Missing parent
            directories are created before training starts.

    Returns:
        A dict ``{'train_loss': [...], 'vali_loss': [...]}`` holding one mean
        loss value per epoch. (The function previously returned ``None``.)
    """
    model.to(device)

    # Per-epoch history as plain floats. Storing the loss tensors themselves
    # would keep every step's tensor (and its device memory) referenced for
    # the whole run.
    history = {'train_loss': [], 'vali_loss': []}

    # Loss Function
    criterion = torch.nn.CrossEntropyLoss()
    best_loss = float('inf')

    # Create the checkpoint directory up front so that a missing directory
    # cannot make the first save fail after a full epoch of training.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = []
        for img, label in tqdm(train_loader):
            img, label = img.float().to(device), label.float().to(device)

            optimizer.zero_grad()

            # Data -> Model -> Output. The logits are used as returned:
            # squeezing them would drop the batch dimension for a batch of one
            # sample and no longer match the shape of `label`.
            logit = model(img)

            # Calc loss
            loss = criterion(logit, label)

            # backpropagation
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        if scheduler is not None:
            scheduler.step()

        epoch_train_loss = float(np.mean(train_loss))
        history['train_loss'].append(epoch_train_loss)

        # Evaluation Validation set
        vali_loss = validation(model, vali_loader, criterion, device)
        history['vali_loss'].append(float(vali_loss))

        print(f'Epoch [{epoch}] Train loss : [{epoch_train_loss:.5f}] Validation loss : [{vali_loss:.5f}]\n')

        # Model Saved
        if best_loss > vali_loss:
            best_loss = vali_loss
            torch.save(model.state_dict(), save_path)
            print('Model Saved.')

    return history

In [30]:
# Build the network (ResNet50.__init__ loads pretrained backbone weights).
model = ResNet50()

# Optional: resume from a previously saved state_dict.
# model_state_dict = torch.load("./saved_models/resnet50.pth", map_location=device)
# model.load_state_dict(model_state_dict)

# set optimizer
# NOTE(review): lr is hard-coded here; args.initial_lr / args.optimizer from
# the config cell are only used by the commented-out Lamb line below.
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

# optimizer = optim.Lamb(model.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)

### Scheduler

In [31]:
# set scheduler
# Cosine annealing of the optimizer's learning rate with T_max=10 and a floor
# of eta_min=0.001; train() calls scheduler.step() once per epoch.
# NOTE(review): train() runs for 30 epochs, i.e. more steps than T_max --
# confirm that the schedule's behaviour beyond step 10 is what is intended.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0.001, verbose=True)

Adjusting learning rate of group 0 to 3.0000e-03.


In [33]:
# # scheduler 변경 시 사용 코드

# # hyper-parameter에 들어갈 scheduler list
# scheduler_list = []

# # 사용할 scheduler 선택, parameter 수치 적절히 설정 필요
# if args.scheduler == 'lambda':
#     lambda1 = lambda epoch: 0.65 ** epoch
#     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1, verbose=True)
# elif args.scheduler == 'multiplicative':
#     lmbda = lambda epoch: 0.65 ** epoch
#     scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda, verbose=True)
# elif args.scheduler == 'step':
#     scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1, verbose=True)
# elif args.scheduler == 'multistep':
#     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[6,8,9], gamma=0.1, verbose=True)
    
# elif args.scheduler == 'exponential': 
#     scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1, verbose=True)
    
# elif args.scheduler == 'cos':
#     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0, verbose=True)
    
# elif args.scheduler == 'cycle':
#     scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=5, mode="triangular", verbose=True)
# elif args.scheduler == 'cycle2':
#     scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=5, mode="triangular2", verbose=True)
# elif args.scheduler == 'cycle_exp':
#     scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1, step_size_up=5, mode="exp_range", gamma=0.85, verbose=True)
    
# elif args.scheduler == 'one_linear':
#     scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=10, epochs=10, anneal_strategy='linear', verbose=True)
# elif args.scheduler == 'one_cos':
#     scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=10, epochs=10, anneal_strategy='cos', verbose=True)
    
# elif args.scheduler == 'cos_warm_restarts':
#     scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, eta_min=0.001, last_epoch=-1, verbose=True)
    
# # input으로 val loss/metric, optimizer에 momentum 설정 필요
# elif args.scheduler == 'reduce':
#     scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', verbose=True)
    
# # elif args.scheduler == 'cos_warmup':
# #     scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
# #                                                          num_warmup_steps=num_warmup_steps, 
# #                                                          num_training_steps=num_total_steps, verbose=True)

In [34]:
# # scheduler 목록/설명

# ## 1. Lambda LR - Epoch에 따른 가중치로 lr를 점점 감소
# ## lr_lambda: lr에 곱해질 factor
# lambda1 = lambda epoch: 0.65 ** epoch
# scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)

# ## 2. MultiplicativeLR - 이전 Epoch에 따른 가중치(누적곱)로 lr를 점점 감소
# lmbda = lambda epoch: 0.65 ** epoch
# scheduler = torch.optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)

# ## *3. StepLR - step size마다 * gamma만큼 lr 감소
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

# ## *4. MultiStepLR - lr를 감소시킬 epoch 지정
# ## milestones: lr를 줄일 epoch index의 list
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[6,8,9], gamma=0.1)

# ## *5. ExponentialLR - the learning rate decay follows an exponential function
# scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

# ## 6. CosineAnnealingLR - cosine-shaped schedule (lr follows a cosine curve down to eta_min, then rises back to the initial lr)
# ## T_max: 최대 iteration 횟수
# ## eta_min: 최소로 떨어질 수있는 learning rate default=0
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0)

# ## 7-1. CyclicLR - triangular (A basic triangular cycle without amplitude scaling)
# ## step_size_up – Number of training iterations in the increasing half of a cycle. Default: 2000
# scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1,step_size_up=5,mode="triangular")

# ## 7-2. CyclicLR - triangular2 (A basic triangular cycle that scales initial amplitude by half each cycle)
# scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1,step_size_up=5,mode="triangular2")

# ## 7-3. CyclicLR - exp_range (A cycle that scales initial amplitude by gamma at each cycle iteration)
# scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.1,step_size_up=5,mode="exp_range",gamma=0.85)

# ## 8-1. OneCycleLR - linear
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=10, epochs=10, anneal_strategy='linear')

# ## 8-2. OneCycleLR - cos
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=10, epochs=10, anneal_strategy='cos')

# ## 9. CosineAnnealingWarmRestarts
# ## T_0 – Number of iterations for the first restart.
# ## T_mult (int, optional) – A factor increases T_{i} after a restart. Default: 1.
# ## eta_min (float, optional) – Minimum learning rate. Default: 0.
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, eta_min=0.001, last_epoch=-1)
# # scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=0.01, last_epoch=-1)

# ## 10. *ReduceLROnPlateau - 성능 향상 없을 때 lr 감소, input으로 val loss/metric, optimizer에 momentum 설정
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
# # for epoch in range(100):
# #      train(...)
# #      val_loss = validate(...)

# #      # Note that step should be called after validate()
# #      scheduler.step(val_loss)
    
# ## 11. get_cosine_schedule_with_warmup - after a warm-up period in which lr increases linearly from 0 to the initial lr set in the optimizer, lr decreases from that initial lr to 0 following a cosine curve
# scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
#                                                          num_warmup_steps=num_warmup_steps, 
#                                                          num_training_steps=num_total_steps)


In [35]:
# scheduler = None

train(model, optimizer, train_loader, vali_loader, scheduler, device)

  0%|          | 0/271 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.9511e-03.


  0%|          | 0/228 [00:00<?, ?it/s]

Epoch [1] Train loss : [4.03590] Validation loss : [4.40235]

Model Saved.


  0%|          | 0/271 [00:00<?, ?it/s]

Adjusting learning rate of group 0 to 2.8090e-03.


  0%|          | 0/228 [00:00<?, ?it/s]

Epoch [2] Train loss : [3.38776] Validation loss : [4.62732]



  0%|          | 0/271 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
def predict(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect the logits per sample.

    Args:
        model: Module mapping an image batch to logits.
        test_loader: Iterable yielding either image batches or
            ``(img, label, ...)`` batches; anything after the image is ignored.
        device: Device the image batches are moved to.

    Returns:
        A list with one entry per sample, each entry being that sample's list
        of logits.
    """
    model.eval()
    model_pred = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            # Accept label-free batches as well as (img, label) batches.
            img = batch[0] if isinstance(batch, (list, tuple)) else batch
            img = img.float().to(device)

            pred_logit = model(img).detach().cpu()
            # Keep exactly one row per sample. A bare squeeze() would turn a
            # single-sample batch into a 1-D tensor, and extend() would then
            # append its logits as separate scalar "predictions".
            pred_logit = pred_logit.reshape(img.shape[0], -1)

            model_pred.extend(pred_logit.tolist())
    return model_pred

In [None]:
# Load the best checkpoint written by train(). map_location remaps the stored
# tensors onto the current device, so a checkpoint saved from a CUDA run can
# also be loaded on a CPU-only machine.
checkpoint = torch.load('/home/lab16/jupyter_home/saved_models/resnet50_scheduler.pth', map_location=device)
model = ResNet50().to(device)
model.load_state_dict(checkpoint)

# Logits for every validation sample, in loader order.
preds = predict(model, vali_loader, device)

In [None]:
# Predicted class: index of the largest logit in each row of preds.
pred_labels = np.argmax(preds, axis=1)
# True class: index of the largest entry in each one-hot validation row.
true_labels = one_hot_valid_y.argmax(-1)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the predicted class indices against the true class indices
# on the validation set.
accuracy_score(true_labels, pred_labels)