# Data Load

In [1]:
import os
import sys
from glob import glob
import gc
import yaml
from pathlib import Path
import matplotlib 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import RandomSampler
from torch.utils.data.dataset import random_split
from PIL import Image
import PIL
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms as T

USE_CUDA = torch.cuda.is_available()
print(USE_CUDA)
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print('pytorch version : ',torch.__version__)
print('학습을 진행하는 기기:',device)
print('gpu 개수:', torch.cuda.device_count())
print('graphic name:', torch.cuda.get_device_name())

True
pytorch version :  1.9.1
학습을 진행하는 기기: cuda:0
gpu 개수: 1
graphic name: NVIDIA GeForce RTX 3080 Ti


In [2]:
# Yolov5 Extract
%cd C:\Users\user\Drive\s-hero\code\yolov5
!pip install -qr requirements.txt 

C:\Users\user\Drive\s-hero\code\yolov5


In [3]:
#이거
train_image_list = sorted(glob('C:/Users/user/Drive/s-hero/Code/Data/train/images/*.jpg'))
valid_image_list = sorted(glob('C:/Users/user/Drive/s-hero/Code/Data/valid/images/*.jpg'))
train_annotations_list = sorted(glob('C:/Users/user/Drive/s-hero/Code/Data/train/labels/*.txt'))
valid_annotations_list = sorted(glob('C:/Users/user/Drive/s-hero/Code/Data/valid/labels/*.txt'))

assert len(train_image_list) == len(train_annotations_list), "train 이미지 파일의 수와 annotation 파일의 수가 맞지 않습니다."
assert len(valid_image_list) == len(valid_annotations_list), "valid 이미지 파일의 수와 annotation 파일의 수가 맞지 않습니다."
print(len(train_image_list), len(valid_image_list))

575 192


In [4]:
sample_image_path = train_image_list[400]
sample_annot_path = train_annotations_list[400]

# Utilities

In [5]:
#  annotation txt file로부터 ground truth logit 반환 
def get_gt_logit(txt_path):
  txt_path = Path(txt_path)
  with open(txt_path, 'r', encoding='utf-8') as txt:
    string = txt.readline()
    list_str = string.split()
    gt_logit = int(list_str[0])

  return gt_logit

# gt logit을 넣으면 gt name을 반환 
def logit2name(gt_logit):  # input : annotations txt file들의 list
  label_dict = {}
  with open('C:/Users/user/Drive/s-hero/Code/data.yaml', 'r') as f:
    data = yaml.load(f, Loader=yaml.FullLoader)
    class_list = data['names']

    for idx in range(len(class_list)):
      name = class_list[idx]
      label_dict[idx] = name

    gt_name = label_dict[gt_logit]
    return gt_name

# annotation txt file path로부터 bbox 좌표 검출 
def get_bbox(annot_path):
  txt_path = Path(annot_path)
  with open(txt_path, 'r', encoding='UTF8') as txt:
    string = txt.readline()
    list_str = string.split()
    x, y, w, h = list_str[1:5]
    
  return x, y, w, h 

In [6]:
print(get_gt_logit(sample_annot_path))

5


In [7]:
print(logit2name(get_gt_logit(sample_annot_path)))

B_Misordered


In [8]:
x, y, w, h = get_bbox(sample_annot_path)
print(x, y, w, h)

0.468750 0.493056 0.223438 0.791667


# Dataset & Augmentation

## using torchvision

In [9]:
from torchvision import transforms
import numpy as np
from PIL import Image
import os
from pathlib import Path

def Mytransform (size=224, use_resizecrop = True, use_flip = True, use_color_jitter = False, use_gray_scale = False, use_normalize = False):
    resize_crop = transforms.RandomResizedCrop(size=size)
    horizontal_flip = transforms.RandomHorizontalFlip(p=0.5)
    color_jitter = transforms.RandomApply([
        transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)
    ], p=0.8)
    gray_scale = transforms.RandomGrayscale(p=0.2)
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    to_tensor = transforms.ToTensor()
    
    transforms_array = np.array([resize_crop, horizontal_flip, color_jitter, gray_scale, to_tensor, normalize])
    transforms_mask = np.array([use_resizecrop, use_flip, use_color_jitter, use_gray_scale, True, use_normalize])
    transform = transforms.Compose(transforms_array[transforms_mask])

    return transform

## using albumentations

In [10]:
import albumentations as A
import albumentations.pytorch as AP
import cv2

AlbuTransform = A.Compose([
        A.Resize(416, 416),                   
        A.RandomResizedCrop(height=300,
                            width=300,
                            scale=(0.5, 1.0),
                            ratio=(0.75, 1.25), 
                            interpolation=1, 
                            always_apply=False, 
                            p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.3),
        A.Affine(rotate=(-180, 180),
                shear=(-45, 45),
                p=0.5),
        A.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        AP.ToTensorV2()
        ]
        ,bbox_params = A.BboxParams(format='yolo', min_area=1024, min_visibility=0.3))

In [11]:
# Dataset 정의

import pandas as pd
import torch
from PIL import Image
from torch.utils.data import Dataset
from skimage import io
from pathlib import Path

class Mydataset(Dataset):
  # root = '/content/
  def __init__(self, root, mode = 'train', transform = None):
    self.root = os.path.join(root, mode)
    self.images = sorted(glob(self.root + '/images/*.jpg'))
    self.annotations = sorted(glob(self.root + '/labels/*.txt'))

  def __len__(self):
    assert len(self.images) == len(self.annotations), "이미지 파일의 수와 annotation 파일의 수가 맞지 않습니다."
    return len(self.annotations) 

  def __getitem__(self, idx): # dataset중에 이미지 혹은 레이블을 하나씩 불러오는 함수 
    annotation = self.annotations[idx]
    label = self.get_gt_logit(annotation)
    if self.transform:
      image = self.transform(image)
    return image, label
      

# Training

## Hyperparameter evolve

In [12]:
# %cd C:\Users\user\Drive\s-hero\code\yolov5
# !python train.py --img 640 --batch 32 --epochs 100  --data C:/Users/user/Drive/s-hero/Code/data.yaml --weights yolov5m.pt --hyp hyp.finetune.yaml --name m_b32_500epoch_100evolve


In [13]:
# from yolov5.utils.plots import plot_evolve
# plot_evolve(r'C:\Users\user\Drive\s-hero\Code\yolov5\runs\evolve\medium_batch32_100epoch_100evolve\evolve.csv')
# Image.open(r'C:\Users\user\Drive\s-hero\Code\yolov5\runs\evolve\medium_batch32_100epoch_100evolve\evolve.png')

In [14]:
#!python train.py --img 640 --batch 32 --epochs 500 --data C:/Users/user/Drive/s-hero/Code/data.yaml --weights yolov5m.pt --hyp C:/Users/user/Drive/s-hero/Code/yolov5/runs/evolve/medium_batch32_100epoch_100evolve/hyp_evolve.yaml --name m_b32_500epoch_100evo

# Inference

In [15]:
# %cd C:\Users\user\Drive\s-hero\Code
# !python ./yolov5/detect.py --source ./sample.mp4 --weights C:/Users/user/Drive/s-hero/Code/yolov5/runs/train/m_b32_300epoch_evo/weights/best.pt --name m_conf_0.6 --img 640 --device 0 --conf 0.6

# Grad-CAM

In [16]:
# %cd c:\Users\user\Drive\s-hero\Code\pytorch-grad-cam 
# from pytorch_grad_cam import GradCAM, GradCAMPlusPlus
# from pytorch_grad_cam.guided_backprop import GuidedBackpropReLUModel
# from pytorch_grad_cam.utils.image import preprocess_image, deprocess_image, show_cam_on_image
%cd c:\Users\user\Drive\s-hero\Code
from grad_cam import GradCam, GuidedBackpropReLUModel, show_cams, show_gbs, preprocess_image

%cd c:\Users\user\Drive\s-hero\Code\yolov5
# from yolov5.utils.augmentations import *
# from yolov5.utils.datasets import *

from yolov5 import train, detect
from yolov5.models.common import *
from yolov5.models.yolo import *
from yolov5.models.yolo import Model

c:\Users\user\Drive\s-hero\Code
c:\Users\user\Drive\s-hero\Code\yolov5


In [None]:

# ckpt = {'epoch': epoch,
#         'best_fitness': best_fitness,
#         'model': deepcopy(de_parallel(model)).half(),
#         'ema': deepcopy(ema.ema).half(),
#         'updates': ema.updates,
#         'optimizer': optimizer.state_dict(),
#         'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}

In [None]:
# from collections import OrderedDict
# from torchsummary import summary

# new_dict = OrderedDict()
# empty_dict = OrderedDict()

# for key, value in model.state_dict().items():
#     value = value.type(torch.FloatTensor)
#     new_dict.update({key : value})

# model.load_state_dict(empty_dict, strict=False)

# model.load_state_dict(new_dict)

In [17]:
# with open(r'C:\Users\user\Drive\s-hero\Code\yolov5\models\yolov5s.yaml', 'r') as f:
#     v5s = yaml.load(f, Loader=yaml.FullLoader)
# model = Model(v5s, 3, 8, None)
# ckpt = torch.load(r'C:\Users\user\Drive\s-hero\Code\yolov5\runs\train\small_b32_50epoch_100evo\weights\best.pt')
# weight = ckpt['model'].state_dict()
# model.load_state_dict(weight)
# model.eval()

with open(r'C:\Users\user\Drive\s-hero\Code\yolov5\models\yolov5m.yaml', 'r') as f:
    v5m = yaml.load(f, Loader=yaml.FullLoader)   
model = Model(v5m, 3, 8, None)
ckpt = torch.load(r'C:\Users\user\Drive\s-hero\Code\yolov5\runs\train\m_b32_300epoch_evo\weights\best.pt')
weight = ckpt['model'].state_dict()
model.load_state_dict(weight)
model.eval()

Model(
  (model): Sequential(
    (0): Conv(
      (conv): Conv2d(3, 48, kernel_size=(6, 6), stride=(2, 2), padding=(2, 2), bias=False)
      (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (1): Conv(
      (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(96, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
      (act): SiLU()
    )
    (2): C3(
      (cv1): Conv(
        (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU()
      )
      (cv2): Conv(
        (conv): Conv2d(96, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU()
      )
      (cv3): Conv(
        (conv): Conv2d(96, 96, kernel_size=

In [19]:
# from torchsummary import summary
from torchinfo import summary

summary(model, (32, 3, 1980, 1020))

Layer (type:depth-idx)                             Output Shape              Param #
Model                                              --                        --
├─Sequential: 1                                    --                        --
│    └─Detect: 2                                   --                        --
│    │    └─ModuleList: 3-1                        --                        52,533
│    └─Conv: 2-1                                   [32, 48, 990, 510]        --
│    │    └─Conv2d: 3-2                            [32, 48, 990, 510]        5,184
│    │    └─BatchNorm2d: 3-3                       [32, 48, 990, 510]        96
│    │    └─SiLU: 3-4                              [32, 48, 990, 510]        --
│    └─Conv: 2-2                                   [32, 96, 495, 255]        --
│    │    └─Conv2d: 3-5                            [32, 96, 495, 255]        41,472
│    │    └─BatchNorm2d: 3-6                       [32, 96, 495, 255]        192
│    │    └─SiLU: 3-7  

In [51]:
idx1 = np.random.randint(low=0, high=len(valid_annotations_list))
idx2 = np.random.randint(low=0, high=len(valid_annotations_list))
img1 = torch.as_tensor(np.asarray(Image.open(valid_image_list[idx1]))).permute(2,0,1)
img2 = torch.as_tensor(np.asarray(Image.open(valid_image_list[idx2]))).permute(2,0,1)

img1, img2 = img1.view(1,img1.size(0),img1.size(1),img1.size(2))/255, img2.view(1,img2.size(0),img2.size(1),img2.size(2))/255
img = torch.cat((img1, img2))
img.shape

torch.Size([2, 3, 1080, 1920])

In [53]:
model.forward(img)

RuntimeError: torch.cat(): Sizes of tensors must match except in dimension 1. Got 136 and 135 in dimension 2 (The offending index is 1)

In [26]:
#python cam.py --image-path C:/Users/user/Drive/s-hero/Code/Data/valid/images/A1_044.jpg --method gradcam 
model.to('cpu')
grad_cam = GradCam(model=model, blob_name='model', target_layer_names = [i for i in range(24)], use_cuda=False)

idx = np.random.randint(low=0, high=len(valid_annotations_list))
img = cv2.imread(valid_image_list[idx], 1)
img = np.float32(cv2.resize(img, (224, 224))) / 255  # C, H, W

inputs = preprocess_image(img)
print(inputs.shape)
annot = valid_annotations_list[idx]
target_category = get_gt_logit(annot)
print(target_category)

torch.Size([1, 3, 224, 224])
2


In [None]:
cam = grad_cam(inputs=inputs, index=target_category)

show_cams(img, cam)

In [None]:
from efficientnet_pytorch import EfficientNet
from torchvision.models import resnet50
effnet = EfficientNet.from_pretrained("efficientnet-b4", advprop=True)
resnet = resnet50(pretrained=True)

model = effnet
model.eval()
grad_cam = GradCam(model=model, blob_name='_blocks', target_layer_names = [str(i) for i in range(16)], use_cuda=False)

idx = np.random.randint(low=0, high=len(valid_annotations_list))
img = cv2.imread(valid_image_list[idx], 1)
img = np.float32(cv2.resize(img, (224, 224))) / 255  # C, H, W

inputs = preprocess_image(img)
print(inputs.shape)
annot = valid_annotations_list[idx]
target_category = get_gt_logit(annot)

cam = grad_cam(inputs, target_category)
show_cams(img, cam)

In [None]:
cd-