# 1. 데이터셋 살펴보기

In [7]:
import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
CLASS_NAME_TO_ID = {'Bus': 0, 'Truck': 1}

In [8]:
# Root directory of the detection dataset; df.csv is the annotation table
# (one row per bounding box, see the columns printed below).
data_dir = '../../data/DRIVING-DATASET/Detection'
data_df = pd.read_csv(os.path.join(data_dir, 'df.csv'))
data_df  # bare expression: the notebook displays the table

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,...,IsDepiction,IsInside,XClick1X,XClick2X,XClick3X,XClick4X,XClick1Y,XClick2Y,XClick3Y,XClick4Y
0,0000599864fd15b3,xclick,Bus,1,0.343750,0.908750,0.156162,0.650047,1,0,...,0,0,0.421875,0.343750,0.795000,0.908750,0.156162,0.512700,0.650047,0.457197
1,00006bdb1eb5cd74,xclick,Truck,1,0.276667,0.697500,0.141604,0.437343,1,0,...,0,0,0.299167,0.276667,0.697500,0.659167,0.141604,0.241855,0.352130,0.437343
2,00006bdb1eb5cd74,xclick,Truck,1,0.702500,0.999167,0.204261,0.409774,1,1,...,0,0,0.849167,0.702500,0.906667,0.999167,0.204261,0.398496,0.409774,0.295739
3,00010bf498b64bab,xclick,Bus,1,0.156250,0.371250,0.269188,0.705228,0,0,...,0,0,0.274375,0.371250,0.311875,0.156250,0.269188,0.493882,0.705228,0.521691
4,00013f14dd4e168f,xclick,Bus,1,0.287500,0.999375,0.194184,0.999062,0,1,...,0,0,0.920000,0.999375,0.648750,0.287500,0.194184,0.303940,0.999062,0.523452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24057,fff2b15ad6007d0e,activemil,Truck,1,0.277344,0.622656,0.226389,0.859722,0,0,...,0,0,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000
24058,fff376d20410e4c9,xclick,Bus,1,0.295625,0.558750,0.306667,0.397500,1,0,...,1,0,0.424375,0.316250,0.295625,0.558750,0.306667,0.397500,0.382500,0.335833
24059,fff376d20410e4c9,xclick,Bus,1,0.348125,0.701250,0.423333,0.744167,0,0,...,1,0,0.478750,0.493125,0.348125,0.701250,0.423333,0.744167,0.537500,0.523333
24060,fffde5953a818927,xclick,Bus,1,0.277500,0.605625,0.565000,0.795833,0,0,...,0,0,0.488125,0.277500,0.517500,0.605625,0.565000,0.670000,0.795833,0.704167


In [9]:
# Look at the bounding boxes together with the image.

# YOLO bounding-box format = x_cen, y_cen, w, h

BOX_COLOR = {'Bus': (200, 0, 0), 'Truck': (0, 0, 200)}
CLASS_ID_TO_NAME = {0: 'Bus', 1: 'Truck'}


def visualize(img, bboxes, class_ids, color=BOX_COLOR, thickness=2):
    """Draw every bounding box and its class label onto ``img``.

    Args:
        img: Image array to draw on. The OpenCV drawing calls modify it
            in place.
        bboxes: Iterable of ``(x_cen, y_cen, w, h)`` boxes. The values are
            truncated to int and used directly as pixel coordinates, so they
            must already be in pixel units.
        class_ids: Iterable of class ids (keys of ``CLASS_ID_TO_NAME``),
            parallel to ``bboxes``.
        color: Mapping from class name to the colour tuple used for the box
            outline and the label background.
        thickness: Outline thickness of the box rectangle.

    Returns:
        ``img`` itself, after all boxes have been drawn. With no boxes the
        image is returned untouched.
    """
    for bbox, class_id in zip(bboxes, class_ids):
        class_name = CLASS_ID_TO_NAME[class_id]

        # Convert centre/size to the corner points cv2.rectangle expects.
        x_cen, y_cen, w, h = bbox
        x_min = int(x_cen - w / 2)
        y_min = int(y_cen - h / 2)
        x_max = int(x_cen + w / 2)
        y_max = int(y_cen + h / 2)

        # Bounding box outline.
        cv2.rectangle(img, (x_min, y_min), (x_max, y_max),
                      color=color[class_name], thickness=thickness)

        # Class name on a filled strip sitting just above the box.
        ((text_w, text_h), _) = cv2.getTextSize(
            class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
        cv2.rectangle(img, (x_min, y_min - int(1.3 * text_h)),
                      (x_min + text_w, y_min), color[class_name], -1)
        cv2.putText(
            img,
            text=class_name,
            org=(x_min, y_min - int(0.3 * text_h)),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.35,
            color=(255, 255, 255),
            lineType=cv2.LINE_AA,
        )

    # Return only after the loop: returning inside it drew just the first box
    # and produced None when there were no boxes at all.
    return img
        

In [10]:
from ipywidgets import interact

In [11]:
# Directory holding the images, and the names in it that end with 'jpg'.
file_dir = '../../data/DRIVING-DATASET/Detection/images'
img_files = [fn for fn in os.listdir(file_dir) if fn.endswith('jpg')]

In [13]:
@interact(index=(0, len(img_files)-1))
def show_imgbox(index=0):
    """Show image number ``index`` with its annotated boxes drawn on top.

    The annotation rows whose ImageID matches the file name are looked up in
    ``data_df``, their normalized corner coordinates are scaled to pixels and
    converted to (x_cen, y_cen, w, h), and the result is plotted.
    """
    img_file = img_files[index]
    image_id = img_file.split('.')[0]
    rows = data_df[data_df['ImageID'] == image_id]
    class_ids = [CLASS_NAME_TO_ID[label] for label in rows['LabelName'].values]

    img = cv2.cvtColor(cv2.imread(os.path.join(file_dir, img_file)), cv2.COLOR_BGR2RGB)
    img_h, img_w, _ = img.shape

    # Start from [XMin, YMin, XMax, YMax] in normalized units ...
    yolo_box = rows[["XMin", "YMin", "XMax", "YMax"]].values.copy()
    # ... scale x columns by the width and y columns by the height ...
    yolo_box[:, 0::2] *= img_w
    yolo_box[:, 1::2] *= img_h
    # ... then turn the max corner into (w, h) and the min corner into the centre.
    yolo_box[:, 2:] -= yolo_box[:, :2]
    yolo_box[:, :2] += yolo_box[:, 2:] / 2

    canvas = visualize(img, yolo_box, class_ids)

    plt.figure(figsize=(6, 6))
    plt.imshow(canvas)
    plt.axis('off')
    plt.show()


interactive(children=(IntSlider(value=0, description='index', max=15224), Output()), _dom_classes=('widget-int…

# 2. 데이터셋 및 데이터 로더 구축

In [14]:
import torch

In [90]:
class My_Dataset():
    """Map-style detection dataset backed by ``df.csv`` and a phase folder.

    ``data_dir`` must contain ``df.csv`` (the annotation table) and one
    sub-directory per phase holding the images. Each item is a tuple
    ``(img, target, filename)`` where ``target`` is an ``(N, 5)`` array whose
    rows are ``[x_cen, y_cen, w, h, class_id]``. The box values are derived
    from the XMin/YMin/XMax/YMax columns of ``df.csv`` and are not rescaled
    by this class.
    """

    def __init__(self, data_dir, phase, transformer=None):
        """Load the annotation table and list the images of ``phase``.

        Args:
            data_dir: Dataset root containing ``df.csv`` and the phase folders.
            phase: Name of the sub-directory to read images from.
            transformer: Optional callable invoked as
                ``transformer(image=..., bboxes=..., class_ids=...)`` that
                returns a mapping with the same three keys.
        """
        self.data_dir = data_dir
        self.data_df = pd.read_csv(os.path.join(data_dir, 'df.csv'))
        self.phase = phase
        self.transformer = transformer

        self.img_files = [fn for fn in os.listdir(os.path.join(self.data_dir, phase)) if fn.endswith('jpg')]

    def __len__(self):
        """Return the number of images in this phase."""
        # Use the instance's own file list, not the notebook-level `img_files`.
        return len(self.img_files)

    def __getitem__(self, index):
        """Return ``(img, target, filename)`` for the image at ``index``."""
        img, filename = self.get_img(index)
        bboxes, class_ids = self.get_label(filename)

        if self.transformer is not None:
            transformed_data = self.transformer(image=img, bboxes=bboxes, class_ids=class_ids)
            img = transformed_data['image']
            bboxes = np.array(transformed_data['bboxes'])
            class_ids = np.array(transformed_data['class_ids'])
            if bboxes.size == 0:
                # An empty box list becomes a 1-D array; restore the (0, 4)
                # shape so the axis=1 concatenate below still works.
                bboxes = bboxes.reshape(0, 4)

        target = np.concatenate((bboxes, class_ids[:, np.newaxis]), axis=1)

        return img, target, filename

    def get_img(self, index):
        """Read the image at ``index`` and return ``(rgb_image, filename)``.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
        """
        filename = self.img_files[index]
        # Resolve against this dataset's own root, not the global `data_dir`.
        path = os.path.join(self.data_dir, self.phase, filename)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        return img, filename

    def get_label(self, filename):
        """Return ``(bboxes, class_ids)`` for the image called ``filename``.

        ``bboxes`` is an ``(N, 4)`` array of ``[x_cen, y_cen, w, h]`` computed
        from the corner columns of ``df.csv``; ``class_ids`` is the matching
        1-D array of ids looked up in ``CLASS_NAME_TO_ID``.
        """
        img_id = filename.split('.')[0]
        img_df = self.data_df[self.data_df['ImageID'] == img_id]
        class_names = img_df['LabelName'].values
        class_ids = np.array([CLASS_NAME_TO_ID[class_name] for class_name in class_names])
        box = img_df[['XMin', 'YMin', 'XMax', 'YMax']].values

        bbox = box.copy()
        # [x_min, y_min, x_max, y_max] -> [x_min, y_min, w, h] -> [x_cen, y_cen, w, h]
        bbox[:, 2:4] -= bbox[:, 0:2]
        bbox[:, 0:2] += (bbox[:, 2:4] / 2)

        return bbox, class_ids

In [91]:
# Build the 'train' dataset without a transformer (raw images and boxes).
data_dir = '../../data/DRIVING-DATASET/Detection'
dataset = My_Dataset(data_dir, 'train')

In [92]:
# Fetch the first sample to check the (img, target, filename) triple.
img, target, filename = dataset[0]

In [93]:
# Tensor conversion module
# albumentations offers more functionality for transforming bounding boxes than torchvision transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [94]:
IMG_SIZE = 448 # for comparison with the Faster R-CNN from chapter 04

# With albumentations, normalize first and only then convert to a tensor.
transformer = A.Compose([
        A.Resize(height=IMG_SIZE, width=IMG_SIZE),
        A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='yolo', label_fields=['class_ids']), # the boxes are transformed as well
)

In [95]:
# Rebuild the 'train' dataset with the transformer and display the first sample.
dataset = My_Dataset(data_dir, 'train', transformer)
dataset[0]

(tensor([[[-2.0665, -2.0665, -2.0665,  ..., -2.0494, -2.0494, -2.0494],
          [-2.0665, -2.0665, -2.0665,  ..., -2.0494, -2.0494, -2.0494],
          [-2.0494, -2.0494, -2.0494,  ..., -2.0494, -2.0494, -2.0494],
          ...,
          [-2.0665, -2.0665, -2.0665,  ..., -2.0494, -2.0494, -2.0494],
          [-2.0665, -2.0665, -2.0665,  ..., -2.0323, -2.0323, -2.0323],
          [-2.0665, -2.0665, -2.0665,  ..., -2.0323, -2.0323, -2.0323]],
 
         [[-1.9832, -1.9832, -1.9832,  ..., -1.9657, -1.9657, -1.9657],
          [-1.9832, -1.9832, -1.9832,  ..., -1.9657, -1.9657, -1.9657],
          [-1.9657, -1.9657, -1.9657,  ..., -1.9657, -1.9657, -1.9657],
          ...,
          [-1.9832, -1.9832, -1.9832,  ..., -1.9657, -1.9657, -1.9657],
          [-1.9832, -1.9832, -1.9832,  ..., -1.9482, -1.9482, -1.9482],
          [-1.9832, -1.9832, -1.9832,  ..., -1.9482, -1.9482, -1.9482]],
 
         [[-1.7522, -1.7522, -1.7522,  ..., -1.7696, -1.7696, -1.7696],
          [-1.7522, -1.7522,

In [98]:
from torch.utils.data import DataLoader

In [115]:
# Data loader
def collate_fn(batch):
    """Regroup a batch of (img, target, filename) samples into three lists.

    Nothing is stacked: the result is a tuple ``(imgs, targets, filenames)``
    of plain Python lists, each as long as the batch.
    """
    # Unpacking here enforces that every sample has exactly three fields.
    samples = [(image, target, filename) for image, target, filename in batch]
    return (
        [sample[0] for sample in samples],
        [sample[1] for sample in samples],
        [sample[2] for sample in samples],
    )

In [116]:
data_dir = '../../data/DRIVING-DATASET/Detection/'
BATCH_SIZE = 6

# Training set with the transformer applied, served in shuffled batches.
# collate_fn returns the batch as three lists instead of stacked tensors.
trainset = My_Dataset(data_dir=data_dir, phase="train", transformer=transformer)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)

In [121]:
# Smoke test: pull one batch and print the length of each of its three lists.
for index, batch in enumerate(trainloader):
    images = batch[0]
    targets = batch[1]
    filenames = batch[2]

   
    print(len(images),len(targets), len(filenames))
    
    # Stop after the first batch.
    if index == 0:
        break


6 6 6


In [126]:
def build_dataloader(data_dir, transformer, batch_size=4):
    """Build the 'train' and 'val' DataLoaders for the dataset in ``data_dir``.

    Both phases share ``transformer`` and ``collate_fn``. The train loader is
    shuffled and uses ``batch_size``; the val loader is unshuffled with a
    batch size of 1.

    Returns:
        Dict with the keys 'train' and 'val' mapping to the DataLoaders.
    """
    datasets = {
        phase: My_Dataset(data_dir, phase, transformer)
        for phase in ('train', 'val')
    }
    return {
        'train': DataLoader(datasets['train'], batch_size=batch_size, shuffle=True, collate_fn=collate_fn),
        'val': DataLoader(datasets['val'], batch_size=1, shuffle=False, collate_fn=collate_fn),
    }

In [127]:
# Build both loaders with a train batch size of 4.
dataloaders = build_dataloader(data_dir, transformer, 4)

# Smoke test: pull one train batch and print the length of its three lists.
for index, batch in enumerate(dataloaders['train']):
    images = batch[0]
    targets = batch[1]
    filenames = batch[2]

   
    print(len(images),len(targets), len(filenames))
    
    # Stop after the first batch.
    if index == 0:
        break

4 4 4
