In [1]:
import sys, os
sys.path.append('../YOLOv1')

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import os
import cv2

from dataset import VOCDataset
from model import YOLOv1
from loss import YoloLoss1, YoloLoss2
from trainer import Trainer
from config import config

# Instantiate the project configuration. This rebinds the imported `config`
# callable to its instance, which every later cell reads from.
config = config()

config.batch_size = 16
config.lr = 2e-5
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook does not fail on machines without CUDA.
config.device = 'cuda' if torch.cuda.is_available() else 'cpu'

config.print_interval = 50
config.model_dir = '../checkpoints/experiment2'
config.load_model_path = os.path.join(config.model_dir, 'YOLOv1_180epoch.pt')

# NOTE(review): machine-specific absolute path; adjust to the local dataset location.
config.dataset_dir = r'D:\AI\Dataset\VocDetection2'
config.img_dir = os.path.join(config.dataset_dir, 'images')
config.label_dir = os.path.join(config.dataset_dir, 'labels')
config.train_csv = os.path.join(config.dataset_dir, 'train.csv')
config.valid_csv = os.path.join(config.dataset_dir, 'valid.csv')

config.save_csv_path = os.path.join(config.model_dir, 'YOLOv1_result.csv')


def _build_loader(split_csv):
    """Create the dataset and its DataLoader for one split CSV.

    Both splits share the same settings: `config.test_transform`, image paths
    returned alongside each sample, no shuffling and no dropped last batch.
    Presumably this keeps the outputs in dataset order for the error analysis
    in the later cells -- confirm against `Trainer.test`.

    Returns:
        (dataset, loader) tuple.
    """
    dataset = VOCDataset(
        split_csv,
        config.img_dir,
        config.label_dir,
        config.img_size,
        config.S, config.B, config.C,
        config.test_transform,
        return_img_path=True,
    )
    loader = DataLoader(
        dataset=dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False,
    )
    return dataset, loader


train_dataset, train_loader = _build_loader(config.train_csv)
valid_dataset, valid_loader = _build_loader(config.valid_csv)

model = YOLOv1(config.S, config.B, config.C).to(config.device)
# The optimizer is built only because Trainer takes one in its constructor;
# this notebook only calls `trainer.test`.
optimizer = optim.Adam(model.parameters(), config.lr)
crit = YoloLoss2(config.S, config.B, config.C)
trainer = Trainer(model, optimizer, crit)

In [2]:
# Run the trainer's test pass over the (unshuffled) training loader. The returned
# predicted boxes, target boxes and image paths feed the error analysis below.
losses, pred_bboxes, true_bboxes, img_paths = trainer.test(train_loader, config)




now loading.....
resume epoch: 181 lowest loss: 37.78397787184942

[1/759] test loss: 17.3782
[51/759] test loss: 13.6361
[101/759] test loss: 6.6943
[151/759] test loss: 7.0980
[201/759] test loss: 17.3682
[251/759] test loss: 11.6920
[301/759] test loss: 17.9308
[351/759] test loss: 12.5553
[401/759] test loss: 8.7950
[451/759] test loss: 13.4154
[501/759] test loss: 11.0005
[551/759] test loss: 10.6583
[601/759] test loss: 11.2985
[651/759] test loss: 14.5172
[701/759] test loss: 19.6226
[751/759] test loss: 9.2087


In [7]:
import sys, os
sys.path.append('../YOLOv1')

from utils import analyze_error

# Compare predictions against targets per image. `analyze_error` presumably writes
# its result table to `csv_path` (the next cell reads that file) -- confirm in utils.
csv_path = './experiment2_error_analysis.csv'
analyze_error(pred_bboxes, true_bboxes, img_paths, csv_path)

In [12]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the error-analysis table from `csv_path` and preview the first rows.
df = pd.read_csv(csv_path)
df.head(10)

Unnamed: 0,img_path,train_idx,gt_class,gt_conf,gt_x,gt_y,gt_w,gt_h,dt_class,dt_conf,dt_x,dt_y,dt_w,dt_h,iou,error_type
0,D:\AI\Dataset\VocDetection2\images\2008_006148...,0.0,14.0,1.0,0.321,0.506,0.638,0.738,14.0,0.967628,0.308723,0.510902,0.634,0.730596,0.930621,correct
1,D:\AI\Dataset\VocDetection2\images\2010_000337...,1.0,10.0,1.0,0.602,0.735,0.304,0.28,10.0,1.0,0.602,0.735,0.304,0.28,0.0,not detected
2,D:\AI\Dataset\VocDetection2\images\2007_005691...,2.0,14.0,1.0,0.499883,0.501,0.429738,0.998,14.0,1.059777,0.496484,0.507719,0.426793,0.99365,0.937537,correct
3,D:\AI\Dataset\VocDetection2\images\2010_004059...,3.0,6.0,1.0,0.499,0.57586,0.742,0.393878,6.0,0.906602,0.499543,0.56286,0.784091,0.415121,0.905706,correct
4,D:\AI\Dataset\VocDetection2\images\2012_000918...,4.0,14.0,1.0,0.322,0.352,0.404,0.45,14.0,0.993183,0.317514,0.354409,0.443219,0.435424,0.979343,correct
5,D:\AI\Dataset\VocDetection2\images\2012_000918...,4.0,14.0,1.0,0.322,0.352,0.404,0.45,14.0,0.993183,0.317514,0.354409,0.443219,0.435424,0.979343,correct
6,D:\AI\Dataset\VocDetection2\images\2012_000918...,4.0,14.0,1.0,0.636,0.545,0.352,0.636,14.0,0.761551,0.636347,0.532185,0.353135,0.623676,0.761233,correct
7,D:\AI\Dataset\VocDetection2\images\2012_000918...,4.0,14.0,1.0,0.636,0.545,0.352,0.636,14.0,0.761551,0.636347,0.532185,0.353135,0.623676,0.761233,correct
8,D:\AI\Dataset\VocDetection2\images\2010_001120...,5.0,14.0,1.0,0.285,0.705,0.316,0.59,14.0,0.861552,0.51496,0.63468,0.260682,0.515887,0.508239,correct
9,D:\AI\Dataset\VocDetection2\images\2010_001120...,5.0,14.0,1.0,0.285,0.705,0.316,0.59,14.0,0.861552,0.51496,0.63468,0.260682,0.515887,0.508239,correct


In [None]:
import numpy as np

def save_img(img, bboxes, class_names, colors, mean, std, analysis=True, save_path=None):
    '''
    Draw labelled bounding boxes on a normalized image and write it to disk.

    parameters:
        img (tensor): [C, H, W], normalized with `mean` / `std`
        bboxes (iterable): rows of [class_prediction, prob_score, x, y, w, h];
            when `analysis` is True, index 6 of each row holds the error type
        class_names (list): class names of the dataset, indexed by class id
        colors (list): list of colors, indexed by class id
        mean, std: per-channel statistics forwarded to `inverse_normalize`
        analysis (bool): append the error type to each box label
        save_path (str): output file path handed to `cv2.imwrite`

    raises:
        ValueError: if `save_path` is None
        OSError: if `cv2.imwrite` reports that the image was not written
    '''
    if save_path is None:
        raise ValueError("save_path must be provided to save_img")

    img_size = [img.shape[1], img.shape[2]]  # height, width

    img = inverse_normalize(img, mean, std)
    # Clip before the uint8 cast so values just outside [0, 255] cannot wrap around.
    img = np.clip(np.asarray(img), 0, 255).astype(np.uint8)
    img = np.transpose(img, (1, 2, 0))  # [C, H, W] -> [H, W, C]
    # Swaps the first and third channels. Presumably the input is RGB and this
    # yields the BGR order cv2.imwrite expects -- confirm the channel order of `img`.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    for bbox in bboxes:
        class_label = int(bbox[0])
        prob = bbox[1]
        coord = bbox[2:6]

        x1, y1, x2, y2 = convert_box_format(img_size, coord)
        color = colors[class_label]

        # The label background width is estimated from the text length.
        if analysis:
            error_type = bbox[6]
            text = class_names[class_label] + f" | {prob:0.2f} | {error_type}"
            text_len = 12*len(text)
        else:
            text = class_names[class_label] + f" | {prob:0.2f}"
            text_len = 9*len(text)

        # Put the label inside the box when there is no room above it.
        if y1 <= 16:
            rect_start, rect_end = (x1, y1), (x1+text_len, y1+13)
            text_coord = (x1, y1+11)
        else:
            rect_start, rect_end = (x1, y1-16), (x1+text_len, y1)
            text_coord = (x1, y1-3)

        cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness=2)
        cv2.rectangle(img, rect_start, rect_end, color, thickness=-1)
        cv2.putText(
            img, text=text, org=text_coord, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.5, thickness=1, lineType=cv2.LINE_AA, color=(255, 255, 255)
        )

    # Write once, after all boxes are drawn, so an image without boxes is still
    # saved and the file is not rewritten for every box.
    if not cv2.imwrite(save_path, img):
        raise OSError(f"cv2.imwrite failed to write {save_path!r}")

def inverse_normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    '''
    Undo per-channel mean/std normalization and rescale the result by 255.

    The input is not modified; a new object is returned.

    parameters:
        img (tensor): [C, H, W]; the first three channels are de-normalized
        mean (sequence): per-channel mean used for normalization
        std (sequence): per-channel standard deviation used for normalization

    returns:
        same type as `img`: `(img * std + mean) * 255` for the first three channels
    '''

    # Work on a copy so the caller's data is not de-normalized in place
    # (torch tensors expose `clone`, numpy arrays expose `copy`).
    img = img.clone() if hasattr(img, 'clone') else img.copy()

    for channel in range(3):
        img[channel] = (img[channel] * std[channel]) + mean[channel]

    return 255 * img

def set_color(num_classes):
    '''
    Build a list of `num_classes` random [b, g, r] colors.

    Each channel is drawn from 0..100 because brighter colors (above 100)
    are glaring.
    '''
    import random

    def dark_channel():
        return random.randint(0, 100)

    # Channels are drawn in b, g, r order for every class.
    return [
        [dark_channel(), dark_channel(), dark_channel()]
        for _ in range(num_classes)
    ]

def convert_box_format(img_size, coordinate):
    '''
    Convert a normalized center-format box to integer pixel corners.

    parameters:
        img_size: (height, width) of the image
        coordinate: normalized [x_center, y_center, width, height]

    returns:
        list: [x_min, y_min, x_max, y_max], truncated to int
    '''
    img_height, img_width = img_size
    center_x, center_y = coordinate[0], coordinate[1]
    box_w, box_h = coordinate[2], coordinate[3]

    def to_pixel(extent, doubled_edge):
        # `doubled_edge` is twice the normalized edge position.
        return int(extent * doubled_edge / 2)

    return [
        to_pixel(img_width, 2 * center_x - box_w),
        to_pixel(img_height, 2 * center_y - box_h),
        to_pixel(img_width, 2 * center_x + box_w),
        to_pixel(img_height, 2 * center_y + box_h),
    ]

In [None]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
from utils import draw, set_color
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Preprocessing applied before drawing: normalize with the config statistics,
# resize the longest side to config.img_size, then pad to a square.
test_transform = A.Compose([
    A.Normalize(mean=config.mean, std=config.std, max_pixel_value=255),
    A.LongestMaxSize(max_size=config.img_size),
    A.PadIfNeeded(
        min_height=config.img_size,
        min_width=config.img_size,
        border_mode=cv2.BORDER_CONSTANT,
    ),
    ToTensorV2(),
])

class_names = config.voc_classes
colors = set_color(len(class_names))

# Select the ground-truth / detection columns of the error-analysis table by
# name rather than by position, so an extra or reordered column cannot shift them.
gt_columns = ['gt_class', 'gt_conf', 'gt_x', 'gt_y', 'gt_w', 'gt_h']
dt_columns = ['dt_class', 'dt_conf', 'dt_x', 'dt_y', 'dt_w', 'dt_h']
box_columns = ['class', 'confidence', 'x', 'y', 'w', 'h']

save_dir = './images'

# One group per image id, in ascending id order. Ids without rows are simply
# absent instead of raising an IndexError, and the frame is not rescanned per id.
for train_idx, temp_df in tqdm(df.groupby('train_idx')):
    img_path = temp_df['img_path'].values[0]
    image = np.array(Image.open(img_path).convert('RGB'))

    augmentations = test_transform(image=image)
    image = augmentations['image']

    # Copy the slices before editing them to avoid SettingWithCopyWarning.
    gt_df = temp_df[gt_columns].copy()
    gt_df.columns = box_columns
    gt_df['error_type'] = 'target'

    dt_df = temp_df[dt_columns].copy()
    dt_df.columns = box_columns
    dt_df['error_type'] = temp_df['error_type']

    bboxes = pd.concat([gt_df, dt_df]).values

    # Take the final path component, whichever separator the stored path uses.
    file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
    save_path = os.path.join(save_dir, file_name)

    draw(image, bboxes, class_names, colors, config.mean, config.std, analysis=True)
    # To write files instead of displaying them (save_dir must already exist):
    # save_img(image, bboxes, class_names, colors, config.mean, config.std, analysis=True, save_path=save_path)