In [5]:
import os
import torch
import numpy as np

# TRAIN SETTING
IMG_SIZE = 416
BATCH_SIZE = 8
EPOCHS = 100
SEED = 12341
CLASS_INFO = {0: 'background', 1:'Buffalo', 2:'Elephant', 3:'Rhinoceros', 4:'Zebra'}
CLASSES = CLASS_INFO.keys()
NUM_CLASSES = len(CLASSES)
PATIENCE = 10
CONFIDENCE = 0.8
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

VISUALIZE_EVALUATED_IMAGE = True
VISUALIZE_LOSS_GRAPH = True

MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.299, 0.224, 0.225])

# PATH SETTING
ROOT_DIR = os.getcwd()
DATA_PATH = os.path.join(ROOT_DIR, 'data')

TRAIN_PATH = os.path.join(DATA_PATH, 'train')
TRAIN_DF_PATH = os.path.join(TRAIN_PATH, 'train_output.csv')

TEST_PATH = os.path.join(DATA_PATH, 'test')
TEST_DF_PATH = os.path.join(TEST_PATH, 'test_output.csv')

OUTPUT_PATH = os.path.join(ROOT_DIR, 'result')

'c:\\Users\\moooo\\git\\KDT_AI_MISSION\\kdt_competition_3'

In [37]:
import os
import random

import numpy as np
import torch
from torchvision import transforms as T
import matplotlib.pyplot as plt

from config import *

import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2


class TrainTransform:
    def __init__(self):
        self.transforms = A.Compose([
                            A.OneOf([
                                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit= 0.2, val_shift_limit=0.2, p=0.9),
                                A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9)
                            ],p=0.9),                      
                            A.ToGray(p=0.05),
                            A.HorizontalFlip(p=0.2), 
                            A.Resize(height=IMG_SIZE, width=IMG_SIZE, p=1),
                            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.2),
                            ToTensorV2(p=1.0)
                        ], p=1.0, bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

    def __call__(self, **kwargs):
        return self.transforms(**kwargs)
    

class TestTransform:
    def __init__(self):
        self.transforms = A.Compose([
                            A.Resize(height=IMG_SIZE, width=IMG_SIZE, p=1.0),
                            ToTensorV2(p=1.0)
                          ], p=1.0, bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))
    
    def __call__(self, **kwargs):
        return self.transforms(**kwargs)


def set_seed():
    """
    fix seed to control the random variable 
    """
    random.seed(SEED)
    os.environ['PYTHONHASHSEED'] = str(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


def collate_fn(batch):
    """
    collate function for the ObjectDetectionDataSet.
    Only used by the dataloader.
    """
    return tuple(zip(*batch))

In [46]:
import os
from typing import Optional

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
from albumentations.pytorch.transforms import ToTensorV2
import cv2

from config import config


class ObjDetectionDataset(Dataset):
    def __init__(
        self,
        root: str,
        df: Optional[pd.DataFrame],
        transform: Optional[nn.Module] = None,
        train: bool = True
    ) -> None:
        if not os.path.isdir(root):
            raise Exception(f"Invalid root path: {root}")
        if df is not None:
            df = self.preprocessing(df)

        self.root = root
        self.df = df
        self.transform = transform
        self.img_size = (3, IMG_SIZE, IMG_SIZE)
        self.train = train

    def preprocessing(self, df: pd.DataFrame) -> pd.DataFrame:
        new_data = {
            'label_idx': [],
            'x_center': [],
            'y_center': [],
            'w': [],
            'h': []
        }

        for i in range(len(df)):
            label = df.iloc[i]['label']
            # multi labels
            if '\n' in label:
                label = label.split('\n')
            else:
                label = [label]

            label_idx = []
            x_center = []
            y_center = []
            w = []
            h = []
            for l in label:
                idx, x_c, y_c, width, height = l.split(' ')
                label_idx.append(int(idx))
                x_center.append(float(x_c))
                y_center.append(float(y_c))
                w.append(float(width))
                h.append(float(height))

            new_data['label_idx'].append(label_idx)
            new_data['x_center'].append(x_center)
            new_data['y_center'].append(y_center)
            new_data['w'].append(w)
            new_data['h'].append(h)

        new_data = pd.DataFrame(new_data)
        return pd.concat([df, new_data], axis=1)
        
    def __getitem__(self, idx):
        # get image from filepath
        data = self.df.iloc[idx]
        file_name = data['filename']
        img_path = os.path.join(self.root, str(file_name).zfill(4) + '.jpg')

        # open Image
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        img_size = image.shape
        print(img_size)
        
        # get labels
        labels = np.array(data['label_idx']) + 1  # 0 index -> background
        num_objs = len(labels)

        # get bounding box
        x_center = (np.array(data['x_center']).reshape(-1, 1) * self.img_size[2]).astype(np.int32)
        y_center = (np.array(data['y_center']).reshape(-1, 1) * self.img_size[1]).astype(np.int32)
        width = ((np.array(data['w']) * self.img_size[2]).reshape(-1, 1) // 2).astype(np.int32)
        height = ((np.array(data['h']) * self.img_size[1]).reshape(-1, 1) // 2).astype(np.int32)

        x_0 = x_center - width
        x_1 = x_center + width
        y_0 = y_center - height
        y_1 = y_center + height
        boxes = np.hstack((x_0, y_0, x_1, y_1))
        boxes = np.where( boxes > 0, boxes, 0.)
        
        if self.transform:
            sample = {
                'image': image,
                'bboxes': boxes,
                'labels': labels
            }
            transformed = self.transform(**sample)
            image = transformed['image']
            boxes = np.array(transformed['bboxes'], dtype=np.float64)
            labels = np.array(transformed['labels'], dtype=np.int64)
        else:
            image = np.asarray(image)
            image = ToTensorV2(p=1.0)(image)
        print(type(image))
        print(type(boxes), boxes)
        print(type(labels), labels)
        # get boxes area
        area = ((boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]))
        
        # get iscrowd - 여러 인스턴스가 있는지
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target['boxes'] = torch.from_numpy(boxes).float()
        target['labels'] = torch.from_numpy(labels).to(torch.int64)
        target['image_id'] = torch.tensor([file_name])
        target['area'] = torch.from_numpy(area).float()
        target['iscrowd'] = iscrowd
        
        return image, target
    
    def __len__(self):
        return len(self.df)

In [51]:
import os, json

import pandas as pd
import torch
from torch.utils.data import DataLoader
from pprint import pprint


# fix seed
set_seed()
print(f"Set Seed: {SEED}")
print(f"Torch Device: {DEVICE}")

# Data 불러오기
train_df = pd.read_csv(TRAIN_DF_PATH)
test_df = pd.read_csv(TEST_DF_PATH)

train_dset = ObjDetectionDataset(TRAIN_PATH, train_df, TrainTransform(), train=True)
# test_dset = ObjDetectionDataset(TEST_PATH, test_df, TestTransform, train=False)
print(f"Load Dataset from \n {TRAIN_DF_PATH} \n {TEST_DF_PATH}")
# print(f"Train data size: {len(train_dset)}, Test data size: {len(test_dset)}")

images, labels = next(iter(train_dset))
plt.imshow(images[3])

Set Seed: 12341
Torch Device: cpu
Load Dataset from 
 c:\Users\moooo\git\KDT_AI_MISSION\kdt_competition_3\data\train\train_output.csv 
 c:\Users\moooo\git\KDT_AI_MISSION\kdt_competition_3\data\test\test_output.csv
(423, 640, 3)
<class 'torch.Tensor'>
<class 'numpy.ndarray'> [[187.85        72.77541371 407.55       359.94326241]]
<class 'numpy.ndarray'> [2]




IndexError: index 3 is out of bounds for dimension 0 with size 3

In [None]:
train_loader = DataLoader(train_dset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
dataloaders = {
    'train': train_loader,
    'test': test_loader
}

# Model 불러오기
model = fasterrcnn_resnet_50()
model = model.to(DEVICE)

# get the model params
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=1e-4)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)

# Train model
best_model, train_loss_list, test_loss_list = train(model, optimizer, dataloaders, lr_scheduler)

# Save best model
make_dir(OUTPUT_PATH)
torch.save(best_model, os.path.join(OUTPUT_PATH, 'best_detector.pth'))

# Get metrics score from test_loader
map_result = mean_average_precision(best_model, test_loader)

print(f"Train score metrix")
pprint(map_result)

# Save metrics score
with open(os.path.join(OUTPUT_PATH, 'metrics.txt'), 'w') as f:
    f.write(json.dumps(map_result, indent='\t'))