In [1]:
import json
import argparse
from easydict import EasyDict
from importlib import import_module

import gc
from tqdm import tqdm
import os
import warnings
import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader


from torch.utils.data import DataLoader
from tqdm import tqdm

from pycocotools.coco import COCO

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [2]:
def get_args(config):
    args = EasyDict()
    with open(f'./config/{config}.json', 'r') as f:
        args.update(json.load(f))
    
    return args

def killmemory():
    gc.collect()
    torch.cuda.empty_cache()

def create_dir(path):
    if not os.path.isdir(path):
        os.mkdir(path)
        
def collate_fn(batch):
    return tuple(zip(*batch))

def inference_fn(test_dataloader, model, device):
    model.eval()
    outputs = []
    for images, targets, image_ids in tqdm(test_dataloader):

        images = list(image.float().to(device) for image in images)
        output = model(images)

        for out in output:
            outputs.append({'boxes': out['boxes'].tolist(), 'labels': out['labels'].tolist(), 'scores': out['scores'].tolist()})
    
    return outputs

def make_submission(args, outputs, k, aug_name, resize):
    prediction_strings = []
    file_names = []
    coco = COCO(args.test_annotation)
    
    for i, output in enumerate(outputs):
        prediction_string = ''
        image_info = coco.loadImgs(coco.getImgIds(imgIds=i))[0]
        for box, score, label in zip(output['boxes'], output['scores'], output['labels']):
            if score > args.test_score_threshold:
                prediction_string += str(label) + ' ' + str(score) + ' ' + str(box[0]/(args.resize[0]/512)) + ' ' + str(
                    box[1]/(args.resize[0]/512)) + ' ' + str(box[2]/(args.resize[0]/512)) + ' ' + str(box[3]/(args.resize[0]/512)) + ' '
        prediction_strings.append(prediction_string)
        file_names.append(image_info['file_name'])
    submission = pd.DataFrame()
    submission['PredictionString'] = prediction_strings
    submission['image_id'] = file_names
    submission.to_csv(f'./ensemble/({config})fold{k}_{aug_name}_{resize}.csv', index=None)


In [3]:
config = 'config11'
args = get_args(config)

num_fold = 5
resize = 1024

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
warnings.filterwarnings(action='ignore')
create_dir('./ensemble')

no_aug = A.Compose([
                A.Resize(resize, resize, p=1.0),
                ToTensorV2(p=1.0)
            ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

flip_aug = A.Compose([
                A.Resize(resize, resize, p=1.0),
                A.HorizontalFlip(p=1.0),
                ToTensorV2(p=1.0)
            ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

rotate_aug = A.Compose([
                A.Resize(resize, resize, p=1.0),
                A.Rotate(limit=[-90,-90], always_apply=True, p=1.0),
                ToTensorV2(p=1.0)
            ],bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

rotateR_aug = A.Compose([
                A.Resize(resize, resize, p=1.0),
                A.Rotate(limit=[90,90], always_apply=True, p=1.0),
                ToTensorV2(p=1.0)
            ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})


no_aug_return = A.Compose([
                    A.Resize(512, 512, p=1.0),
                    ToTensorV2(p=1.0)
                ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

flip_aug_return = A.Compose([
                      A.Resize(512, 512, p=1.0),
                      A.HorizontalFlip(p=1.0),
                      ToTensorV2(p=1.0)
                  ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

rotate_aug_return = A.Compose([
                        A.Resize(512, 512, p=1.0),
                        A.Rotate(limit=[-90,-90], always_apply=True, p=1.0),
                        ToTensorV2(p=1.0)
                    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

rotateR_aug_return = A.Compose([
                         A.Resize(512, 512, p=1.0),
                         A.Rotate(limit=[90,90], always_apply=True, p=1.0),
                         ToTensorV2(p=1.0)
                     ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

dataset_module = getattr(import_module("dataset"), args.dataset)
test_dataset = dataset_module(args.test_annotation, args.data_dir, no_aug)
flip_dataset = dataset_module(args.test_annotation, args.data_dir, flip_aug)
rotate_dataset = dataset_module(args.test_annotation, args.data_dir, rotate_aug)
rotateR_dataset = dataset_module(args.test_annotation, args.data_dir, rotateR_aug)    


numpy_aug = A.Compose([
                 A.Resize(resize, resize, p=1.0),
             ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
numpy_dataset = dataset_module(args.test_annotation, args.data_dir, numpy_aug)

In [5]:
for k in range(num_fold):
    killmemory()
    print(f'* fold {k}')
    
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn
    )

    flip_dataloader = DataLoader(
        flip_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn
    )

    rotate_dataloader = DataLoader(
        rotate_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn
    )

    rotateR_dataloader = DataLoader(
        rotateR_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn
    )

    model_module = getattr(import_module("model"), args.model)
    model = model_module(num_classes = 11, args=args) 
    check_point = f'./saved_model/({config}){args.config_name}_fold{k}.pth'
    model.load_state_dict(torch.load(check_point))
    model.to(device)
    
    test_outputs = inference_fn(test_dataloader, model, device)
    flip_outputs = inference_fn(flip_dataloader, model, device)
    rotate_outputs = inference_fn(rotate_dataloader, model, device)
    rotateR_outputs = inference_fn(rotateR_dataloader, model, device)
    
    test_final = test_outputs.copy()
    flip_final = flip_outputs.copy()
    rotate_final = rotate_outputs.copy()
    rotateR_final = rotateR_outputs.copy()

    for i in tqdm(range(len(numpy_dataset))):
        test_result = []
        flip_result = []
        rotate_result = []
        rotateR_result = []

        noaug_return = no_aug_return(image = numpy_dataset[i][0], bboxes=test_outputs[i]['boxes'], labels=test_outputs[i]['labels'])
        flip_return = flip_aug_return(image = numpy_dataset[i][0], bboxes=flip_outputs[i]['boxes'], labels=flip_outputs[i]['labels'])
        rotate_return = rotateR_aug_return(image = numpy_dataset[i][0], bboxes=rotate_outputs[i]['boxes'], labels=rotate_outputs[i]['labels'])
        rotateR_return = rotate_aug_return(image = numpy_dataset[i][0], bboxes=rotateR_outputs[i]['boxes'], labels=rotateR_outputs[i]['labels'])

        test_final[i]['boxes'] = noaug_return['bboxes'] 
        flip_final[i]['boxes'] = flip_return['bboxes'] 
        rotate_final[i]['boxes'] = rotate_return['bboxes'] 
        rotateR_final[i]['boxes'] = rotateR_return['bboxes'] 
    
    make_submission(args, test_final, k, 'test', resize)
    make_submission(args, flip_final, k, 'flip', resize)
    make_submission(args, rotate_final, k, 'rotate', resize)
    make_submission(args, rotateR_final, k, 'rotateR', resize)

    print()


* fold 0


100%|██████████| 837/837 [01:05<00:00, 12.72it/s]
100%|██████████| 837/837 [01:06<00:00, 12.57it/s]
100%|██████████| 837/837 [01:07<00:00, 12.34it/s]
100%|██████████| 837/837 [01:07<00:00, 12.43it/s]
100%|██████████| 837/837 [00:40<00:00, 20.84it/s]



* fold 1


100%|██████████| 837/837 [01:05<00:00, 12.86it/s]
100%|██████████| 837/837 [01:04<00:00, 12.95it/s]
100%|██████████| 837/837 [01:05<00:00, 12.80it/s]
100%|██████████| 837/837 [01:04<00:00, 12.99it/s]
100%|██████████| 837/837 [00:38<00:00, 21.52it/s]



* fold 2


100%|██████████| 837/837 [01:04<00:00, 12.94it/s]
100%|██████████| 837/837 [01:04<00:00, 12.93it/s]
100%|██████████| 837/837 [01:05<00:00, 12.69it/s]
100%|██████████| 837/837 [01:05<00:00, 12.85it/s]
100%|██████████| 837/837 [00:40<00:00, 20.53it/s]



* fold 3


100%|██████████| 837/837 [01:05<00:00, 12.87it/s]
100%|██████████| 837/837 [01:04<00:00, 13.08it/s]
100%|██████████| 837/837 [01:04<00:00, 13.03it/s]
100%|██████████| 837/837 [01:03<00:00, 13.16it/s]
100%|██████████| 837/837 [00:38<00:00, 21.64it/s]



* fold 4


100%|██████████| 837/837 [01:04<00:00, 13.05it/s]
100%|██████████| 837/837 [01:03<00:00, 13.18it/s]
100%|██████████| 837/837 [01:04<00:00, 13.06it/s]
100%|██████████| 837/837 [01:04<00:00, 12.92it/s]
100%|██████████| 837/837 [00:35<00:00, 23.46it/s]





# check tta dataset

In [6]:
idx += 1

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,10))

image = (test_dataset[idx][0]*255).cpu().numpy().transpose((1,2,0)).astype(np.uint8)
boxes = [list(map(round, x)) for x in test_outputs[idx]['boxes']]
labels = test_outputs[idx]['labels']
scores = test_outputs[idx]['scores']

for i in range(len(boxes)):
    if scores[i] > 0.01:
        ob = cv2.rectangle(image, boxes[i][:2], boxes[i][2:], RGB_cols[labels[i]], 2)
    #plt.text((boxes[i][0]+boxes[i][2])/2, boxes[i][1]-2, classes[labels[i]], ha='center', va='bottom',color=colors[label2col[labels[i]]] ,weight='semibold', label=labels[i])
   

ax1.imshow(ob)

image = (flip_dataset[idx][0]*255).cpu().numpy().transpose((1,2,0)).astype(np.uint8)
boxes = [list(map(round, x)) for x in flip_outputs[idx]['boxes']]
labels = flip_outputs[idx]['labels']
scores = flip_outputs[idx]['scores']

for i in range(len(boxes)):
    if scores[i] > 0.01:
        ob = cv2.rectangle(image, boxes[i][:2], boxes[i][2:], RGB_cols[labels[i]], 2)
    #plt.text((boxes[i][0]+boxes[i][2])/2, boxes[i][1]-2, classes[labels[i]], ha='center', va='bottom',color=colors[label2col[labels[i]]] ,weight='semibold', label=labels[i])
   

ax2.imshow(ob)

image = (rotate_dataset[idx][0]*255).cpu().numpy().transpose((1,2,0)).astype(np.uint8)
boxes = [list(map(round, x)) for x in rotate_outputs[idx]['boxes']]
labels = rotate_outputs[idx]['labels']
scores = rotate_outputs[idx]['scores']

for i in range(len(boxes)):
    if scores[i] > 0.01:
        ob = cv2.rectangle(image, boxes[i][:2], boxes[i][2:], RGB_cols[labels[i]], 2)
    #plt.text((boxes[i][0]+boxes[i][2])/2, boxes[i][1]-2, classes[labels[i]], ha='center', va='bottom',color=colors[label2col[labels[i]]] ,weight='semibold', label=labels[i])
   

ax3.imshow(ob)

image = (rotateR_dataset[idx][0]*255).cpu().numpy().transpose((1,2,0)).astype(np.uint8)
boxes = [list(map(round, x)) for x in rotateR_outputs[idx]['boxes']]
labels = rotateR_outputs[idx]['labels']
scores = rotateR_outputs[idx]['scores']

for i in range(len(boxes)):
    if scores[i] > 0.01:
        ob = cv2.rectangle(image, boxes[i][:2], boxes[i][2:], RGB_cols[labels[i]], 2)
    #plt.text((boxes[i][0]+boxes[i][2])/2, boxes[i][1]-2, classes[labels[i]], ha='center', va='bottom',color=colors[label2col[labels[i]]] ,weight='semibold', label=labels[i])
   

ax4.imshow(ob)

plt.show()

NameError: name 'idx' is not defined

# check returned dataset

In [None]:
idx = 0
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,10))

image = (test_dataset[idx][0]*255).cpu().numpy().transpose((1,2,0)).astype(np.uint8)
boxes = [list(map(round, x)) for x in test_outputs[idx]['boxes']]
labels = test_outputs[idx]['labels']
scores = test_outputs[idx]['scores']

for i in range(len(boxes)):
    if scores[i] > 0.01:
        ob = cv2.rectangle(image, boxes[i][:2], boxes[i][2:], RGB_cols[labels[i]], 2)
    #plt.text((boxes[i][0]+boxes[i][2])/2, boxes[i][1]-2, classes[labels[i]], ha='center', va='bottom',color=colors[label2col[labels[i]]] ,weight='semibold', label=labels[i])
   

ax1.imshow(ob)

boxes = [list(map(lambda x: int(round(x)), x)) for x in rotateR_return['bboxes']]
labels = rotateR_return['labels']


for i in range(len(boxes)):

    ob = cv2.rectangle(image, boxes[i][:2], boxes[i][2:], RGB_cols[labels[i]], 2)
    #plt.text((boxes[i][0]+boxes[i][2])/2, boxes[i][1]-2, classes[labels[i]], ha='center', va='bottom',color=colors[label2col[labels[i]]] ,weight='semibold', label=labels[i])
   

ax2.imshow(ob)