# データの読み込み,可視化 と Augmentationの実行

In [None]:
import ast
import os

import cv2
import matplotlib as mpl
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from PIL import Image

In [None]:
# 自作クラスの読み込み
import sys
sys.path.append('../src/preprocess')
sys.path.append('../src/dataset')
from augmentation import ObjectDetectionAugmentation
from dataset import Dataset

- yamlファイルからconfigの読み込み

In [None]:
import yaml

# Read the training configuration with the safe YAML loader.
# The encoding is pinned so the result does not depend on the platform's
# default locale encoding (assumes the file is UTF-8 -- TODO confirm).
with open('../config/config_train.yml', 'r', encoding='utf-8') as f:
    config = yaml.load(f, Loader=yaml.SafeLoader)

- train, test csvの読み込み

In [None]:
# Dataset root directory and the CSV files located directly under it.
root_dir = '../input/tensorflow-great-barrier-reef'
train_path = root_dir + '/train.csv'
test_path = root_dir + '/test.csv'

def get_img_path(df, images_root=None):
    """Add an ``image_path`` column built from ``video_id`` and ``video_frame``.

    Each row gets ``<images_root>/video_<video_id>/<video_frame>.jpg``.

    Args:
        df: DataFrame with ``video_id`` and ``video_frame`` columns. It is
            modified in place.
        images_root: Directory containing the ``video_*`` folders. When
            omitted, ``f'{root_dir}/train_images'`` is used, where
            ``root_dir`` is the module-level dataset root.

    Returns:
        The same DataFrame object, with the ``image_path`` column set.
    """
    if images_root is None:
        images_root = f'{root_dir}/train_images'
    # Build the path element-wise. Interpolating the Series themselves into a
    # single f-string would embed the Series repr and give every row the same
    # (useless) string.
    video_ids = df['video_id'].astype(str)
    frames = df['video_frame'].astype(str)
    df['image_path'] = images_root + '/video_' + video_ids + '/' + frames + '.jpg'
    return df

def get_annotes(df):
    """Parse the string ``annotations`` column into Python objects.

    Adds three columns to ``df`` (modified in place):

    * ``dict_annotations``: the parsed literal (each cell is expected to be a
      list of dicts, one dict per annotation).
    * ``list_annotations``: the same annotations with every dict replaced by
      the list of its values, in the dict's own key order.
    * ``n_annotations``: number of annotations in the row.

    Args:
        df: DataFrame whose ``annotations`` column holds Python-literal text.

    Returns:
        The same DataFrame object.

    Raises:
        ValueError, SyntaxError: if a cell is not a valid Python literal.
    """
    # The annotations are stored as text. literal_eval only accepts literals,
    # so unlike eval() it cannot execute arbitrary code found in the CSV.
    parsed = df['annotations'].apply(ast.literal_eval)
    df['dict_annotations'] = parsed
    # Convert each annotation dict to a plain list of its values.
    df['list_annotations'] = parsed.apply(
        lambda annots: [list(annot.values()) for annot in annots]
    )
    df['n_annotations'] = parsed.apply(len)
    return df

def read_train(train_path):
    """Load the training CSV and attach the derived columns.

    The CSV is read with pandas, then passed through ``get_annotes`` and
    ``get_img_path`` in that order.

    Args:
        train_path: Path of the training CSV file.

    Returns:
        The DataFrame returned by ``get_img_path``.
    """
    raw = pd.read_csv(train_path)
    return get_img_path(get_annotes(raw))

In [None]:
# Load train.csv through read_train, which also applies get_annotes and get_img_path.
train = read_train(train_path)

- Kaggleのページ上でディスプレイされている画像の中でヒトデが多い画像を探す

In [None]:
# Scan frames 1001-1019 of video 0 and remember the first frame that has the
# largest number of annotations (ties keep the earlier frame).
video_id = 0
max_id, max_annots = "", 0
for video_frame in range(1001, 1020):
    image_id = f'{video_id}-{video_frame}'
    len_annots = train[train['image_id'] == image_id]['n_annotations'].item()
    if len_annots <= max_annots:
        continue
    max_annots, max_id = len_annots, image_id

print(max_id)
print(max_annots)

#  サンプルデータ読み込み
- 可視化参考元: https://www.kaggle.com/remekkinas/yolox-training-pipeline-cots-dataset-lb-0-507
- image_id: 0-1001, 1-10001

In [None]:
# When the full dataset is available
# NOTE(review): the commented-out sketch below is unfinished and would not run
# as written: the max_frame* values are blank, the enumerate(...) line has
# unbalanced parentheses, `video += 1` uses an undefined name, the inner loop
# iterates `flame_list` (not `frame_list`), and `_image_path` is appended
# although the variable assigned is `_img_path`.
image_paths = []
# sample_bboxes = []
# max_frame0 = 
# max_frame1 = 
# max_frame2 = 
# max_frame3 = 
# for video_id, frame_list in enumerate([range(max_frame0 + 1), range(max_frame1 + 1), range(max_frame2 + 1)), range(max_frame3 + 1))]):
#     video += 1
#     for video_frame in flame_list:
#         _img_path = f'../input/tensorflow-great-barrier-reef/train_images/video_{video_id}/{video_frame}.jpg'
#         _bboxes = train[train['image_id']==f'{video_id}-{video_frame}']['list_annotations'].item()
#         image_paths.append(_image_path)
#         sample_bboxes.append(_bboxes)

In [None]:
# Sample with two images
# To load the full dataset instead, replace image_paths
image1_path = '../input/tensorflow-great-barrier-reef/train_images/video_0/1001.jpg'
image2_path = '../input/tensorflow-great-barrier-reef/train_images/video_1/10001.jpg'

image_paths = [image1_path, image2_path]

# sample_ids must hold the image_id matching each entry of image_paths, in the same order
sample_ids = ['0-1001', '1-10001']
sample_bboxes = []
for sample_id in sample_ids:
    # .item() expects exactly one matching row and raises ValueError otherwise
    _bboxes = train[train['image_id']==sample_id]['list_annotations'].item()
    sample_bboxes.append(_bboxes)

- Augmentationをしない場合

In [None]:
# Build the dataset from the sample paths and boxes; no transform is passed here.
dataset = Dataset(file_paths=image_paths, bboxes=sample_bboxes)

- Augmentation をする場合

In [None]:
# # augmentationを考慮したdataset
# transformer = ObjectDetectionAugmentation(config['transforms_args'])
# dataset = Dataset(file_paths=[image1_path, image2_path], bboxes=sample_bboxes, transform=transformer.transform)

- 可視化関数の定義

In [None]:
# 可視化用関数
def draw_bboxes(img, bboxes):
    """Draw a rectangle on ``img`` for every box in ``bboxes``.

    Each box is read as ``[x, y, width, height]`` through its first four
    elements; the values are truncated to int before drawing.

    Args:
        img: Image passed to ``cv2.rectangle``; it is drawn on in place.
        bboxes: Sequence of boxes.

    Returns:
        The same ``img`` object.
    """
    colour = (255, 0, 0)
    line_width = 2
    for box in bboxes:
        left, top = int(box[0]), int(box[1])
        # Width and height are truncated separately and then added to the corner.
        right, bottom = left + int(box[2]), top + int(box[3])
        cv2.rectangle(img, (left, top), (right, bottom), colour, line_width)
    return img

def draw_bboxes_predict(img, bboxes, scores, bbclasses, confthre, classes_dict):
    """Draw predicted boxes and label those at or above a confidence threshold.

    Every box is drawn with ``draw_bboxes`` regardless of its score; only the
    text label is skipped for boxes whose score is below ``confthre``.

    Args:
        img: Image to draw on (modified in place).
        bboxes: Sequence of boxes, read as ``[x, y, width, height]``.
        scores: Per-box confidence, indexed like ``bboxes``.
        bbclasses: Per-box class id, indexed like ``bboxes``.
        confthre: Minimum score for a box to receive a label.
        classes_dict: Mapping from integer class id to display name.

    Returns:
        The same ``img`` object.
    """
    img = draw_bboxes(img, bboxes)
    for i, box in enumerate(bboxes):
        cls_id = int(bbclasses[i])
        score = scores[i]
        if score < confthre:
            continue
        label = '{}:{:.1f}%'.format(classes_dict[cls_id], score * 100)
        # Cast to int as draw_bboxes does: box values may be floats (e.g. after
        # augmentation) and cv2.putText needs integer pixel coordinates.
        origin = (int(box[0]), int(box[1]) - 3)
        cv2.putText(img, label, origin, cv2.FONT_HERSHEY_PLAIN, 0.8, (255, 0, 0), thickness=1)
    return img


# def display_cv2img(cv2_img):
#     out_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
#     display(Image.fromarray(out_img))

# サンプル画像の表示

In [None]:
# Fetch the first (image, bboxes) sample, draw its boxes and show the result.
out_img = draw_bboxes(*dataset[0])
display(Image.fromarray(out_img))

- 推論時想定可視化サンプル

In [None]:
# # Get predictions
# # bboxes, bbclasses, scores = yolox_inference(img, model, test_size)
# bboxes, bbclasses, scores = sample_bboxes, [0], [0.99]
# # Draw predictions
# out_image = draw_bboxes_predict(sample_img, bboxes, scores, bbclasses, 0.0, {0: 'hitode'})

# # Since we load image using OpenCV we have to convert it 
# out_image = cv2.cvtColor(out_image, cv2.COLOR_BGR2RGB)
# display(Image.fromarray(out_image))

# Augmentation
- 参考
    - https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/ 
    - https://github.com/albumentations-team/albumentations

In [None]:
# Show the 'transforms_args' section of the loaded config.
config['transforms_args']

In [None]:
# Create the augmentation helper from the 'transforms_args' config section.
transformer = ObjectDetectionAugmentation(config['transforms_args'])

In [None]:
# Collect the first two dataset samples into parallel lists. The loop
# variables sample_img / sample_bbox keep the last sample after the loop.
sample_imgs = []
sample_bboxes = []
# for idx in range(len(image_paths)):
for idx in range(2):
    sample_img, sample_bbox = dataset[idx]
    sample_imgs.append(sample_img)
    sample_bboxes.append(sample_bbox)

TODO: サンプルを書き換える

In [None]:
# Same sample data as before, but with transformer.transform passed as the dataset transform.
dataset_transform = Dataset(file_paths=image_paths, bboxes=sample_bboxes, transform=transformer.transform)

In [None]:
%%time
# Transform a single image
_img, _bboxes = transformer.transform(sample_img, sample_bbox)
# Visualize
# display(Image.fromarray(draw_bboxes(_img, _bboxes)))

In [None]:
%%time
# Transform every image in the list
image_list, bboxes_list = transformer.transform_data(sample_imgs, sample_bboxes)
# Visualize
# for _i, _b in zip(image_list, bboxes_list):
#     display(Image.fromarray(draw_bboxes(_i, _b)))

- Augmentationテスト

In [None]:
import albumentations as A

# Stand-alone albumentations pipeline: a 450x450 random crop, a horizontal flip
# applied with p=0.5 and a brightness/contrast change applied with p=0.2.
# bbox_params declares the boxes as 'coco' format and sets the min_area /
# min_visibility thresholds; label_fields is empty because no labels are passed.
transform = A.Compose([
    A.RandomCrop(width=450, height=450),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
], bbox_params=A.BboxParams(format='coco', min_area=100, min_visibility=0.1, label_fields=[]))

# Apply the pipeline to the last sample collected above.
transformed = transform(image=sample_img, bboxes=sample_bbox)
transformed_img = transformed['image']
transformed_bboxes = transformed['bboxes']

# Draw the transformed boxes on the transformed image and display it.
transformed_img = draw_bboxes(transformed_img, transformed_bboxes)
display(Image.fromarray(transformed_img))

In [None]:
# Show the current image_id (in this file it is only assigned by the frame-scan loop).
image_id

In [None]:
# Rebuild the two-image dataset and inspect the shape of its first image.
dataset = Dataset(file_paths=[image1_path, image2_path], bboxes=sample_bboxes)
img_list, bboxes_list = dataset[0]

img_list.shape

In [None]:
from torch.utils.data import Dataset
class Dataset(Dataset):
    """Dataset pairing image file paths with their bounding boxes.

    Args:
        file_paths: Indexable collection of image file paths.
        bboxes: Indexable collection of bounding boxes; ``bboxes[i]`` belongs
            to ``file_paths[i]``.
        transform: Optional callable invoked as
            ``transform(image=image, bboxes=bboxes)`` that must return an
            ``(image, bboxes)`` pair.
    """

    def __init__(self, file_paths, bboxes, transform=None):
        self.file_paths = file_paths
        self.bboxes = bboxes
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, bboxes)``.

        Raises:
            IndexError: if ``idx`` is out of range for ``file_paths``; the
                lookup happens first, so plain ``for`` iteration over the
                dataset still terminates.
            FileNotFoundError: if the image file cannot be read.
        """
        file_path = self.file_paths[idx]
        bboxes = self.bboxes[idx]

        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than with an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image, bboxes = self.transform(image=image, bboxes=bboxes)
        return image, bboxes

# train_video の可視化
- 参考: https://www.kaggle.com/werooring/basic-eda-starter-for-everyone

In [None]:
from matplotlib import animation, rc
rc('animation', html='jshtml')

def create_animation(imgs, frame_interval=130):
    """Build a matplotlib animation that steps through ``imgs``.

    Args:
        imgs: Indexable, sized collection of images; ``imgs[0]`` is used as
            the initial frame.
        frame_interval: Value passed to ``FuncAnimation`` as ``interval``.

    Returns:
        The ``animation.FuncAnimation`` object.
    """
    figure = plt.figure(figsize=(7, 4))
    plt.axis('off')
    artist = plt.imshow(imgs[0])

    def _show_frame(frame_idx):
        # Swap the displayed pixels for the requested frame.
        artist.set_array(imgs[frame_idx])
        return [artist]

    return animation.FuncAnimation(
        figure,
        _show_frame,
        frames=len(imgs),
        interval=frame_interval,
    )

In [None]:
frame_interval = 120 # set smaller number if you want to play fast, otherwise set bigger
# Draw the boxes on every (image, bboxes) pair yielded by iterating the dataset,
# then play the annotated frames; frame_interval is forwarded to create_animation.
create_animation(
    [draw_bboxes(_img, _bbox) for _img, _bbox in dataset],
    frame_interval=frame_interval
)

# DatasetClassの再設計

In [None]:
root_dir = '../input/tensorflow-great-barrier-reef'


class Dataset(Dataset):
    """Dataset that reads its bounding boxes from ``<root_dir>/train.csv``.

    Args:
        root_dir: Directory containing ``train.csv``.
        file_paths: Iterable of image file paths; it is copied into a list.
        transform: Optional callable invoked as
            ``transform(image=image, bboxes=bboxes)`` that must return an
            ``(image, bboxes)`` pair.
    """

    def __init__(self, root_dir, file_paths, transform=None):
        train = read_train(f'{root_dir}/train.csv')
        self.file_paths = list(file_paths)
        # NOTE(review): the boxes are taken for every CSV row in row order, so
        # bboxes[idx] matches file_paths[idx] only when file_paths follows
        # that same order -- confirm against the intended callers.
        self.bboxes = list(train['list_annotations'])
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, bboxes)``.

        Raises:
            IndexError: if ``idx`` is out of range.
            FileNotFoundError: if the image file cannot be read.
        """
        file_path = self.file_paths[idx]
        bboxes = self.bboxes[idx]
        image = cv2.imread(file_path)
        # cv2.imread returns None on failure instead of raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image, bboxes = self.transform(image=image, bboxes=bboxes)
        return image, bboxes