### **import dependencies**

In [None]:
import os
import shutil
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split
import yaml

from kaggle_secrets import UserSecretsClient
import wandb
from wandb.keras import WandbCallback

import cv2
import pydicom

from pathlib import Path
from tqdm.auto import tqdm

### **configuration and initialization**

In [None]:
# --- Kaggle input datasets (read-only) -------------------------------------
SIIM_COVID19_DETECTION_DIR = '/kaggle/input/siim-covid19-detection/'
PART0_RESIZED_DIR = '/kaggle/input/part0-siim-covid19-first-look-resized-512px/'
YOLOV5_DIR = '/kaggle/input/yolov5/yolov5/'

# --- Writable locations ----------------------------------------------------
# YOLOv5 is copied here so that train.py can write next to its sources.
YOLOV5_W_DIR = '/kaggle/working/yolov5/yolov5/'
TEMP_DIR = '/kaggle/temp/'

# Resized images are read from INPUT_DIR; the YOLO dataset is built under
# DATASET_DIR (OUTPUT_DIR is an alias for the same folder).
INPUT_DIR = f'{PART0_RESIZED_DIR}data/'
DATASET_DIR = f'{TEMP_DIR}data/'
OUTPUT_DIR = DATASET_DIR

# images/<split>/ and labels/<split>/ folders of the YOLO dataset.
TRAIN_IMAGES_DIR = f'{DATASET_DIR}images/train/'
VAL_IMAGES_DIR = f'{DATASET_DIR}images/valid/'
TRAIN_LABELS_DIR = f'{DATASET_DIR}labels/train/'
VAL_LABELS_DIR = f'{DATASET_DIR}labels/valid/'

# --- Training hyper-parameters ---------------------------------------------
BATCH_SIZE = 8
EPOCHS = 50
IMG_SIZE = 512      # side length of the (square) training images
WIDTH = IMG_SIZE
HEIGHT = IMG_SIZE

# --- CSV files ---------------------------------------------------------------
TRAIN_IMAGE_LEVEL_PATH = f'{SIIM_COVID19_DETECTION_DIR}train_image_level.csv'
TRAIN_STUDY_LEVEL_PATH = f'{SIIM_COVID19_DETECTION_DIR}train_study_level.csv'
META_PATH = f'{PART0_RESIZED_DIR}meta.csv'

# OpenCV interpolation flag (Lanczos). NOTE(review): not referenced by any
# cell visible in this notebook - presumably left over from the resize step.
INTERPOLATION = cv2.INTER_LANCZOS4

# Weights & Biases project name; also passed to train.py as --project.
WANDB_PROJECT_NAME = 'project8-kaggle-covid19'
# Left empty: wandb.init() is called with the project name only.
WANDB_ENTITY_NAME = ''

In [None]:
# Read the W&B API key from the Kaggle secret named "WANDB_API_KEY1" and
# export it as WANDB_API_KEY so that it is set before wandb.init() runs.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("WANDB_API_KEY1")
os.environ['WANDB_API_KEY'] = secret_value_0

#wandb.login()
# Start a W&B run for this project and record the batch size in its config.
wandb.init(project=WANDB_PROJECT_NAME)
config = wandb.config 
config.batch_size = BATCH_SIZE

# Move the working directory two levels up from wherever the kernel started
# (presumably /kaggle/working -> / on Kaggle - TODO confirm).
%cd ../../

# Create the YOLO dataset folders; exist_ok=True makes this safe to re-run.
os.makedirs(TRAIN_IMAGES_DIR, exist_ok=True)
os.makedirs(VAL_IMAGES_DIR, exist_ok=True)
os.makedirs(TRAIN_LABELS_DIR, exist_ok=True)
os.makedirs(VAL_LABELS_DIR, exist_ok=True)

In [None]:
# Copy the read-only YOLOv5 sources into the writable working directory.
# shutil.copytree raises FileExistsError when the destination already exists,
# so the copy is skipped on re-runs to keep this cell idempotent.
if not os.path.exists(YOLOV5_W_DIR):
    shutil.copytree(YOLOV5_DIR, YOLOV5_W_DIR)

### **load csv file**

In [None]:
# Load the per-image metadata table.
df_train_image_level = pd.read_csv(META_PATH)

# Re-root every image path at INPUT_DIR, keeping only the file name
# (the last '/'-separated component of the stored path).
df_train_image_level['path'] = df_train_image_level['path'].map(
    lambda stored_path: INPUT_DIR + stored_path.split('/')[-1]
)

### **Object Detection with yolov5**

**train test split**

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible for a given input frame.
train_df, valid_df = train_test_split(
    df_train_image_level,
    test_size=0.2,
    random_state=42,
)

# assign() returns new frames, so the slices produced by train_test_split
# are never modified in place.
train_df = train_df.assign(split='train')
valid_df = valid_df.assign(split='valid')

# Stack both parts back together (train rows first) with a fresh 0..n-1 index.
df_train_image_level = pd.concat([train_df, valid_df], ignore_index=True)
df_train_image_level.sample(4)

**create the train/valid directories and copy the images for training**

In [None]:
# Make sure every dataset folder exists before copying anything into it.
for folder in (TRAIN_IMAGES_DIR, VAL_IMAGES_DIR, TRAIN_LABELS_DIR, VAL_LABELS_DIR):
    os.makedirs(folder, exist_ok=True)

# Copy each image into images/train or images/valid according to its split.
n_rows = len(df_train_image_level)
for idx in tqdm(range(n_rows)):
    row = df_train_image_level.loc[idx]

    # Rows whose source image is missing are skipped silently.
    if not os.path.exists(row.path):
        continue

    target_dir = TRAIN_IMAGES_DIR if row.split == 'train' else VAL_IMAGES_DIR
    shutil.copy(row.path, f'{target_dir}{row.id}.jpg')

**create the YOLOv5 `data.yaml`**

In [None]:
# Dataset description consumed by YOLOv5's train.py.
# The image folders are written as absolute paths: the previous
# '../../../../../.'-prefixed form only resolved correctly when train.py was
# started from a directory at most five levels below the filesystem root.
data_yaml = dict(
    train = TRAIN_IMAGES_DIR,
    val = VAL_IMAGES_DIR,
    nc = 2,                      # number of classes
    names = ['none','opacity']   # class index -> class name
)

# Written into the data/ folder of the working copy of YOLOv5.
with open(YOLOV5_W_DIR+'data/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=False)

**helper functions: convert the image-level label strings into YOLO-format bounding boxes**

In [None]:
def get_bbox(row):
    """Extract the box coordinates from a row's space-separated label string.

    The label is read in groups of six tokens; the first two tokens of each
    group are ignored and the remaining four are converted to floats.

    Args:
        row: object exposing a ``label`` string attribute.

    Returns:
        A list with one ``[float, float, float, float]`` entry per complete
        six-token group. An incomplete trailing group is dropped.
    """
    tokens = row.label.split(' ')

    boxes = []
    for start in range(0, len(tokens), 6):
        # Tokens 2..5 of the group carry the coordinates.
        coords = [float(tok) for tok in tokens[start + 2:start + 6]]
        if len(coords) == 4:
            boxes.append(coords)

    return boxes

def scale_bbox(row, bboxes, img_size=None):
    """Rescale boxes from the original image size to the resized square image.

    Each coordinate is rounded to the nearest pixel. The previous
    ``int(np.round(v, 4))`` truncated toward zero, which shifted every box
    edge by up to one pixel toward the origin.

    Args:
        row: object exposing the original image ``width`` and ``height``.
        bboxes: iterable of boxes; elements 0 and 2 are scaled along x and
            elements 1 and 3 along y (presumably ``[x_min, y_min, x_max,
            y_max]`` - confirm against the label format).
        img_size: side length of the resized image. Defaults to the
            module-level ``IMG_SIZE`` when omitted.

    Returns:
        A list of ``[x, y, x1, y1]`` integer boxes in resized-image pixels.
    """
    if img_size is None:
        img_size = IMG_SIZE

    # Per-axis scaling factors from original to resized resolution.
    scale_x = img_size / row.width
    scale_y = img_size / row.height
    axis_scales = (scale_x, scale_y, scale_x, scale_y)

    scaled_bboxes = []
    for bbox in bboxes:
        scaled_bboxes.append(
            [int(round(bbox[k] * axis_scales[k])) for k in range(4)]
        )

    return scaled_bboxes

def get_yolo_format_bbox(img_w, img_h, bboxes):
    """Convert corner-format pixel boxes to normalised YOLO boxes.

    The centre is computed exactly as ``min + size / 2``. The previous
    implementation snapped it to an integer pixel with ``np.round`` before
    normalising, which lost up to half a pixel even though the result is
    written with six decimals.

    Args:
        img_w: image width in pixels, used to normalise x values.
        img_h: image height in pixels, used to normalise y values.
        bboxes: iterable of ``[x_min, y_min, x_max, y_max]`` pixel boxes.

    Returns:
        A list of ``[xc, yc, w, h]`` boxes; every value is a string
        formatted with six decimals and normalised by the image size.
    """
    yolo_boxes = []
    for bbox in bboxes:
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        # Exact box centre; no integer snapping before normalisation.
        xc = bbox[0] + w / 2
        yc = bbox[1] + h / 2

        yolo_boxes.append([
            f'{xc/img_w:.6f}',
            f'{yc/img_h:.6f}',
            f'{w/img_w:.6f}',
            f'{h/img_h:.6f}',
        ])

    return yolo_boxes

**create label txt files**

In [None]:
# Labels are generated only while the train label folder is still empty, so
# re-running the cell does not rewrite existing files.
if not os.listdir(f'{TRAIN_LABELS_DIR}'):
    for idx in tqdm(range(len(df_train_image_level))):
        row = df_train_image_level.loc[idx]

        # Only 'opacity' images get a label file; all others get none.
        if row.image_level != 'opacity':
            continue

        labels_dir = TRAIN_LABELS_DIR if row.split == 'train' else VAL_LABELS_DIR
        file_name = f'{labels_dir}{row.id}.txt'

        # label string -> pixel boxes -> resized pixel boxes -> YOLO boxes
        bboxes = get_bbox(row)
        scale_bboxes = scale_bbox(row, bboxes)
        yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, IMG_SIZE, scale_bboxes)

        # One line per box: "<class> <xc> <yc> <w> <h>"; class 1 is 'opacity'.
        lines = [
            ' '.join(str(value) for value in [1] + box) + '\n'
            for box in yolo_bboxes
        ]
        with open(file_name, 'w') as f:
            f.writelines(lines)

**training**

In [None]:
# Switch into the writable YOLOv5 copy; the training command below is
# expected to be run from this directory.
%cd {YOLOV5_W_DIR}

In [None]:
!python train.py --img {IMG_SIZE} \
                 --batch-size {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --weights yolov5s.pt \
                 --save_period 1\
                 --project {WANDB_PROJECT_NAME}

### **ref**


* https://www.kaggle.com/xhlulu
* https://www.kaggle.com/yujiariyasu
* https://www.kaggle.com/ayuraj
* https://www.kaggle.com/dschettler8845   
....