In [None]:
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Customize IPython's writefile magic so that cell text can interpolate notebook variables
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell):
    """Write the cell body to a file after ``str.format`` substitution.

    Args:
        line: Text following the magic name; used verbatim as the output path.
        cell: Cell body, treated as a ``str.format`` template. Every
            ``{name}`` placeholder is filled from this module's globals.
    """
    namespace = globals()
    # The file is opened (and truncated) before the template is rendered.
    with open(line, 'w') as out_file:
        out_file.write(cell.format(**namespace))


In [None]:
# Competition data directory, relative to the notebook's starting directory.
Data_dir = '../input/siim-covid19-detection'

# Image-level and study-level annotation tables.
train = pd.read_csv(Data_dir + '/train_image_level.csv')
train_study = pd.read_csv(Data_dir + '/train_study_level.csv')

In [None]:
# Preview the first rows of the image-level annotation table.
train.head()

In [None]:
%cd ../
!mkdir tmp
%cd tmp

In [None]:
# Download YOLOv5
# NOTE(review): no branch, tag or commit is specified, so the clone tracks
# whatever the repository's default branch currently contains.
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
# Install dependencies
# The requirements file is read from the freshly cloned repository.
%pip install -qr requirements.txt  # install dependencies

# Step back out of the repository directory before continuing.
%cd ../
# torch is imported only after the requirements install above
# (presumably because that step may install or change torch — confirm).
import torch
# Report the torch version and the name of GPU 0, or 'CPU' when CUDA is unavailable.
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

In [None]:
# Directory prefix prepended to '<id>.jpg' when building image paths.
# It is a relative path, so it resolves against the working directory in
# effect when the images are read.
TRAIN_PATH = 'input/siim-covid19-resized-to-256px-jpg/train/'
# Target image side length; bounding boxes are rescaled to this size and
# normalised by it when the label files are written.
IMG_SIZE = 256
# Intended training hyper-parameters (batch size and number of epochs).
BATCH_SIZE = 16
EPOCHS = 10

In [None]:
# Everything is done from /kaggle directory.
%cd ../

# Load image level csv file
df = pd.read_csv('input/siim-covid19-detection/train_image_level.csv')
df['id'] = df.apply(lambda row: row.id.split('_')[0], axis=1)
df['image_path']=df.apply(lambda row:TRAIN_PATH+row.id+'.jpg', axis=1)
df['image_level']=df.apply(lambda row: row.label.split(' ')[0],axis=1)

In [None]:
# Read the metadata table that ships with the resized image dataset.
# The original image dimensions it holds are needed to rescale the bounding boxes.
meta_df = pd.read_csv('input/siim-covid19-resized-to-256px-jpg/meta.csv')

# Training-only view: keep rows whose split is 'train', drop the split column,
# then relabel the three remaining columns.
train_meta_df = meta_df[meta_df['split'] == 'train'].drop(columns=['split'])
train_meta_df.columns = ['id', 'dim0', 'dim1']

train_meta_df.head(2)

In [None]:
# Work on an independent copy so the merge below does not touch df itself.
df1=df.copy()

In [None]:
# Duplicate image_id under the name 'id', the key df/df1 are merged on.
# NOTE(review): this raises KeyError if re-run after image_id has been dropped.
meta_df['id']=meta_df['image_id']

In [None]:
# image_id now lives on as 'id'; remove the original column and preview the result.
meta_df = meta_df.drop(columns=['image_id'])
meta_df.head()

In [None]:
# Left-join the metadata columns onto every annotated image, matching on 'id'.
df1 = pd.merge(df1, meta_df, how="left", on='id')
df1.head(2)

In [None]:
os.makedirs('tmp/covid/images/train', exist_ok=True)
os.makedirs('tmp/covid/images/valid', exist_ok=True)

os.makedirs('tmp/covid/labels/train', exist_ok=True)
os.makedirs('tmp/covid/labels/valid', exist_ok=True)

! ls tmp/covid/images

In [None]:
 # Left-join the metadata columns onto df as well, matching on 'id'.
 df = pd.merge(df, meta_df, how="left", on='id')

In [None]:
# Copy every image into the folder that matches its split.
# NOTE(review): `split` comes from the merged metadata table; if it only ever
# holds 'train' for these images, the valid/ folder stays empty — confirm.
for idx in tqdm(range(len(df1))):
    record = df1.loc[idx]
    # Anything that is not explicitly 'train' is treated as validation data.
    subset = 'train' if record.split == 'train' else 'valid'
    copyfile(record.image_path, f'tmp/covid/images/{subset}/{record.id}.jpg')

In [None]:
 #Create .yaml file 
import yaml

data_yaml = dict(
    train = '../covid/images/train',
    val = '../covid/images/valid',
    nc = 2,
    names = ['none', 'opacity']
)

# Note that I am creating the file in the yolov5/data/ directory.
with open('tmp/yolov5/data/data.yaml', 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
    
%cat tmp/yolov5/data/data.yaml


In [None]:
def get_bbox(row):
    """Parse the bounding boxes out of a row's space-separated label string.

    The label is read as consecutive groups of six tokens. In each group the
    first two tokens are skipped and the remaining four are converted to
    floats, in order, to form one box (presumably
    ``<class> <confidence> xmin ymin xmax ymax`` — confirm against the
    dataset description).

    Args:
        row: Object exposing a ``label`` string attribute.

    Returns:
        list[list[float]]: One four-element list per complete six-token
        group; empty when the label holds no complete group.
    """
    bboxes = []
    bbox = []
    for i, token in enumerate(row.label.split(' ')):
        position = i % 6
        if position in (0, 1):
            # Skip the two leading tokens of every group.
            continue
        # These statements used to sit after `continue` inside the `if`
        # above, which made them unreachable: the function always returned [].
        bbox.append(float(token))
        if position == 5:
            # Sixth token reached: the current box is complete.
            bboxes.append(bbox)
            bbox = []
    return bboxes

def scale_bbox(row, bboxes):
    """Rescale boxes from original-image coordinates to ``IMG_SIZE`` pixels.

    Args:
        row: Object exposing ``dim0`` and ``dim1``. ``dim1`` divides the x
            scale and ``dim0`` the y scale (presumably original width and
            height respectively — confirm).
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` sequences.

    Returns:
        list[list[int]]: Rescaled ``[xmin, ymin, xmax, ymax]`` boxes. Each
        value is rounded to four decimals and then truncated by ``int``.
    """
    x_factor = IMG_SIZE/row.dim1
    y_factor = IMG_SIZE/row.dim0

    def _to_pixel(value, factor):
        # Round to 4 decimals first, then drop the fractional part.
        return int(np.round(value*factor, 4))

    return [
        [
            _to_pixel(box[0], x_factor),
            _to_pixel(box[1], y_factor),
            _to_pixel(box[2], x_factor),
            _to_pixel(box[3], y_factor),
        ]
        for box in bboxes
    ]

# Convert the bounding boxes to YOLO format.
def get_yolo_format_bbox(img_w, img_h, bboxes):
    """Convert corner-format boxes to normalised centre/size boxes.

    Args:
        img_w: Image width used to normalise x centre and box width.
        img_h: Image height used to normalise y centre and box height.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` sequences.

    Returns:
        list[list[float]]: One ``[x_center, y_center, width, height]`` list
        per input box, each component divided by the matching image dimension.
    """
    def _convert(box):
        width = box[2] - box[0]
        height = box[3] - box[1]
        # The half-extent is rounded to a whole number before being added to
        # the top-left corner.
        center_x = box[0] + int(np.round(width/2))
        center_y = box[1] + int(np.round(height/2))
        return [center_x/img_w, center_y/img_h, width/img_w, height/img_h]

    return [_convert(box) for box in bboxes]

In [None]:
# Write one label .txt file per image whose image-level label is 'opacity'.
for idx in tqdm(range(len(df))):
    row = df.loc[idx]

    # Images with any other image-level label get no label file at all.
    if row.image_level != 'opacity':
        continue

    # Anything that is not explicitly 'train' goes to the validation folder.
    subset = 'train' if row.split == 'train' else 'valid'
    file_name = f'tmp/covid/labels/{subset}/{row.id}.txt'

    # Parse the raw boxes, rescale them to IMG_SIZE, then normalise them.
    raw_bboxes = get_bbox(row)
    scaled_bboxes = scale_bbox(row, raw_bboxes)
    yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, IMG_SIZE, scaled_bboxes)

    with open(file_name, 'w') as f:
        for bbox in yolo_bboxes:
            # Every line starts with class id 1, followed by the four box values.
            f.write(' '.join(str(value) for value in [1] + bbox))
            f.write('\n')


In [None]:
# Enter the cloned repository; the training command below runs train.py from here.
%cd tmp/yolov5/

In [None]:
!WANDB_MODE="dryrun" python train.py --img 256 \
                 --batch 16 \
                 --epochs 10 \
                 --data data.yaml \
                 --weights yolov5x.pt --cache