In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from shutil import copyfile
from tqdm import tqdm
import os
import sys

In [None]:
# Display the image-level training annotations CSV as the cell output.
# The same file is loaded again further down as `train_csv_org`.
pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')

In [None]:
# Extract the train.tar.gz archive into ./train.
# The fold-preparation cell later copies `<id>.jpg` files out of this directory.
!mkdir train
!tar -C train -zxvf ../input/jpg-data/train.tar.gz

In [None]:
# Per-image metadata table; only rows whose `split` column equals 'train' are kept below.
meta = pd.read_csv('../input/jpg-data/meta.csv')

# Image-level annotations. Everything from the first underscore onwards is
# stripped from `id` so it can be joined against `meta.image_id`.
train_csv_org = pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')
train_csv_org['id'] = train_csv_org['id'].map(lambda raw_id: raw_id.split('_')[0])

# Shuffled 5-fold splitter with a fixed seed, consumed by the fold-preparation cell.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

# Keep the id and the two stored dimensions of the training images,
# renamed to the column names the bbox helpers read from each row.
is_train = meta['split'] == 'train'
new_meta = meta.loc[is_train, ['image_id', 'dim0', 'dim1']].rename(
    columns={'image_id': 'id', 'dim0': 'x_dimension', 'dim1': 'y_dimension'}
)

In [None]:
def histogram_equalization(image):
    """Return `image` after passing it through `cv2.equalizeHist`.

    NOTE(review): `cv2` is not imported in any visible cell, so calling this
    on a fresh kernel raises NameError unless it is imported elsewhere.
    """
    equalized = cv2.equalizeHist(image)
    return equalized

In [None]:
# Install (or upgrade to) the latest wandb client; no version is pinned.
!pip install -q --upgrade wandb
# Log in to Weights & Biases for this session.
import wandb
wandb.login()

In [None]:
def get_bbox(row):
    """Parse `row.label` into a list of 4-float boxes.

    The label string is split on single spaces and read in groups of six
    tokens: the first two tokens of every group are skipped and the other
    four are converted to float. A box is emitted only when its group is
    complete, so a trailing partial group is discarded.

    Presumably the skipped tokens are a class name and a confidence, and the
    four kept values are xmin, ymin, xmax, ymax — verify against the data.

    Returns:
        list[list[float]]: one 4-element list per complete group.
    """
    tokens = row.label.split(' ')
    boxes = []
    current = []
    for index, token in enumerate(tokens):
        position = index % 6
        # Positions 0 and 1 of each group carry no coordinate.
        if position < 2:
            continue
        current.append(float(token))
        # Position 5 is the last coordinate of the group.
        if position == 5:
            boxes.append(current)
            current = []

    return boxes

In [None]:
def scale_bbox(row, bboxes, IMG_SIZE=704):
    """Rescale boxes from the original image size to an IMG_SIZE square.

    Args:
        row: object exposing `x_dimension` and `y_dimension`. The horizontal
            factor is IMG_SIZE / row.y_dimension and the vertical factor is
            IMG_SIZE / row.x_dimension.
        bboxes: iterable of [xmin, ymin, xmax, ymax] boxes.
        IMG_SIZE: target side length in pixels.

    Returns:
        list[list[int]]: [xmin, ymin, xmax, ymax] per input box. Each value
        is rounded to 4 decimals and then truncated to int.
    """
    factor_x = IMG_SIZE / row.y_dimension
    factor_y = IMG_SIZE / row.x_dimension

    def _rescale(value, factor):
        # int() truncates whatever fraction survives the 4-decimal rounding.
        return int(np.round(value * factor, 4))

    return [
        [
            _rescale(box[0], factor_x),
            _rescale(box[1], factor_y),
            _rescale(box[2], factor_x),
            _rescale(box[3], factor_y),
        ]
        for box in bboxes
    ]

In [None]:
def get_yolo_format_bbox(img_w, img_h, bboxes):
    """Convert corner-format boxes into normalised centre-format boxes.

    Args:
        img_w: image width used to normalise x centre and box width.
        img_h: image height used to normalise y centre and box height.
        bboxes: iterable of [xmin, ymin, xmax, ymax] boxes.

    Returns:
        list[list[float]]: [x_center, y_center, width, height] per box, each
        divided by the matching image dimension. The centre is the min corner
        plus half the extent, with that half rounded via np.round and cast to
        int before the division.
    """
    def _to_yolo(box):
        xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
        width = xmax - xmin
        height = ymax - ymin
        center_x = xmin + int(np.round(width / 2))
        center_y = ymin + int(np.round(height / 2))
        return [center_x / img_w, center_y / img_h, width / img_w, height / img_h]

    return [_to_yolo(box) for box in bboxes]

In [None]:
import yaml
i=0
for train_split,test_split in kf.split(train_csv_org):
    if i != 1:
        i=i+1
        continue
    train= train_csv_org.iloc[train_split]
    test= train_csv_org.iloc[test_split]
    train['split']='train'
    test['split']='test'
    train_csv=pd.concat([train,test],ignore_index=True)
    joined_meta=pd.merge(train_csv,new_meta,on='id')
    fold='fold_'+str(i)
    os.mkdir(fold)
    os.chdir(fold)
    os.mkdir('data')
    os.mkdir('data/images')
    os.mkdir('data/images/train')
    os.mkdir('data/images/valid')    
    os.mkdir('data/labels')
    os.mkdir('data/labels/train')
    os.mkdir('data/labels/valid')
    os.system('git clone https://github.com/ultralytics/yolov5')
    os.chdir('yolov5')
    os.system('pip install -qr requirements.txt')
    os.chdir('..')
    
    print(os.getcwd())
    for k in range(len(train_csv)):
        row = train_csv.iloc[k]
        if row['split']=='train':
            copyfile('../train/'+row['id']+'.jpg','data/images/train/'+row['id']+'.jpg')
        else:
            copyfile('../train/'+row['id']+'.jpg','data/images/valid/'+row['id']+'.jpg')

    data_yaml = dict(
    train = '../data/images/train',
    val = '../data/images/valid',
    nc = 2,
    names = ['none', 'opacity']
    )
    print(os.getcwd())
    with open('yolov5/data/data.yaml', 'w') as outfile:
        yaml.dump(data_yaml, outfile, default_flow_style=True)
        
    IMG_SIZE=680
    for j in tqdm(range(len(joined_meta))):
        row = joined_meta.loc[j]
    # Get image id
        img_id = row.id
    # Get split
        split = row.split
    # Get image-level label
        if row.split=='train':
            file_name = f'data/labels/train/{row.id}.txt'
        else:
            file_name = f'data/labels/valid/{row.id}.txt'
        
    
        if row.label.split()[0]=='opacity':
        # Get bboxes
            bboxes = get_bbox(row)
        # Scale bounding boxes
            scale_bboxes = scale_bbox(row, bboxes)
        # Format for YOLOv5
            yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, IMG_SIZE, scale_bboxes)
            with open(file_name, 'w') as f:
                for bbox in yolo_bboxes:
                    bbox = [1]+bbox
                    bbox = [str(i) for i in bbox]
                    bbox = ' '.join(bbox)
                    f.write(bbox)
                    f.write('\n')
    os.chdir('yolov5')
    print(os.getcwd())
    !python train.py --img 704 \
                 --batch  20 \
                 --epochs 20 \
                 --data data.yaml \
                 --weights yolov5s.pt \
                 --save_period 1\
                 --project kaggle
    i=i+1
    os.chdir('..')
    os.chdir('..')

In [None]:
# Gather trained checkpoints into ./yolo_weights.
# Only the fold_1 copy is active; the other per-fold copies are commented out.
# NOTE(review): the commented fold_2..fold_4 lines read from a
# `kaggle-siim-covid` project directory, while the training cell passes
# `--project kaggle`, and the fold_0 line copies outside yolo_weights —
# confirm the paths before re-enabling them.
!mkdir yolo_weights
#%cp  fold_0/yolov5/kaggle/exp/weights/best.pt yolov5sfold0.pt
%cp  fold_1/yolov5/kaggle/exp/weights/best.pt yolo_weights/yolov5sfold1.pt
#%cp  fold_2/yolov5/kaggle-siim-covid/exp/weights/best.pt yolo_weights/yolov5sfold2.pt
#%cp  fold_3/yolov5/kaggle-siim-covid/exp/weights/best.pt yolo_weights/yolov5sfold3.pt
#%cp  fold_4/yolov5/kaggle-siim-covid/exp/weights/best.pt yolo_weights/yolov5sfold4.pt

In [None]:
# Recursively archive the yolo_weights directory into yolo_weights_med2.zip.
! zip -r yolo_weights_med2.zip yolo_weights 

In [None]:
!ls fold_0/yolov5.