In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold 
from tqdm import tqdm
import shutil 
import yaml
import torch
from IPython.display import Image, clear_output
from glob import glob

In [None]:
# Load the per-image metadata of the test set. The prediction-parsing cell
# further down reads the `image_id`, `dim0` and `dim1` columns of this frame,
# and the submission is built by left-joining the predictions onto it.
test_df = pd.read_csv('../input/custom-csv/test_meta.csv')
test_df.head()

In [None]:
# Disabled cell: the code is kept inside a bare string literal, so none of it
# executes. If re-enabled it would read the training annotations and the
# per-image training metadata and join them on `image_id` into `df`.
'''
train_df = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
train_meta = pd.read_csv('../input/custom-csv/train_meta.csv')

df = pd.merge(train_df, train_meta, on='image_id')
df.head()
'''

In [None]:
# Disabled cell: everything below sits inside a bare string literal, so none of
# it executes. If re-enabled (together with the disabled cell that builds `df`)
# it prepares the YOLOv5 training set: normalized box labels, a train/validation
# split, per-image label files, image copies and data.yaml.
'''
CFG = {
    'dim': 512,
    'fold': 4,
}

df.fillna(0, inplace=True)

#Time to fix the other two coordinate values of the "No finding" category
df.loc[df["class_id"] == 14, ["x_max", "y_max"]] = 1.0

# Normalize the box corners: x is divided by dim1 and y by dim0.
df['x_min'] = df.apply(lambda row: (row.x_min)/row.dim1, axis =1)

df['y_min'] = df.apply(lambda row: (row.y_min)/row.dim0, axis =1)

df['x_max'] = df.apply(lambda row: (row.x_max)/row.dim1, axis =1)

df['y_max'] = df.apply(lambda row: (row.y_max)/row.dim0, axis =1)

# YOLO label geometry: box centre, width and height (all normalized).
df['x_mid'] = df.apply(lambda row: (row.x_max+row.x_min)/2, axis =1)

df['y_mid'] = df.apply(lambda row: (row.y_max+row.y_min)/2, axis =1)

df['w'] = df.apply(lambda row: (row.x_max-row.x_min), axis =1)

df['h'] = df.apply(lambda row: (row.y_max-row.y_min), axis =1)

df['area'] = df['w']*df['h']

df.head()

features = ['x_min', 'y_min', 'x_max', 'y_max', 'x_mid', 'y_mid', 'w', 'h', 'area']
X = df[features]
y = df['class_id']
X.shape, y.shape


# NOTE(review): StratifiedKFold.split() ignores `groups`, so the boxes of one
# image can be assigned to different folds and that image then shows up in both
# train_files and val_files. Consider a group-aware splitter before re-enabling.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=123)

df['fold'] = -1
for fold, (train_idx, val_idx) in enumerate(skf.split(X,y, groups = df.image_id.tolist())):
    df.loc[val_idx, 'fold'] = fold
df.head()

train_files = []
val_files   = []
val_files += list(df[df.fold==CFG['fold']].image_id.unique())
train_files += list(df[df.fold!=CFG['fold']].image_id.unique())
len(train_files), len(val_files)

os.makedirs('./vinbigdata-chest-xray-abnormalities-detection/train/labels', exist_ok = True)
os.makedirs('./vinbigdata-chest-xray-abnormalities-detection/train/images', exist_ok = True)
os.makedirs('./vinbigdata-chest-xray-abnormalities-detection/valid/labels', exist_ok = True)
os.makedirs('./vinbigdata-chest-xray-abnormalities-detection/valid/images', exist_ok = True)

TRAIN_LABELS_PATH = './vinbigdata-chest-xray-abnormalities-detection/train/labels'
VAL_LABELS_PATH = './vinbigdata-chest-xray-abnormalities-detection/valid/labels'

# One label file per image: "class x_mid y_mid w h" per box.
for file in tqdm(train_files):
    records = df[df['image_id'] == file]
    attributes = records[['class_id','x_mid','y_mid','w','h']].values
    attributes = np.array(attributes)
    np.savetxt(
        os.path.join(
            TRAIN_LABELS_PATH,
            f"{file}.txt"
        ),
        attributes,
        fmt = ["%d","%f","%f","%f","%f"]
    )
    shutil.copy(f'../input/vinbigdata-jpg/train_jpg/{file}.jpg', './vinbigdata-chest-xray-abnormalities-detection/train/images')



for file in tqdm(val_files):
    records = df[df['image_id'] == file]
    attributes = records[['class_id','x_mid','y_mid','w','h']].values
    attributes = np.array(attributes)
    np.savetxt(
        os.path.join(
            VAL_LABELS_PATH,
            f"{file}.txt"
        ),
        attributes,
        fmt = ["%d","%f","%f","%f","%f"]
    )
    # Validation images belong in valid/images (this used to copy them into
    # train/images, leaving the validation image folder empty).
    shutil.copy(f'../input/vinbigdata-jpg/train_jpg/{file}.jpg', './vinbigdata-chest-xray-abnormalities-detection/valid/images')



# Class names ordered by class id, as YOLOv5 expects in data.yaml.
class_ids, class_names = list(zip(*set(zip(df.class_id, df.class_name))))
classes = list(np.array(class_names)[np.argsort(class_ids)])
classes = list(map(lambda x: str(x), classes))
classes


data = dict(
    train = '../vinbigdata-chest-xray-abnormalities-detection/train/images',
    val   = '../vinbigdata-chest-xray-abnormalities-detection/valid/images',
    nc    = 15,
    names = classes
    )

with open('./data.yaml', 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Echo the file back; the context manager closes the handle afterwards.
with open('data.yaml', 'r') as f:
    print('\nyaml:')
    print(f.read())

'''
    

In [None]:
# Stage the YOLOv5 code, the trained run directory and the test images under
# ./yolov5. The base tree goes first: copytree refuses to write into a
# destination that already exists, so the nested copies must come after it.
for src_dir, dst_dir in (
    ('../input/yolov5-official-v31-dataset/yolov5', './yolov5'),
    ('../input/yolov5-model/exp34/exp34', './yolov5/runs/train/exp34'),
    ('../input/vinbigdata-jpg/test_jpg', './yolov5/data/test_data'),
):
    shutil.copytree(src_dir, dst_dir)


In [None]:
# Work from inside the copied YOLOv5 tree: the detect.py call and the
# 'runs/detect/exp/labels' glob in the following cells use paths relative to it.
os.chdir('./yolov5')

In [None]:
#!WANDB_MODE="dryrun" python train.py --img 1024 --batch 16 --epochs 10 --data ../data.yaml --weights yolov5x.pt --cache

In [None]:
# Run YOLOv5 inference on the copied test images. The prediction-parsing cell
# expects six values per detection, so the confidence must be written to the
# label files: spell out --save-conf instead of relying on argparse resolving
# the truncated prefix "--save-con".
!python detect.py --weights 'runs/train/exp34/weights/best.pt' --img 1024 --conf 0.15 --iou 0.5 --source data/test_data --exist-ok --save-txt --save-conf

In [None]:
def yolo2voc(image_height, image_width, bboxes):
    """
    Convert normalized YOLO boxes to absolute-pixel Pascal VOC boxes.

    yolo => [xmid, ymid, w, h] (normalized by image width / height)
    voc  => [x1, y1, x2, y2]   (pixels)

    Parameters
    ----------
    image_height, image_width : number
        Size in pixels of the image the boxes belong to.
    bboxes : array-like, shape (..., 4)
        Boxes in YOLO format. The input is never modified.

    Returns
    -------
    numpy.ndarray of float, same shape as ``bboxes``
        Boxes as [x1, y1, x2, y2] in pixels; values are neither rounded
        nor clipped to the image.
    """
    # np.array(..., dtype=float) always makes a float copy: the caller's data
    # stays untouched, plain lists are accepted, and integer input cannot
    # truncate the scaled coordinates.
    bboxes = np.array(bboxes, dtype=float)

    # Scale the normalized values to pixels (x/w by width, y/h by height).
    bboxes[..., [0, 2]] *= image_width
    bboxes[..., [1, 3]] *= image_height

    # centre - size/2 gives the top-left corner; adding the size to that
    # corner gives the bottom-right corner.
    bboxes[..., [0, 1]] -= bboxes[..., [2, 3]] / 2
    bboxes[..., [2, 3]] += bboxes[..., [0, 1]]

    return bboxes

image_ids = []
PredictionStrings = []

# Each label file is parsed as rows of six numbers: class, x_center, y_center,
# width, height (normalized YOLO box) and the confidence in the last column.
for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
    image_id = file_path.split('/')[-1].split('.')[0]
    # dim0 is taken as the image height and dim1 as its width, matching the
    # (disabled) label-preparation code in this notebook, which divides x by
    # dim1 and y by dim0. Reading them as "w, h" swapped the axes for every
    # non-square image. Assumes test_meta.csv uses the same column convention
    # as train_meta.csv -- TODO confirm.
    h, w = test_df.loc[test_df.image_id==image_id,['dim0', 'dim1']].values[0]
    # The context manager closes the file; split() tokenizes on any whitespace.
    with open(file_path, 'r') as f:
        data = np.array(f.read().split()).astype(np.float32).reshape(-1, 6)
    if data.shape[0] == 0:
        # No detections in this file: leave the image out so that the
        # submission step applies its default prediction instead.
        continue
    data = data[:, [0, 5, 1, 2, 3, 4]]  # -> class, conf, x_center, y_center, width, height
    voc_boxes = np.round(yolo2voc(h, w, data[:, 2:]))
    bboxes = list(np.round(np.concatenate((data[:, :2], voc_boxes), axis =1).reshape(-1), 1).astype(str))
    for idx in range(len(bboxes)):
        # Position 1 of every 6-value group is the confidence and keeps its one
        # decimal; class id and pixel coordinates are written as integers.
        bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
    image_ids.append(image_id)
    PredictionStrings.append(' '.join(bboxes))

In [None]:
# One row per image that produced a label file.
pred_df = pd.DataFrame(
    {
        'image_id': image_ids,
        'PredictionString': PredictionStrings,
    }
)

# Left-join onto the full test list so every test image gets a row; images
# without predictions end up as NaN and receive the default string
# "14 1 0 0 1 1" before the two submission columns are selected.
sub_df = (
    test_df
    .merge(pred_df, how='left', on='image_id')
    .fillna("14 1 0 0 1 1")
    .loc[:, ['image_id', 'PredictionString']]
)
sub_df.to_csv('/kaggle/working/submission.csv', index=False)


In [None]:
# Delete the YOLOv5 working tree; submission.csv was written to
# /kaggle/working itself, outside this tree, so it is not removed.
# Presumably this is the same directory as the relative './yolov5' populated
# by the copytree cell -- confirm the notebook starts in /kaggle/working.
# NOTE(review): an earlier cell changed the working directory to './yolov5',
# so the kernel's cwd may no longer exist after this call.
shutil.rmtree('/kaggle/working/yolov5')