In [None]:
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import torch
from IPython.display import Image, clear_output  # to display images

In [None]:
# Fold index meant to select the validation split in later cells.
fold = 0

# Load the image-level annotation table and preview the first rows.
annotations_csv = './image_info/train_image_level.csv'
train_df = pd.read_csv(annotations_csv)
train_df.head()

In [None]:
# Tag every row with a fold id in 0..4. The split is grouped on StudyInstanceUID,
# so all rows belonging to one study end up in the same fold.
df = train_df  # alias, not a copy: the 'fold' column is added to train_df itself
gkf = GroupKFold(n_splits=5)
df['fold'] = -1  # placeholder value until the loop below assigns a real fold
study_groups = df.StudyInstanceUID.tolist()

# The loop counter is deliberately NOT named `fold`: that name already holds the
# validation fold chosen earlier in the notebook. Reusing it here would leave it
# at 4 once the loop finishes and silently change which fold is held out later.
for fold_id, (train_idx, val_idx) in enumerate(gkf.split(df, groups=study_groups)):
    # NOTE(review): val_idx holds row positions while .loc is label-based; this
    # assumes df still has the default 0..n-1 index produced by read_csv.
    df.loc[val_idx, 'fold'] = fold_id

In [None]:
train_df = df

# Image file name = the row id with "_image" removed, plus the ".jpg" extension,
# located under ./images/512jpg/train/.
image_stems = train_df['id'].str.replace("_image", "")
train_df['image_path'] = './images/512jpg/train/' + image_stems + '.jpg'
train_df.head()

In [None]:
# Rows whose fold id equals `fold` form the validation split; all others train.
in_val_fold = train_df.fold == fold
val_files = list(train_df[in_val_fold].image_path.unique())
train_files = list(train_df[~in_val_fold].image_path.unique())

# Show how many distinct image paths ended up in each split.
len(train_files), len(val_files)

In [None]:
# Spot-check one training image path (index 1, i.e. the second entry).
sample_train_path = train_files[1]
print(sample_train_path)

In [None]:
# Populate ./images/yolo_images/{images,labels}/{train,val} with the image files
# of each split and their matching label .txt files from label_dir.
yolo_root = './images/yolo_images'
label_dir = './images/box_locations'


def copy_split(files, split):
    """Copy every image in `files`, plus its label file, into the `split` folders.

    Parameters
    ----------
    files : iterable of str
        Paths of the image files to copy.
    split : str
        Sub-folder name under images/ and labels/ ('train' or 'val').

    Raises
    ------
    Whatever shutil.copy raises (e.g. FileNotFoundError) when an image or its
    label file `<label_dir>/<image stem>.txt` does not exist.
    """
    image_dst = os.path.join(yolo_root, 'images', split)
    label_dst = os.path.join(yolo_root, 'labels', split)
    os.makedirs(image_dst, exist_ok=True)
    os.makedirs(label_dst, exist_ok=True)

    for image_path in tqdm(files, desc=split):
        shutil.copy(image_path, image_dst)
        # Stem = file name without directory and without its final extension.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        shutil.copy(os.path.join(label_dir, stem + '.txt'), label_dst)


# NOTE(review): existing files in the target folders are not removed, so
# re-running with a different validation fold would leave images from the
# previous split in place — confirm whether the folders should be cleared first.
copy_split(train_files, 'train')
copy_split(val_files, 'val')

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

# Class names written to the dataset config; their count is written as `nc`.
classes = ['0. opacity', '1. none']

# NOTE(review): machine-specific absolute paths — adjust when running elsewhere.
cwd = '/Users/siyiwei/Desktop/COVID_XRay_Detection/images/yolo_images/'
config = '/Users/siyiwei/Desktop/COVID_XRay_Detection/models/'

# Make sure the output directory exists before writing into it.
os.makedirs(config, exist_ok=True)

# Write one list file per split (train.txt / val.txt), each holding the path of
# every file found in <cwd>/images/<split>/, one path per line.
list_paths = {}
for split in ('train', 'val'):
    list_paths[split] = join(config, split + '.txt')
    with open(list_paths[split], 'w') as f:
        # glob returns matches in arbitrary order; sort so the file is reproducible.
        for path in sorted(glob(join(cwd, 'images', split, '*'))):
            f.write(path + '\n')

# Dataset description: the two list files, the class count and the class names.
data = dict(
    train=list_paths['train'],
    val=list_paths['val'],
    nc=len(classes),
    names=classes,
)

yaml_path = join(config, 'siim-cov19.yaml')
with open(yaml_path, 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Echo the generated file; the context manager closes the handle afterwards.
with open(yaml_path, 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
# Switch the working directory to the yolov5 folder; the shell commands in the
# following cells reference detect.py / train.py by relative path.
os.chdir('/Users/siyiwei/Desktop/COVID_XRay_Detection/yolov5')

# Report the torch version and either the properties of CUDA device 0 or 'CPU'.
if torch.cuda.is_available():
    device_info = torch.cuda.get_device_properties(0)
else:
    device_info = 'CPU'
print('Setup complete. Using torch %s %s' % (torch.__version__, device_info))

In [None]:
# Run detect.py (resolved relative to the current working directory) with the
# yolov5s.pt weights on the files under data/images/, passing --img 512 and
# --conf 0.25. Presumably a smoke test of the YOLOv5 setup before training —
# TODO confirm.
!python3 detect.py --weights yolov5s.pt --img 512 --conf 0.25 --source data/images/

In [None]:
# Launch train.py with --img 512, --batch 24 and --epochs 35, starting from the
# yolov5x.pt weights, using the siim-cov19.yaml dataset config from the models
# directory and passing --cache.
# NOTE(review): hyp.scratch.yaml is not created by any cell visible in this
# notebook; it must already exist at the given path. All paths are
# machine-specific absolute paths.
!python3 train.py \
    --img 512 \
    --batch 24 \
    --epochs 35 \
    --data /Users/siyiwei/Desktop/COVID_XRay_Detection/models/siim-cov19.yaml \
    --hyp /Users/siyiwei/Desktop/COVID_XRay_Detection/models/hyp.scratch.yaml \
    --weights yolov5x.pt --cache