In [None]:
!pip install --upgrade seaborn

In [None]:
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import ast

In [None]:
# Resolution of the pre-converted image dataset to read; also passed to YOLOv5 as --img.
dim = 512 #512, 256, 'original'
# Index of the fold held out for validation (the split below creates 5 folds, 0-4).
fold = 4

In [None]:
# Image-level annotations. The last 6 characters of every id are removed so the
# index can be joined against the image ids used by meta.csv.
train_df = pd.read_csv(
    "../input/siim-covid19-detection/train_image_level.csv",
    index_col="id",
)
id_lst = [raw_id[:-6] for raw_id in train_df.index.tolist()]
train_df.index = pd.Index(id_lst, name="id")

# Metadata shipped with the converted image dataset, restricted to the train split.
meta_jpg = pd.read_csv(
    f"../input/siimcovid19converttojpg{dim}px/meta.csv",
    index_col="image_id",
)
train_meta = meta_jpg[meta_jpg["split"] == "train"]

# Inner join on the index: only ids present in both tables survive.
train_df = pd.concat([train_df, train_meta], axis=1, join="inner")
train_df.index.name = "image_id"
train_df.head()

In [None]:
# Keep only the rows that have a value in the "boxes" column.
train_df = train_df[train_df["boxes"].notna()]
train_df.head()

In [None]:
# Turn the "image_id" index back into a regular column; the frame gets a fresh
# 0..n-1 index (the old index is kept, which is reset_index's default).
train_df = train_df.reset_index()

In [None]:
# Full path of every training image: .png when dim is 'original', .jpg otherwise.
# e.g. ../input/siimcovid19converttojpg1024px/train
image_ext = '.png' if dim == 'original' else '.jpg'
image_root = f'/kaggle/input/siimcovid19converttojpg{dim}px/train/'
train_df['image_path'] = image_root + train_df['image_id'] + image_ext

In [None]:
# Preview the frame, which now includes the image_path column.
train_df.head()

In [None]:
# Class names for the detector; this list is written to the dataset yaml as `names`.
classes = ['opacity']
classes

In [None]:
# Assign every row to one of 5 folds, using StudyInstanceUID as the group key so
# that all images of one study end up in the same fold.
gkf  = GroupKFold(n_splits = 5)
train_df['fold'] = -1
fold_col = train_df.columns.get_loc('fold')
# The loop variable is deliberately NOT named `fold`: that name holds the
# validation fold chosen in the configuration cell, and reusing it here would
# silently overwrite the setting with the index of the last split.
for fold_id, (train_idx, val_idx) in enumerate(gkf.split(train_df, groups = train_df.StudyInstanceUID.tolist())):
    # val_idx contains positional indices, so write by position, not by label.
    train_df.iloc[val_idx, fold_col] = fold_id
train_df.head()

In [None]:
# Images of the held-out fold become the validation set; all others are used for training.
is_val = train_df.fold == fold
val_files = list(train_df[is_val].image_path.unique())
train_files = list(train_df[~is_val].image_path.unique())
len(train_files), len(val_files)

In [None]:
# Spot check: show the rows whose StudyInstanceUID equals this id.
train_df[train_df["StudyInstanceUID"] == "0572ef0d0c1a"]

In [None]:
# Spot check: is there a label file for this id in the pre-generated labels dataset?
os.path.exists("/kaggle/input/siim-yolov5-labels/labels/0572ef0d0c1a.txt")

In [None]:
# Spot check: look the same id up as an image_id (raises KeyError if it is absent).
train_df.set_index("image_id").loc["0572ef0d0c1a", :]

In [None]:
label_dir = '/kaggle/input/siim-yolov5-labels/labels/'


def _copy_split(files, split):
    """Copy images and their label files into the siim/<split> directories.

    Args:
        files: iterable of image paths belonging to the split.
        split: sub-directory name, 'train' or 'val'.

    Raises:
        FileNotFoundError: if an image or its `<stem>.txt` label file is missing.
    """
    image_dst = f'/kaggle/working/siim/images/{split}'
    label_dst = f'/kaggle/working/siim/labels/{split}'
    os.makedirs(image_dst, exist_ok = True)
    os.makedirs(label_dst, exist_ok = True)
    for file in tqdm(files, desc = split):
        shutil.copy(file, image_dst)
        # splitext strips only the final extension, so a stem that itself
        # contains a dot is not truncated (split('.')[0] would cut it short).
        stem = os.path.splitext(os.path.basename(file))[0]
        shutil.copy(os.path.join(label_dir, stem + '.txt'), label_dst)


_copy_split(train_files, 'train')
_copy_split(val_files, 'val')

In [None]:
from os import listdir
from os.path import isfile, join
import yaml

cwd = '/kaggle/working/'

# One image path per line; sorted so the listing is the same on every run
# (glob returns paths in arbitrary order).
with open(join( cwd , 'train.txt'), 'w') as f:
    for path in sorted(glob('/kaggle/working/siim/images/train/*')):
        f.write(path+'\n')

with open(join( cwd , 'val.txt'), 'w') as f:
    for path in sorted(glob('/kaggle/working/siim/images/val/*')):
        f.write(path+'\n')

# Dataset description passed to train.py via --data.
data = dict(
    train =  join( cwd , 'train.txt') ,
    val   =  join( cwd , 'val.txt' ),
    nc    = len(classes),  # derived from the class list so the two cannot drift apart
    names = classes
    )

with open(join( cwd , 'siim.yaml'), 'w') as outfile:
    yaml.dump(data, outfile, default_flow_style=False)

# Echo the written file; the context manager closes the handle afterwards
# (the previous bare open() left it open for the lifetime of the kernel).
with open(join( cwd , 'siim.yaml'), 'r') as f:
    print('\nyaml:')
    print(f.read())

In [None]:
# https://www.kaggle.com/ultralytics/yolov5
# !git clone https://github.com/ultralytics/yolov5  # clone repo
# %cd yolov5
# Copy the YOLOv5 sources from the input dataset into the working directory.
# shutil.copytree raises FileExistsError when the destination already exists,
# so the copy is skipped when this cell is re-run.
if not os.path.exists('/kaggle/working/yolov5'):
    shutil.copytree('/kaggle/input/yolov5-official-v31-dataset/yolov5', '/kaggle/working/yolov5')
# All later relative paths (detect.py, train.py, runs/...) are resolved from here.
os.chdir('/kaggle/working/yolov5')
# %pip install -qr requirements.txt # install dependencies

import torch
from IPython.display import Image, clear_output  # to display images

clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

In [None]:
# Smoke test: run detect.py with the yolov5x.pt weights on the images in data/images/,
# then display one of the annotated results from runs/detect/exp.
!python detect.py --weights yolov5x.pt --img 640 --conf 0.25 --source data/images/
Image(filename='runs/detect/exp/zidane.jpg', width=600)

In [None]:
# !WANDB_MODE="dryrun" python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --nosave --cache 
# Train for 20 epochs (batch size 16) starting from yolov5x.pt on the dataset described
# by /kaggle/working/siim.yaml. `$dim` is substituted with the Python variable `dim`;
# WANDB_MODE="dryrun" is set in the environment of the training process.
!WANDB_MODE="dryrun" python train.py --img $dim --batch 16 --epochs 20 --data /kaggle/working/siim.yaml --weights yolov5x.pt --cache

In [None]:
# Show the label correlogram image found in the training run directory.
fig, ax = plt.subplots(figsize = (20,20))
ax.axis('off')
ax.imshow(plt.imread('runs/train/exp/labels_correlogram.jpg'));

In [None]:
# Show the labels overview image found in the training run directory.
fig, ax = plt.subplots(figsize = (20,20))
ax.axis('off')
ax.imshow(plt.imread('runs/train/exp/labels.jpg'));

In [None]:
import matplotlib.pyplot as plt
# Show the first three training-batch preview images, one figure each.
# A loop replaces three copy-pasted stanzas; the trailing semicolon keeps the
# AxesImage repr out of the cell output.
for batch_idx in range(3):
    plt.figure(figsize = (15, 15))
    plt.imshow(plt.imread(f'runs/train/exp/train_batch{batch_idx}.jpg'));

In [None]:
# 3x2 grid: the left column shows the labels image and the right column the
# pred image of each test batch; every panel is titled with its file path.
fig, ax = plt.subplots(3, 2, figsize = (2*5,3*5), constrained_layout = True)
for row in range(3):
    for col_idx, kind in enumerate(('labels', 'pred')):
        panel = ax[row][col_idx]
        panel_path = f'runs/train/exp/test_batch{row}_{kind}.jpg'
        panel.imshow(plt.imread(panel_path))
        panel.set_xticks([])
        panel.set_yticks([])
        panel.set_title(panel_path, fontsize = 12)

In [None]:
# Show the results.png image from the training run directory.
fig, ax = plt.subplots(figsize=(30,15))
ax.axis('off')
ax.imshow(plt.imread('runs/train/exp/results.png'));

In [None]:

# Show the confusion_matrix.png image from the training run directory.
fig, ax = plt.subplots(figsize=(30,15))
ax.axis('off')
ax.imshow(plt.imread('runs/train/exp/confusion_matrix.png'));

In [None]:
# Run detect.py with the best checkpoint of the training run on the validation images,
# at the training resolution ($dim), with confidence threshold 0.15 and IoU threshold 0.5.
# NOTE(review): --exist-ok presumably makes detect.py reuse runs/detect/exp instead of
# creating a new run directory (the next cell reads from runs/detect/exp) - confirm.
!python detect.py --weights 'runs/train/exp/weights/best.pt'\
--img $dim\
--conf 0.15\
--iou 0.5\
--source /kaggle/working/siim/images/val\
--exist-ok

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

# Candidate images: regular files only, because the output directory may also
# hold sub-directories, which cv2.imread cannot read.
files = [p for p in glob('runs/detect/exp/*') if os.path.isfile(p)]
row = 4
col = 4
for _ in range(3):
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample at the number of available files.
    grid_files = random.sample(files, min(len(files), row*col))
    images     = []
    for image_path in tqdm(grid_files):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for unreadable / non-image files; skip them.
            continue
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    fig = plt.figure(figsize=(col*5, row*5))
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(row, col),  # (rows, columns) grid of axes
                     axes_pad=0.05,  # pad between axes in inch.
                     )

    # Strip ticks from every cell so unused cells stay clean as well.
    for ax in grid:
        ax.set_xticks([])
        ax.set_yticks([])
    # Iterating over the grid returns the Axes.
    for ax, im in zip(grid, images):
        ax.imshow(im)
    plt.show()

In [None]:

# Remove the copied dataset and the detection outputs.
for stale_dir in ('/kaggle/working/siim', 'runs/detect'):
    shutil.rmtree(stale_dir)

# Delete every PNG/JPG under the training run directory (searched recursively).
stale_images = []
for pattern in ('runs/train/exp/**/*.png', 'runs/train/exp/**/*.jpg'):
    stale_images.extend(glob(pattern, recursive = True))
for file in stale_images:
    os.remove(file)