# Imports

In [None]:


import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)





# Necessary/extra dependencies. 
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#! conda install -c conda-forge gdcm -y
#! conda install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg
#! conda install pillow
#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic


import pylab
#import pillow
#import gdcm
#pydicom
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from fastai.imports import *
#from fastai.medical.imaging import *
from PIL import Image

@register_line_cell_magic
def writetemplate(line, cell):
    """Write the cell body to a file after substituting notebook globals.

    ``line`` is used as the destination path and ``cell`` as a
    ``str.format`` template whose ``{name}`` fields are filled from this
    module's ``globals()``. The file is opened (and truncated) before the
    template is rendered.
    """
    with open(line, 'w') as handle:
        rendered = cell.format(**globals())
        handle.write(rendered)
        


# Clone YOLOv5 from GitHub

In [None]:
# Download YOLOv5
# Clones the repository into ./yolov5 relative to the current working directory.
!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
# Install dependencies
# requirements.txt is resolved inside the yolov5 checkout entered just above.
%pip install -qr requirements.txt  # install dependencies

# Step back out so the following cells run from the parent directory again.
%cd ../
import torch
# Report the torch version and the name of CUDA device 0 (or 'CPU' when CUDA is unavailable).
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

In [None]:
# Install W&B 
!pip install -q --upgrade wandb
#Login 
import wandb
wandb.login()

Read Files

In [None]:

# Load the training table and attach the absolute path of each resized JPG.
TRAIN_PATH = '/kaggle/input/siim-covid19-resized-to-256px-jpg/train/'
df = pd.read_csv('/kaggle/input/df-train/df_train.csv')

# TRAIN_PATH=  '/kaggle/working/siim-covid19-resized-to-256px-jpg/train/'
# Add absolute path: <TRAIN_PATH><id>.jpg
df['path'] = df['id'].map(lambda image_id: TRAIN_PATH + image_id + '.jpg')

In [None]:
# Sanity check: show the path built for the row labelled 0.
df.loc[0, 'path']

In [None]:
# Collapse the four one-hot study-level columns into one integer class index;
# the index follows the column order listed here.
labels = df[[
    'Negative for Pneumonia',
    'Typical Appearance',
    'Indeterminate Appearance',
    'Atypical Appearance',
]].values.argmax(axis=1)

In [None]:
# Display the textual and integer label columns side by side.
df.loc[:, ['label_y', 'label_int']]

# dim0 --> h (height)
# dim1 --> w (width)

In [None]:
# Normalise the box corners by the values currently stored in `w` / `h`
# (presumably the image width / height -- TODO confirm), then derive the box
# centre, size and area in the same normalised units.
# Column arithmetic replaces the row-wise `apply` calls: same values, one
# vectorised pass per column.
# WARNING: `w` and `h` are overwritten with the normalised box size at the end,
# so running this cell twice divides by the wrong values. Re-load `df` first.
df['x_min'] = df['x_min'] / df['w']
df['y_min'] = df['y_min'] / df['h']

df['x_max'] = df['x_max'] / df['w']
df['y_max'] = df['y_max'] / df['h']

df['x_mid'] = (df['x_max'] + df['x_min']) / 2
df['y_mid'] = (df['y_max'] + df['y_min']) / 2

# From here on `w` / `h` hold the normalised box width / height.
df['w'] = df['x_max'] - df['x_min']
df['h'] = df['y_max'] - df['y_min']

df['area'] = df['w'] * df['h']


In [None]:
# Drop the `class` column (raises KeyError if it is already gone).
df = df.drop(columns=['class'])
# The first space-separated token of `label` becomes the image-level class.
df['image_level'] = df['label'].map(lambda text: text.split(' ')[0])



In [None]:
from sklearn.model_selection import train_test_split
# Create train and validation split (80/20, stratified on the image-level class,
# fixed seed for reproducibility).
train_df, valid_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df.image_level.values,
)

# train_test_split hands back selections of `df`; take explicit copies before
# adding a column so pandas does not emit SettingWithCopyWarning.
train_df = train_df.copy()
valid_df = valid_df.copy()
train_df['split'] = 'train'
valid_df['split'] = 'valid'

# Recombine into one frame with a fresh 0..n-1 index; later cells rely on it.
df = pd.concat([train_df, valid_df]).reset_index(drop=True)

In [None]:

#TRAIN_PATH = 'input/siim-covid19-resized-to-256px-jpg/train/'
# Side length (pixels) used for resizing, box scaling and the --img flag below.
IMG_SIZE = 256
# Passed to train.py as --batch.
BATCH_SIZE = 16
# Passed to train.py as --epochs.
EPOCHS = 10



Create Image Datasets

In [None]:
#import pylibjpeg 
from fastai.imports import *
from fastai.medical.imaging import *
def loadfilename(filename, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixels as an 8-bit grayscale array.

    Parameters
    ----------
    filename : str or path-like
        Location of the DICOM file.
    voi_lut : bool, default True
        When True the pixels are passed through ``apply_voi_lut``; otherwise
        the raw ``pixel_array`` is used.
    fix_monochrome : bool, default True
        When True, images whose ``PhotometricInterpretation`` is
        ``"MONOCHROME1"`` are inverted (``max - pixels``).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array min-max scaled to the 0..255 range. A constant image
        yields an all-zero array.
    """
    # dcmread is the supported reader; the legacy read_file alias no longer
    # exists in current pydicom releases.
    dicom = pydicom.dcmread(filename)

    if voi_lut:
        img_data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        img_data = dicom.pixel_array

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        img_data = np.amax(img_data) - img_data

    # Min-max scale to [0, 1]; skip the division for a constant image, where
    # the shifted maximum is 0 and dividing would produce NaNs.
    img_data = img_data - np.min(img_data)
    peak = np.max(img_data)
    if peak > 0:
        img_data = img_data / peak

    return (img_data * 255).astype(np.uint8)

In [None]:
# os.makedirs('siim-covid19-resized-to-256px-jpg/train',exist_ok=True)

def create_dataset(out_dir='siim-covid19-resized-to-256px-jpg/train'):
    """Convert the DICOM behind every row of ``df`` into a resized JPG.

    For each row the study folder
    ``../input/siim-covid19-detection/train/<StudyInstanceUID>`` is opened,
    the first sub-folder and the first file inside it (in sorted order) are
    read with ``loadfilename``, resized to ``IMG_SIZE x IMG_SIZE`` and written
    to ``<out_dir>/<id>.jpg``.

    Parameters
    ----------
    out_dir : str
        Destination folder; created if missing. The default is the location
        the notebook has always written to.

    Raises
    ------
    IOError
        If OpenCV reports that an image could not be written.
    """
    # cv2.imwrite does not create folders and only signals failure through its
    # return value, so make sure the destination exists up front.
    os.makedirs(out_dir, exist_ok=True)

    # Iterate the columns directly so the loop does not depend on `df` having
    # a 0..n-1 index.
    for img_name, study_id in tqdm(zip(df['id'], df['StudyInstanceUID']), total=len(df)):
        dicom_path = "../input/siim-covid19-detection/train/{}".format(study_id)

        # NOTE(review): the first file of the first sub-folder is used; for a
        # study holding several images this may not be the one named by
        # `id` -- confirm against the dataset layout. Sorting makes the
        # choice deterministic across filesystems.
        series_dir = os.path.join(dicom_path, sorted(os.listdir(dicom_path))[0])
        img_path = os.path.join(series_dir, sorted(os.listdir(series_dir))[0])

        img = loadfilename(img_path)
        img_s = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        out_path = os.path.join(out_dir, str(img_name) + ".jpg")
        if not cv2.imwrite(out_path, img_s):
            raise IOError("Could not write image to {}".format(out_path))

In [None]:
# Load meta.csv file
# Original dimensions are required to scale the bounding box coordinates appropriately.
# meta.csv records the original image dimensions, which are needed to scale
# the bounding boxes down to the resized images.
meta_df = pd.read_csv('/kaggle/input/siim-covid19-resized-to-256px-jpg/meta.csv')

# Keep the training rows only, drop the now-constant `split` column and give
# the remaining three columns the names used in the rest of the notebook.
is_train_row = meta_df['split'] == 'train'
train_meta_df = meta_df[is_train_row].drop(columns='split')
train_meta_df.columns = ['id', 'dim0', 'dim1']

train_meta_df.head(2)

In [None]:
# Left-join the original dimensions (dim0, dim1) onto every row by image id.
df = pd.merge(df, train_meta_df, how='left', on='id')

# Show the box size columns next to the original image dimensions.
df[['w', 'h', 'dim0', 'dim1']]

In [None]:
# Report how many rows ended up in each split.
split_summary = f'Size of dataset: {len(df)}, training images: {len(train_df)}. validation images: {len(valid_df)}'
print(split_summary)

In [None]:
os.makedirs('covid/images/train', exist_ok=True)
os.makedirs('covid/images/valid', exist_ok=True)

os.makedirs('covid/labels/train', exist_ok=True)
os.makedirs('covid/labels/valid', exist_ok=True)

! ls covid/images

In [None]:
# Spot check that one known image id is present in the resized training folder.
im_path = '/kaggle/input/siim-covid19-resized-to-256px-jpg/train'
im_path_list = os.listdir(im_path)
sample_present = 'b9175a64ad09.jpg' in im_path_list
print(sample_present)

Get Boxes

In [None]:
# Get the raw bounding box by parsing the row value of the label column.
# Ref: https://www.kaggle.com/yujiariyasu/plot-3positive-classes
def get_bbox(row):
    """Parse ``row.label`` into a list of ``[xmin, ymin, xmax, ymax]`` floats.

    The label is a space-separated string made of groups of six tokens. The
    first two tokens of each group are skipped and the remaining four are
    converted to ``float``. A trailing group with fewer than six tokens is
    still converted but not returned.
    """
    tokens = row.label.split(' ')
    boxes = []
    for start in range(0, len(tokens), 6):
        # Positions 2..5 of the group hold the four coordinates.
        coords = [float(token) for token in tokens[start + 2:start + 6]]
        if len(coords) == 4:
            boxes.append(coords)
    return boxes

# Scale the bounding boxes according to the size of the resized image. 
def scale_bbox(row, bboxes, img_size=None):
    """Scale boxes from original-image pixels to resized-image pixels.

    Parameters
    ----------
    row : object with ``dim0`` and ``dim1`` attributes
        ``dim1`` is used as the original extent along x and ``dim0`` along y.
    bboxes : iterable of ``[xmin, ymin, xmax, ymax]``
        Boxes in original-image pixel coordinates.
    img_size : number, optional
        Side length of the (square) resized image. Defaults to the
        notebook-level ``IMG_SIZE`` when omitted.

    Returns
    -------
    list of ``[xmin, ymin, xmax, ymax]`` lists of ``int``.
        Each value is rounded to 4 decimals and then truncated by ``int``.
    """
    if img_size is None:
        img_size = IMG_SIZE

    # Get scaling factor
    scale_x = img_size / row.dim1
    scale_y = img_size / row.dim0

    def _to_pixel(value, scale):
        # Round to 4 decimals first so values that are a hair below an
        # integer because of float error are not truncated down by int().
        return int(np.round(value * scale, 4))

    return [
        [
            _to_pixel(bbox[0], scale_x),
            _to_pixel(bbox[1], scale_y),
            _to_pixel(bbox[2], scale_x),
            _to_pixel(bbox[3], scale_y),
        ]  # xmin, ymin, xmax, ymax
        for bbox in bboxes
    ]

# Convert the bounding boxes in YOLO format.
def get_yolo_format_bbox(img_w, img_h, bboxes):
    """Convert corner boxes to normalised YOLO ``[xc, yc, w, h]`` boxes.

    Parameters
    ----------
    img_w, img_h : number
        Width and height of the image the boxes refer to.
    bboxes : iterable of ``[xmin, ymin, xmax, ymax]``
        Boxes in pixel coordinates of that image.

    Returns
    -------
    list of ``[x_center, y_center, width, height]``
        One entry per input box, each value divided by the image size.
        Corners are clamped to the image first, so every value lies in
        ``[0, 1]`` as long as ``xmin <= xmax`` and ``ymin <= ymax``.
    """
    yolo_boxes = []
    for xmin, ymin, xmax, ymax in bboxes:
        # Clamp to the image: annotations that spill over the border would
        # otherwise produce coordinates outside the normalised range.
        x0 = min(max(xmin, 0), img_w)
        x1 = min(max(xmax, 0), img_w)
        y0 = min(max(ymin, 0), img_h)
        y1 = min(max(ymax, 0), img_h)

        w = x1 - x0  # xmax - xmin
        h = y1 - y0  # ymax - ymin
        # Exact midpoint; rounding the half-size to an integer would shift
        # the centre by up to half a pixel.
        xc = x0 + w / 2
        yc = y0 + h / 2

        yolo_boxes.append([xc / img_w, yc / img_h, w / img_w, h / img_h])  # x_center y_center width height

    return yolo_boxes

In [None]:
from shutil import copyfile
def create_img_dataset():
    """Copy each image listed in ``df`` into its split folder under ``covid/images``.

    Rows whose ``split`` is ``'train'`` go to ``covid/images/train``; every
    other row goes to ``covid/images/valid``. Files are named ``<id>.jpg``.
    """
    for idx in tqdm(range(len(df))):
        record = df.loc[idx]
        destination = (
            f'covid/images/train/{record.id}.jpg'
            if record.split == 'train'
            else f'covid/images/valid/{record.id}.jpg'
        )
        copyfile(record.path, destination)

In [None]:
# Populate covid/images/{train,valid} with the JPGs referenced by df.
create_img_dataset()

In [None]:
# Prepare the txt files for bounding box
for i in tqdm(range(len(df))):
    row = df.loc[i]
    # Get image id
    img_id = row.id
    # Get split
    split = row.split
    # Get image-level label
    label = row.image_level
    
    if row.split=='train':
        file_name = f'covid/labels/train/{row.id}.txt'
    else:
        file_name = f'covid/labels/valid/{row.id}.txt'
        
    
    if label=='opacity':
        # Get bboxes
        bboxes = get_bbox(row)
        # Scale bounding boxes
        scale_bboxes = scale_bbox(row, bboxes)
        # Format for YOLOv5
        yolo_bboxes = get_yolo_format_bbox(IMG_SIZE, IMG_SIZE, scale_bboxes)
        
        with open(file_name, 'w') as f:
            for bbox in yolo_bboxes:
                bbox = [1]+bbox
                bbox = [str(i) for i in bbox]
                bbox = ' '.join(bbox)
                f.write(bbox)
                f.write('\n')

**Download the dataset as a zip archive**

In [None]:

#! zip -r output.zip covid

#! rm -rf covid



In [None]:
# Create .yaml file 
import yaml

## Train Yolo

In [None]:
# Enter the YOLOv5 checkout; the path is relative to the current working directory.
%cd yolov5

In [None]:
# List the contents of the YOLOv5 data folder, where data.yaml is written next.
os.listdir('/kaggle/working/yolov5/data')

In [None]:
# Dataset description handed to train.py: image folders for both splits, the
# number of classes and their names (index 1 is the id written to the label files).
data_yaml = {
    'train': '/kaggle/working/covid/images/train',
    'val': '/kaggle/working/covid/images/valid',
    'nc': 2,
    'names': ['none', 'opacity'],
}

# Note that I am creating the file in the yolov5/data/ directory.
yaml_path = '/kaggle/working/yolov5/data/data.yaml'
with open(yaml_path, 'w') as outfile:
    yaml.dump(data_yaml, outfile, default_flow_style=True)
    
    


In [None]:

# Print the generated dataset config to verify what was written.
%cat /kaggle/working/yolov5/data/data.yaml

In [None]:
# Number of entries in the validation image folder.
len(os.listdir('/kaggle/working/covid/images/valid'))



In [None]:
# Number of entries in the training image folder.
len(os.listdir('/kaggle/working/covid/images/train'))

In [None]:
# Launch train.py from the current directory (the yolov5 checkout entered above).
# IMG_SIZE, BATCH_SIZE and EPOCHS are interpolated from the notebook's constants;
# the run starts from the yolov5m.pt weights and uses the data.yaml written earlier.
!python train.py --img {IMG_SIZE} \
                 --batch {BATCH_SIZE} \
                 --epochs {EPOCHS} \
                 --data data.yaml \
                 --weights yolov5m.pt \
                 --project kaggle-siim-covid19 \
                 --cache

In [None]:
# Archive the whole yolov5 folder to output1.zip in the working directory.
! zip -r '/kaggle/working/output1.zip' '/kaggle/working/yolov5'

#! rm -rf '/kaggle/working/yolov5'

In [None]:
# Show labels.jpg from the first training run folder ('exp') without axes.
labels_fig, labels_ax = plt.subplots(figsize=(20, 20))
labels_ax.axis('off')
labels_ax.imshow(plt.imread('/kaggle/working/yolov5/kaggle-siim-covid19/exp/labels.jpg'));

In [None]:
# List the artefacts written into the 'exp' run folder.
!ls '/kaggle/working/yolov5/kaggle-siim-covid19/exp'

In [None]:
# Folder of resized test JPGs; passed to detect.py as --source below.
TEST_PATH = '/kaggle/input/siim-covid19-resized-to-256px-jpg/test/'

In [None]:
# Checkpoint file passed to detect.py as --weights (despite the name, this is a
# file path, relative to the yolov5 directory).
# NOTE(review): 'exp' presumably refers to the first training run -- confirm if
# training was executed more than once.
weights_dir = 'kaggle-siim-covid19/exp/weights/best.pt'

In [None]:
# Make sure detection runs from the yolov5 checkout, since the paths used below are relative.
%cd '/kaggle/working/yolov5'

## Run Detection

In [None]:


# Run detect.py on the test images with the trained checkpoint.
# weights_dir, TEST_PATH and IMG_SIZE are interpolated from the notebook;
# --save-txt / --save-conf request text outputs, which the next cells read from
# runs/detect/exp/labels/.
!python detect.py --weights {weights_dir} \
                  --source {TEST_PATH} \
                  --img {IMG_SIZE} \
                  --conf 0.28 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf \
                 --exist-ok


In [None]:
# Names of the label files written by the detection run (path relative to yolov5/).
pred_label_list=os.listdir('runs/detect/exp/labels/')

In [None]:
# One label file is counted per entry in pred_label_list.
n_predicted = len(pred_label_list)
print(f'Number of opacity predicted by YOLOv5: {n_predicted}')

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

# Collect the annotated images written by detect.py. The run folder also
# contains the `labels/` directory read earlier, so keep regular image files
# only; a directory path would make cv2.imread return None.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
files = [
    path for path in glob('runs/detect/exp/*')
    if os.path.isfile(path) and path.lower().endswith(IMAGE_EXTENSIONS)
]

# Grid shape: 4 rows x 3 columns, matching the portrait figure size below.
row = 4
col = 3

for _ in range(3):
    # Never ask for more files than exist (random.sample would raise).
    grid_files = random.sample(files, min(row * col, len(files)))
    images = []
    for image_path in tqdm(grid_files):
        bgr = cv2.imread(image_path)
        if bgr is None:
            # Unreadable file: skip it rather than crash in cvtColor.
            continue
        images.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

    fig = plt.figure(figsize=(col*5, row*5))
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(row, col),  # creates a row x col grid of axes
                     axes_pad=0.05,  # pad between axes in inch.
                     )

    # Hide ticks on every cell, including ones left empty.
    for ax in grid:
        ax.set_xticks([])
        ax.set_yticks([])

    for ax, im in zip(grid, images):
        # Iterating over the grid returns the Axes.
        ax.imshow(im)
    plt.show()