# Imports

In [None]:
# Install conda packages from local archives shipped in the
# `pydicom-conda-helper` input dataset (one archive per command).
# NOTE(review): presumably these provide the GDCM / libjpeg codecs pydicom needs
# to decode compressed DICOM pixel data - confirm before removing any of them.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:

# Necessary/extra dependencies. 
import os
import gc
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
#! conda install -c conda-forge gdcm -y
#! conda install pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg
#! conda install pillow
#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic


import pylab
#import pillow
#import gdcm
#pydicom
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from fastai.imports import *
#from fastai.medical.imaging import *
from PIL import Image

@register_line_cell_magic
def writetemplate(line, cell):
    """Write the cell body to a file after filling `{name}` placeholders.

    Args:
        line: Text following the magic name; used as the output file path.
        cell: Cell body; formatted with ``str.format`` using this module's
            global variables as the substitution mapping.
    """
    # The file is opened (and truncated) before formatting, so a formatting
    # error leaves an empty file behind.
    with open(line, 'w') as target:
        rendered = cell.format(**globals())
        target.write(rendered)
        
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Report the installed torch version and the compute device:
# the name of CUDA device 0 when CUDA is available, otherwise the literal 'CPU'.
import torch
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Read Files

In [None]:
# Read the sample submission file; its `id` column is used below to split
# the rows into study-level (`*_study`) and image-level (`*_image`) entries.
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
print(len(sub_df))
sub_df.head()

In [None]:
# Study-level rows: submission ids containing the '_study' marker.
study_df = sub_df[sub_df['id'].str.contains('_study')]
len(study_df)

In [None]:
# NOTE(review): this cell repeats the study-level selection of the previous
# cell verbatim; it recomputes the same `study_df` and its length.
study_df = sub_df.loc[sub_df.id.str.contains('_study')]
len(study_df)

In [None]:
# Image-level rows: submission ids containing the '_image' marker.
image_df = sub_df[sub_df['id'].str.contains('_image')]
len(image_df)

In [None]:
# Ref: https://www.kaggle.com/xhlulu/siim-covid-19-convert-to-jpg-256px
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, pass the pixel data through ``apply_voi_lut``
            (the VOI LUT, if provided by the device, maps raw data to a
            "human-friendly" view).
        fix_monochrome: When True, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"`` so they do not
            look inverted.

    Returns:
        ``numpy.uint8`` array with the image min-max scaled to 0..255.
        A constant image (max == min) yields an all-zero array.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. Skip the division for constant images, where
    # the peak is 0 and dividing would produce NaNs (0 / 0).
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def resize_xray(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Convert an array to a PIL image bounded by a `size` x `size` square.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Args:
        array: Image array accepted by ``Image.fromarray``.
        size: Edge length of the target square, in pixels.
        keep_ratio: If True, shrink with ``Image.thumbnail`` (aspect ratio
            kept); otherwise resize to exactly ``(size, size)``.
        resample: PIL resampling filter.

    Returns:
        The resulting ``PIL.Image.Image``.
    """
    picture = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return picture.resize(target, resample)

    # thumbnail() modifies the image in place and returns None.
    picture.thumbnail(target, resample)
    return picture

In [None]:

# Square edge length (pixels) of the converted test images.
IMG_SIZE = 256
# Directory that receives the converted PNG test images.
TEST_PATH = '/kaggle/tmp/test/'

def prepare_test_images(src_dir='../input/siim-covid19-detection/test', dst_dir=None):
    """Convert every DICOM file under `src_dir` to a resized PNG in `dst_dir`.

    Args:
        src_dir: Root directory that is walked recursively for ``.dcm`` files.
        dst_dir: Output directory for the PNG files; defaults to ``TEST_PATH``.
            It is created if missing.

    Returns:
        Tuple ``(image_id, dim0, dim1)`` of parallel lists: the file name
        without extension, and the original array's first and second
        dimension (``xray.shape[0]`` / ``xray.shape[1]``) for each image.
    """
    if dst_dir is None:
        dst_dir = TEST_PATH

    image_id = []
    dim0 = []
    dim1 = []

    os.makedirs(dst_dir, exist_ok=True)

    for dirname, _, filenames in tqdm(os.walk(src_dir)):
        for file in filenames:
            # Split off the extension instead of str.replace('dcm', ...), which
            # would also rewrite a "dcm" occurring inside the file stem.
            stem, ext = os.path.splitext(file)
            if ext.lower() != '.dcm':
                # Skip stray non-DICOM files rather than crash in the reader.
                continue

            # set keep_ratio=True to have original aspect ratio
            xray = read_xray(os.path.join(dirname, file))
            im = resize_xray(xray, size=IMG_SIZE)
            im.save(os.path.join(dst_dir, stem + '.png'))

            image_id.append(stem)
            dim0.append(xray.shape[0])
            dim1.append(xray.shape[1])

    return image_id, dim0, dim1

In [None]:
# Convert the test DICOMs to PNG and keep each image's id and original
# dimensions (dim0 / dim1); they are used later to rescale predicted boxes.
image_ids, dim0, dim1 = prepare_test_images()
print(f'Number of test images: {len(os.listdir(TEST_PATH))}')

In [None]:
# List the contents of the YOLO weights dataset directory.
!ls '../input/yolo-model-train-weight/weights'

In [None]:
# Despite the name, this is the path of a single checkpoint file (best.pt);
# it is passed to detect.py via --weights.
weights_dir = '../input/yolo-model-train-weight/weights/best.pt'

# # Run Detection

# Meta Files

In [None]:
# meta_df=pd.read_csv('/kaggle/input/siim-covid19-resized-to-256px-jpg/meta.csv')



In [None]:
# One row per converted test image: id plus original array dimensions.
meta_df = pd.DataFrame.from_dict({'image_id': image_ids, 'dim0': dim0, 'dim1': dim1})

# Associate image-level id with study-level ids.
# Note that a study-level might have more than one image-level ids.
# The test tree is laid out as <study>/<series>/<image file>. Build the
# image -> study lookup once and apply it with a single vectorised map,
# instead of masking the whole frame once per image.
test_root = '../input/siim-covid19-detection/test'
image_to_study = {}
for study_dir in os.listdir(test_root):
    for series in os.listdir(f'{test_root}/{study_dir}'):
        for image in os.listdir(f'{test_root}/{study_dir}/{series}/'):
            image_to_study[os.path.splitext(image)[0]] = study_dir

# Images without a matching entry get NaN as study_id.
meta_df['study_id'] = meta_df['image_id'].map(image_to_study)

meta_df.head()


In [None]:
# Previous variant (disabled): select the rows whose 'split' column is 'test'.
#meta_df_test=meta_df.loc[meta_df['split']=='test']
# meta_df is built from the test images only, so take an independent copy as-is.
meta_df_test=meta_df.copy()

In [None]:
#del meta_df_test['split']

Create the meta file for the test dataset

In [None]:
# Display the test metadata frame (image_id, dim0, dim1, study_id).
meta_df_test

Image Detection

In [None]:
# Run YOLOv5 inference over the converted PNG test images.
# Disabled alternatives kept for reference:
# YOLO_MODEL_PATH = '../input/yolo-models/yolov5s-e-100-img-512.pt'
#YOLO_MODEL_PATHS = 'kaggle-siim-covid19/exp/weights/best.pt'
# TEST_PATH = '/kaggle/input/siim-covid19-resized-to-256px-jpg/test/'
# NOTE(review): re-assigns TEST_PATH to the same value set in the image
# preparation cell.
TEST_PATH = f'/kaggle/tmp/test/'
# --save-txt / --save-conf make detect.py write one label file per image with
# a confidence value per box; those files are parsed later in this notebook.
# --exist-ok reuses the same run directory on re-runs (the cells below read
# from runs/detect/exp).
!python ../input/yolo-v5/detect.py --weights {weights_dir} \
                  --source {TEST_PATH} \
                  --img {IMG_SIZE} \
                  --conf 0.28 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf \
                  --exist-ok

In [None]:
# Show which run directories detect.py created.
os.listdir('runs/detect/')

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import numpy as np
import random
import cv2
from glob import glob
from tqdm import tqdm

# Preview random samples of the annotated images written by detect.py.
# Keep regular files only: the glob also matches the `labels` sub-directory,
# which cv2.imread cannot read (it returns None and cvtColor would crash).
files = [path for path in glob('runs/detect/exp/*') if os.path.isfile(path)]

row = 4  # grid rows
col = 3  # grid columns
for _ in range(3):
    # Never request more samples than there are files (random.sample raises).
    grid_files = random.sample(files, min(len(files), row*col))
    images     = []
    for image_path in tqdm(grid_files):
        img = cv2.imread(image_path)
        if img is None:
            # Not a readable image; skip instead of failing the whole cell.
            continue
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    fig = plt.figure(figsize=(col*5, row*5))
    grid = ImageGrid(fig, 111,  # similar to subplot(111)
                     nrows_ncols=(row, col),  # row x col grid, matching figsize
                     axes_pad=0.05,  # pad between axes in inch.
                     )

    for ax, im in zip(grid, images):
        # Iterating over the grid returns the Axes.
        ax.imshow(im)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()

In [None]:
# Directory where detect.py (--save-txt) wrote its label files.
PRED_PATH ='runs/detect/exp/labels'
# File names present in that directory; used later to check whether an image
# has a `<image_id>.txt` prediction file.
prediction_files = os.listdir(PRED_PATH)
# NOTE(review): this counts label files (one per image), not individual boxes.
print(f'Number of opacity predicted by YOLOv5: {len(prediction_files)}')

In [None]:
import tensorflow as tf
import gc



In [None]:
# Shorthand for tf.data's autotuning sentinel, used for map() and prefetch() below.
AUTOTUNE = tf.data.AUTOTUNE

# Settings for the study-level classifier; the visible cells read
# img_height, img_width and batch_size from it.
CONFIG = {
    'seed': 42,
    'num_labels': 4,
    'num_folds': 5,
    'img_width': 256,
    'img_height': 256,
    'batch_size': 8,
    'architecture': "CNN",
    'infra': "GCP",
}

In [None]:
# TEST_PATH = '/kaggle/input/siim-covid19-resized-to-256px-jpg/test/'

In [None]:
# Add the PNG path for each image-level row: '<TEST_PATH><image id>.png',
# where the image id is the part of `id` before the first underscore.
# assign() returns a new frame, so nothing is written into a slice of sub_df
# (avoids SettingWithCopyWarning) and the cell can be re-run safely.
image_df = (
    image_df
    .assign(path=TEST_PATH + image_df['id'].str.split('_').str[0] + '.png')
    .reset_index(drop=True)
)

In [None]:
# Preview the image-level rows, now including the `path` column.
image_df.head()

In [None]:
# NOTE(review): repeats the study-level selection made earlier in the
# notebook; `study_df` is rebuilt from the aggregated predictions further below.
study_df = sub_df.loc[sub_df.id.str.contains('_study')]
len(study_df)

Thanks to https://www.kaggle.com/ayuraj/submission-covid19/data

In [None]:
@tf.function
def decode_image(image):
    """Decode PNG bytes into a 3-channel float32 image tensor.

    Args:
        image: Scalar string tensor holding the encoded PNG file contents.

    Returns:
        The decoded image converted to ``tf.float32`` via
        ``tf.image.convert_image_dtype``.
    """
    # Compressed string -> 3-D uint8 tensor, then dtype conversion (normalisation).
    pixels = tf.image.decode_png(image, channels=3)
    return tf.image.convert_image_dtype(pixels, dtype=tf.float32)

@tf.function
def load_image(df_dict):
    """Read, decode and resize the image referenced by one dataset element.

    Args:
        df_dict: Mapping with a ``'path'`` entry holding the image file path.

    Returns:
        Image tensor resized to ``CONFIG['img_height']`` x ``CONFIG['img_width']``.
    """
    encoded = tf.io.read_file(df_dict['path'])
    target_size = (CONFIG['img_height'], CONFIG['img_width'])
    return tf.image.resize(decode_image(encoded), target_size)

# Build the inference pipeline over the image-level rows.
testloader = tf.data.Dataset.from_tensor_slices(dict(image_df))

# No shuffle here: predictions are later written back into image_df by
# position and averaged across models, so every pass over the dataset must
# yield the images in image_df's row order. Shuffling would reorder them on
# each pass and misalign both steps.
testloader = (
    testloader
    .map(load_image, num_parallel_calls=AUTOTUNE)
    .batch(CONFIG['batch_size'])
    .prefetch(AUTOTUNE)
)

In [None]:
# Load Model
# Directory holding the saved study-level models (one entry per model).
STUDY_MODEL_PATHS = '/kaggle/input/studylevelmodel/SIIM-Study-Level-model/'
# Entry names inside that directory; each is loaded by appending it to
# STUDY_MODEL_PATHS (which therefore must end with '/').
study_models = os.listdir(STUDY_MODEL_PATHS)
study_models

# Load Model for Study _pred

In [None]:
# ! pip install -q efficientnet


# Install Keras-Applications and efficientnet from wheels in the
# `keras-efficientnet-whl` input dataset.
!pip install ../input/keras-efficientnet-whl/Keras_Applications-1.0.8-py3-none-any.whl
!pip install ../input/keras-efficientnet-whl/efficientnet-1.1.1-py3-none-any.whl
# NOTE(review): EfficientNetB5 is not referenced in the visible cells; the
# import is presumably needed so load_model can resolve the architecture's
# custom objects - confirm before removing.
from efficientnet.tfkeras import EfficientNetB5

In [None]:
# Run every saved study-level model over the test set and average the outputs.
predictions = []
for model_name in study_models:
    # Reset Keras state, then load the next model of the ensemble.
    tf.keras.backend.clear_session()
    model = tf.keras.models.load_model(STUDY_MODEL_PATHS + model_name)

    # Collect this model's per-image outputs batch by batch.
    model_preds = []
    for img_batch in tqdm(testloader):
        preds = model.predict(img_batch)
        model_preds.extend(preds)
    predictions.append(model_preds)

    # Release the model before loading the next one.
    del model
    _ = gc.collect()

# Mean over the model axis -> one averaged prediction vector per image.
predictions = np.mean(predictions, axis=0)

In [None]:
# Column names for the averaged class scores; the indices match the values of
# the `class_to_id` mapping defined below.
class_labels = ['0', '1', '2', '3']
# Assumes `predictions` has one row per image_df row, in the same order, and
# one column per class label - TODO confirm against the data pipeline.
image_df.loc[:, class_labels] = predictions
image_df.head()

In [None]:
# Study-level class names, listed in class-index order.
id_to_class = dict(enumerate(('negative', 'typical', 'indeterminate', 'atypical')))
# Inverse lookup: class name -> class index.
class_to_id = {name: idx for idx, name in id_to_class.items()}

def get_study_prediction_string(preds, threshold=0):
    """Format study-level class scores as a submission prediction string.

    Args:
        preds: Indexable of four scores, ordered by class index.
        threshold: Only classes whose score is strictly greater than this
            value are emitted.

    Returns:
        Space-separated ``'<class> <score> 0 0 1 1'`` entries (score with two
        decimals), or an empty string when no class passes the threshold.
    """
    entries = [
        f'{id_to_class[class_idx]} {preds[class_idx]:0.2f} 0 0 1 1'
        for class_idx in range(4)
        if preds[class_idx] > threshold
    ]
    return ' '.join(entries)

In [None]:
# Preview image_df with the per-class score columns attached.
image_df.head()

In [None]:
# Preview the image -> study mapping used for the aggregation below.
meta_df_test.head()

In [None]:
study_ids = []
pred_strings = []

# Class scores indexed by submission id. Selecting the score columns by name
# (rather than by position in the row) keeps this correct if image_df gains
# or reorders columns, and avoids scanning the whole frame once per image.
class_preds = image_df.set_index('id')[class_labels]

for study_id, df in meta_df_test.groupby('study_id'):
    # Average the scores of all images belonging to the same study_id.
    member_ids = df['image_id'] + '_image'
    # .loc raises KeyError if an image has no row in image_df.
    preds = class_preds.loc[member_ids].to_numpy(dtype=float).mean(axis=0)

    pred_string = get_study_prediction_string(preds)
    pred_strings.append(pred_string)

    study_ids.append(f'{study_id}_study')

study_df = pd.DataFrame.from_dict({'id': study_ids, 'PredictionString': pred_strings})
study_df.head()


In [None]:
# The submission requires xmin, ymin, xmax, ymax format.
# YOLOv5 returns x_center, y_center, width, height
def correct_bbox_format(bboxes, img_size=None):
    """Convert normalised centre/size boxes to integer pixel corner boxes.

    Args:
        bboxes: Iterable of ``[x_center, y_center, width, height]`` boxes,
            expressed as fractions of the image size.
        img_size: Edge length in pixels of the (square) image the boxes refer
            to. Defaults to the module-level ``IMG_SIZE``.

    Returns:
        List of ``[xmin, ymin, xmax, ymax]`` integer boxes in pixels, each
        coordinate clipped to ``[0, img_size]``.
    """
    if img_size is None:
        img_size = IMG_SIZE

    correct_bboxes = []
    for b in bboxes:
        xc, yc, w, h = b[0], b[1], b[2], b[3]
        # Compute the corners in normalised space and round once. Rounding the
        # centre, the size and the half-size separately accumulates error and
        # can push a corner outside the image.
        corners = (xc - w/2, yc - h/2, xc + w/2, yc + h/2)
        correct_bboxes.append(
            [int(min(max(np.round(c*img_size), 0), img_size)) for c in corners]
        )

    return correct_bboxes

def scale_bboxes_to_original(row, bboxes):
    """Rescale pixel boxes from the resized image to the original image size.

    Args:
        row: Object exposing ``dim0`` and ``dim1``; ``dim0`` scales the y
            coordinates and ``dim1`` the x coordinates.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` boxes in the
            ``IMG_SIZE`` x ``IMG_SIZE`` frame.

    Returns:
        List of ``[xmin, ymin, xmax, ymax]`` integer boxes in the original
        image frame.
    """
    # Resize factors that were applied along x and y.
    factor_x = IMG_SIZE/row.dim1
    factor_y = IMG_SIZE/row.dim0

    def _undo(value, factor):
        # Divide by the resize factor and round to the nearest pixel.
        return int(np.round(value/factor))

    rescaled = []
    for left, top, right, bottom in bboxes:
        rescaled.append([
            _undo(left, factor_x),
            _undo(top, factor_y),
            _undo(right, factor_x),
            _undo(bottom, factor_y),
        ])
    return rescaled

# Read the txt file generated by YOLOv5 during inference and extract 
# confidence and bounding box coordinates.
def get_conf_bboxes(file_path):
    """Parse one YOLOv5 label file into confidences and boxes.

    Each non-blank line holds whitespace-separated numbers: the first is
    dropped, the last is taken as the confidence, and everything in between
    is the box.

    Args:
        file_path: Path of the ``.txt`` file written by detect.py.

    Returns:
        Tuple ``(confidence, bboxes)`` of parallel lists: one float and one
        list of floats per parsed line.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    confidence = []
    bboxes = []
    with open(file_path, 'r') as file:
        for line in file:
            # split() without arguments tolerates trailing or repeated
            # whitespace; blank lines produce no fields and are skipped.
            fields = line.split()
            if not fields:
                continue
            preds = [float(value) for value in fields]
            confidence.append(preds[-1])
            bboxes.append(preds[1:-1])
    return confidence, bboxes

In [None]:
# Build one image-level prediction string per row of meta_df_test.
image_pred_strings = []
# Set membership is O(1) per lookup; the directory listing is a plain list.
predicted_files = set(prediction_files)

# Iterate over meta_df_test itself: the strings are assigned back to that
# frame, so the loop must cover exactly its rows, whatever image_df's length.
for row in tqdm(meta_df_test.itertuples(index=False), total=len(meta_df_test)):
    id_name = row.image_id
    label_file = f'{id_name}.txt'

    confidence, ori_bboxes = [], []
    if label_file in predicted_files:
        # opacity label
        confidence, bboxes = get_conf_bboxes(f'{PRED_PATH}/{label_file}')
        bboxes = correct_bbox_format(bboxes)
        ori_bboxes = scale_bboxes_to_original(row, bboxes)

    if confidence:
        pred_string = ' '.join(
            f'opacity {conf} ' + ' '.join(map(str, box))
            for conf, box in zip(confidence, ori_bboxes)
        )
        image_pred_strings.append(pred_string)
    else:
        # No label file, or a label file without boxes: report "none".
        image_pred_strings.append("none 1 0 0 1 1")

## Submission File

In [None]:
# Attach the image-level prediction strings to the metadata rows.
meta_df_test['PredictionString'] = image_pred_strings
# Submission layout: `id` is '<image_id>_image', followed by PredictionString.
image_df = pd.DataFrame({
    'id': meta_df_test['image_id'] + '_image',
    'PredictionString': meta_df_test['PredictionString'],
})
image_df.head()

Remove unused files

In [None]:
# Delete the YOLOv5 output directory (annotated images and label files);
# the label files have already been parsed into image_pred_strings above.
!rm -rf runs

In [None]:
# Final submission: study-level rows followed by image-level rows, both with
# columns `id` and `PredictionString`.
sub_df = pd.concat([study_df, image_df])
# index=False: the concatenated frame keeps each part's own index, which must
# not be written to the CSV.
sub_df.to_csv('/kaggle/working/submission.csv', index=False)
sub_df