In [None]:
# Install the packages needed to decode DICOM pixel data (GDCM, libjpeg-turbo and
# pinned supporting packages) from the .tar.bz2 archives shipped in the
# `pydicom-conda-helper` input dataset.
# NOTE(review): the archive names pin py37 builds, so this presumably only works on a
# Python 3.7 kernel -- confirm before changing the notebook image.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
import tensorflow as tf
print(tf.__version__)
import torch
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

import os
import gc
import cv2
import glob
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from shutil import copyfile
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
# Turn on memory growth for every GPU TensorFlow can see, then report how many
# physical and logical GPUs are available.
# NOTE(review): presumably needed because PyTorch (YOLOv5) and TensorFlow share the
# same GPU in this notebook -- confirm.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

In [None]:
# Load the competition's sample submission; its `id` column is used below to
# separate study-level rows from image-level rows.
sub_df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')
print(len(sub_df))
sub_df.head()

In [None]:
# Rows whose id contains '_study' (study-level entries of the sample submission).
# Note: `study_df` is rebuilt from model predictions further down the notebook.
study_df = sub_df.loc[sub_df.id.str.contains('_study')]
len(study_df)

In [None]:
# Rows whose id contains '_image' (image-level entries of the sample submission).
# Note: `image_df` is rebuilt from `meta_df` after the detector has run.
image_df = sub_df.loc[sub_df.id.str.contains('_image')]
len(image_df)

In [None]:
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, pass the pixel data through ``apply_voi_lut``
            before normalising; when False, use ``pixel_array`` as is.
        fix_monochrome: When True, invert the intensities of images whose
            PhotometricInterpretation is "MONOCHROME1".

    Returns:
        A ``np.uint8`` array with the same shape as the pixel data, min-max
        scaled to the 0..255 range. An image whose pixels are all equal is
        returned as all zeros.
    """
    # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
    # dcmread is the current name of the reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to
    # "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    # A constant image has peak == 0 after the shift; skip the division so the
    # result is a clean all-zero image instead of 0/0 (NaN) cast to uint8.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def resize_xray(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn a pixel array into a PIL image resized to fit a square of side `size`.

    Args:
        array: Image data accepted by ``Image.fromarray``.
        size: Side length, in pixels, of the target square.
        keep_ratio: When True, use ``Image.thumbnail`` with a ``(size, size)``
            bound; when False, resize to exactly ``(size, size)``.
        resample: Resampling filter forwarded to PIL.

    Returns:
        The resized ``PIL.Image``.
    """
    # Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image
    image = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return image.resize(target, resample)

    # thumbnail() modifies the image in place, so there is nothing to reassign.
    image.thumbnail(target, resample)
    return image

In [None]:
TEST_PATH = '/kaggle/tmp/test/'
IMG_SIZE = 512

def prepare_test_images():
    """Convert every test DICOM into a 3-channel JPEG for the detector.

    Walks ``../input/siim-covid19-detection/test``, reads each file with
    ``read_xray``, resizes it to ``IMG_SIZE`` x ``IMG_SIZE`` and writes a JPEG
    into ``TEST_PATH`` whose channels are, in ``np.dstack`` order: the resized
    image, its CLAHE version, and a top-hat/black-hat combination of its
    histogram-equalised version.

    Returns:
        Tuple ``(image_id, dim0, dim1)`` of parallel lists: the file name
        without ``.dcm`` and the first and second dimension of the original
        (un-resized) pixel array.
    """
    image_id = []
    dim0 = []
    dim1 = []

    os.makedirs(TEST_PATH, exist_ok=True)

    # Neither object depends on the image, so build them once instead of once per file.
    clahe = cv2.createCLAHE(clipLimit=40.0, tileGridSize=(8,8))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))

    for dirname, _, filenames in tqdm(os.walk('../input/siim-covid19-detection/test')):
        for file in filenames:
            # set keep_ratio=True to have original aspect ratio
            xray = read_xray(os.path.join(dirname, file))
            im = np.array(resize_xray(xray, IMG_SIZE))

            equ = cv2.equalizeHist(im)
            clh = clahe.apply(im)
            tophat = cv2.morphologyEx(equ, cv2.MORPH_TOPHAT, kernel)
            bothat = cv2.morphologyEx(equ, cv2.MORPH_BLACKHAT, kernel)
            # NOTE(review): this is plain NumPy uint8 arithmetic, so it wraps around
            # instead of saturating. Left unchanged because the detector weights were
            # presumably trained on images produced the same way -- confirm.
            morph = equ + tophat - bothat

            output = np.dstack((im, clh, morph))
            cv2.imwrite(os.path.join(TEST_PATH, file.replace('dcm', 'jpg')),output)

            image_id.append(file.replace('.dcm', ''))
            dim0.append(xray.shape[0])
            dim1.append(xray.shape[1])

    return image_id, dim0, dim1

In [None]:
# Convert the test set and keep the ids and original dimensions; the dimensions are
# needed later to map predicted boxes back to original-resolution coordinates.
image_ids, dim0, dim1 = prepare_test_images()
print(f'Number of test images: {len(os.listdir(TEST_PATH))}')

In [None]:
meta_df = pd.DataFrame.from_dict({'image_id': image_ids, 'dim0': dim0, 'dim1': dim1})

# Associate image-level id with study-level ids.
# Note that a study-level might have more than one image-level ids.
# The test directory is laid out as <study>/<series>/<image>.dcm. Collect the
# image -> study mapping once and apply it in a single pass, instead of running a
# boolean-mask assignment over the whole frame for every image.
study_of_image = {}
for study_dir in os.listdir('../input/siim-covid19-detection/test'):
    for series in os.listdir(f'../input/siim-covid19-detection/test/{study_dir}'):
        for image in os.listdir(f'../input/siim-covid19-detection/test/{study_dir}/{series}/'):
            # image[:-4] drops the 4-character extension (".dcm").
            study_of_image[image[:-4]] = study_dir

meta_df['study_id'] = meta_df['image_id'].map(study_of_image)

meta_df.head()

In [None]:
# Paths to YOLOv5 checkpoints.
# NOTE(review): none of these constants is referenced by the cells visible in this
# notebook -- the detect.py call below passes its own --weights paths. Confirm whether
# they are still needed.
YOLO_MODEL_PATH0 = '/kaggle/input/yolomodels1607/yolov5L512_1607.pt'
YOLO_MODEL_PATH1 = '/kaggle/input/yolov5m6-512/yolov5m6_512.pt'
YOLO_MODEL_PATH2 = '/kaggle/input/yolov5bestlarge1607/yolov5bestlarge.pt'
YOLO_MODEL_PATH3 = ''
YOLO_MODEL_PATH4 = ''


In [None]:

# Run YOLOv5 detection over the converted test JPEGs in TEST_PATH at IMG_SIZE.
# Two fold checkpoints are passed to --weights (presumably evaluated as an ensemble
# by detect.py -- confirm). --save-txt/--save-conf make detect.py write per-image
# text files, which the following cells read from 'runs/detect/exp/labels'.
!python /kaggle/input/siimcovidyolov5l/yolov5/detect.py --weights /kaggle/input/yolov5l-3channels/yolov5l_fold0.pt /kaggle/input/yolov5l-3channels/yolov5l_fold1.pt \
                                      --source {TEST_PATH} \
                                      --img {IMG_SIZE} \
                                      --conf 0.22 \
                                      --iou-thres 0.5 \
                                      --max-det 10 \
                                      --save-txt \
                                      --save-conf

In [None]:
# Directory holding the detector's text outputs, one `<image_id>.txt` per image
# that has a file there (images without a file are treated as "none" later).
# Note: the count printed below is the number of files, not the number of boxes.
PRED_PATH = 'runs/detect/exp/labels'
prediction_files = os.listdir(PRED_PATH)
print(f'Number of opacity predicted by YOLOv5: {len(prediction_files)}')

In [None]:
def correct_bbox_format(bboxes):
    """Convert fractional centre/size boxes into integer pixel corner boxes.

    Args:
        bboxes: Iterable of boxes whose first four entries are
            ``(x_center, y_center, width, height)`` expressed as fractions of
            ``IMG_SIZE``.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` lists of ints, in pixels of the
        ``IMG_SIZE`` x ``IMG_SIZE`` image. Values are not clipped to the image.
    """
    def to_pixels(fraction):
        # Scale a fraction of the image side to the nearest whole pixel.
        return int(np.round(fraction*IMG_SIZE))

    converted = []
    for box in bboxes:
        center_x, center_y = to_pixels(box[0]), to_pixels(box[1])
        width, height = to_pixels(box[2]), to_pixels(box[3])

        # Half extents are rounded after the width/height themselves were rounded.
        half_w = int(np.round(width/2))
        half_h = int(np.round(height/2))

        converted.append([center_x - half_w, center_y - half_h,
                          center_x + half_w, center_y + half_h])

    return converted

def scale_bboxes_to_original(row, bboxes):
    """Map corner boxes from the resized image back to the original resolution.

    Args:
        row: Object exposing ``dim0`` and ``dim1``; x coordinates are scaled
            with ``dim1`` and y coordinates with ``dim0``.
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` boxes in pixels of the
            ``IMG_SIZE`` x ``IMG_SIZE`` image.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` lists of ints in original-image
        pixels.
    """
    # Resized-over-original ratio for each axis.
    ratio_x = IMG_SIZE/row.dim1
    ratio_y = IMG_SIZE/row.dim0

    def rescale(value, ratio):
        # Undo the resize along one axis and round to the nearest pixel.
        return int(np.round(value/ratio))

    return [
        [rescale(left, ratio_x), rescale(top, ratio_y),
         rescale(right, ratio_x), rescale(bottom, ratio_y)]
        for left, top, right, bottom in bboxes
    ]

# Read the txt file generated by YOLOv5 during inference and extract
# confidence and bounding box coordinates.
def get_conf_bboxes(file_path):
    """Parse one detection text file into confidences and boxes.

    Each non-blank line is a whitespace-separated list of numbers. The first
    value is dropped, the last value is taken as the confidence, and everything
    in between is kept as the box.

    Args:
        file_path: Path of the text file to read.

    Returns:
        Tuple ``(confidence, bboxes)``: a list of floats and a parallel list of
        float lists, one entry per non-blank line.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    confidence = []
    bboxes = []
    with open(file_path, 'r') as file:
        for line in file:
            # split() with no argument tolerates trailing spaces, tabs and repeated
            # separators, which split(' ') would turn into un-parseable '' fields.
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at end of file): nothing to parse.
                continue
            preds = list(map(float, fields))
            confidence.append(preds[-1])
            bboxes.append(preds[1:-1])
    return confidence, bboxes

In [None]:
# Build one prediction string per row of meta_df, in meta_df order, so the list can
# be assigned back to meta_df as a column. Iterating over meta_df itself (rather
# than over the length of the sample-submission slice) keeps both the same length.
NO_OPACITY_PREDICTION = "none 1 0 0 1 1"

image_pred_strings = []
# Set membership instead of scanning the file list once per image.
predicted_files = set(prediction_files)
for i in tqdm(range(len(meta_df))):
    row = meta_df.iloc[i]
    id_name = row.image_id

    if f'{id_name}.txt' not in predicted_files:
        # The detector wrote no file for this image.
        image_pred_strings.append(NO_OPACITY_PREDICTION)
        continue

    # opacity label
    confidence, bboxes = get_conf_bboxes(f'{PRED_PATH}/{id_name}.txt')
    bboxes = correct_bbox_format(bboxes)
    ori_bboxes = scale_bboxes_to_original(row, bboxes)

    detections = [
        f'opacity {conf} ' + ' '.join(map(str, box))
        for conf, box in zip(confidence, ori_bboxes)
    ]
    # A file with no parseable lines would otherwise yield an empty string.
    image_pred_strings.append(' '.join(detections) if detections else NO_OPACITY_PREDICTION)

In [None]:
# Attach the detector output to the metadata and keep only the columns needed
# downstream. `image_df` is rebound here, replacing the sample-submission slice.
meta_df['PredictionString'] = image_pred_strings
image_df = meta_df[['study_id','image_id', 'PredictionString']]
# image_df.insert(0, 'id', image_df.apply(lambda row: row.image_id+'_image', axis=1))
# image_df = image_df.drop('image_id', axis=1)
# Note: not the last expression of the cell, so this preview is not displayed.
image_df.head(20)
# Written with the default index column included.
image_df.to_csv('object_yolo5x512_080721.csv')

In [None]:
# from IPython.display import FileLink
# FileLink(r'object_yolo5x512_080721.csv')
# Display the image-level predictions (the whole frame, not a preview).
image_df

In [None]:
# imageDict = {}
# for study in os.listdir('../input/siim-covid19-detection/test'):
#     for _,__,files in os.walk('../input/siim-covid19-detection/test/'+study):
#         for file in files:
#             imageDict[file[:-4]]=study

In [None]:
# arr=[]
# imageList = image_df.id
# for image in imageList:
#     arr.append(imageDict[image.split("_")[0]]+"_study")
# image_df.insert(0,"study_id",arr)

In [None]:
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras import layers
from tensorflow.keras import models
import tensorflow_addons as tfa
from tensorflow.keras import mixed_precision


import tensorflow_probability as tfp
tfd = tfp.distributions

import os
import gc
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight

# Imports for augmentations. 
from albumentations import (Compose, RandomResizedCrop, Cutout, Rotate, HorizontalFlip, 
                            VerticalFlip, RandomBrightnessContrast, ShiftScaleRotate, 
                            CenterCrop, Resize)

In [None]:
# NOTE: this cell rebinds TEST_PATH, IMG_SIZE and prepare_test_images, all of which
# were defined earlier for the detector. The box helpers (correct_bbox_format,
# scale_bboxes_to_original) read IMG_SIZE at call time, so they must not be
# re-run after this cell without restoring IMG_SIZE = 512.
TEST_PATH = '/kaggle/tmp/test_study/'
IMG_SIZE = 224

def prepare_test_images():
    """Convert every test DICOM into a PNG for the study-level classifier.

    Walks ``../input/siim-covid19-detection/test``, reads each file with
    ``read_xray``, resizes it to ``IMG_SIZE`` x ``IMG_SIZE`` and saves it as a
    PNG in ``TEST_PATH``.

    Returns:
        List of file names with ``.dcm`` removed, in the order visited.
    """
    image_id = []

    os.makedirs(TEST_PATH, exist_ok=True)

    for dirname, _, filenames in tqdm(os.walk('../input/siim-covid19-detection/test')):
        for file in filenames:
            # set keep_ratio=True to have original aspect ratio
            xray = read_xray(os.path.join(dirname, file))
            im = resize_xray(xray, size=IMG_SIZE)
            im.save(os.path.join(TEST_PATH, file.replace('dcm', 'png')))

            image_id.append(file.replace('.dcm', ''))

    return image_id

In [None]:
# NOTE(review): AUTOTUNE is not referenced by any cell visible in this notebook.
AUTOTUNE = tf.data.AUTOTUNE

# Settings for the study-level classifier. Of these, the visible cells only read
# img_height, img_width and num_labels (when building the model); the remaining
# entries look like carry-overs from the training notebook -- confirm before removing.
CONFIG = dict (
    seed = 42,
    num_labels = 4,
    num_folds = 5,
    img_width = 224, # If you change the resolution to 512 reduce batch size. 
    img_height = 224,
    batch_size = 32,
    epochs = 70,
    learning_rate = 1e-3,
    architecture = "CNN",
    competition = 'siim-covid',
    _wandb_kernel = 'aks',
    infra = "GCP",
)

In [None]:
def get_model():
    """Build the study-level classifier: EfficientNetB0 backbone plus softmax head.

    The backbone is created without its top and initialised from the local
    ``efficientnetb0_notop.h5`` weights file. Its output is global-average
    pooled, passed through 50% dropout and an L2-regularised dense layer with
    ``CONFIG['num_labels']`` units, followed by a float32 softmax.

    Returns:
        A ``tf.keras`` model taking ``(img_height, img_width, 3)`` inputs and
        returning ``num_labels`` class probabilities.
    """
    base_model = tf.keras.applications.EfficientNetB0(include_top=False, weights='../input/effnetweights/efficientnetb0_notop.h5')
    # Previously misspelled as `trainabe`, which only created an unused attribute.
    base_model.trainable = True

    inputs = layers.Input((CONFIG['img_height'], CONFIG['img_width'], 3))
    # NOTE(review): training=True is baked into the graph, so the backbone stays in
    # training mode even inside model.predict(). For an inference notebook this is
    # normally training=False; left unchanged here because it alters predictions --
    # confirm against how the loaded weights were validated.
    x = base_model(inputs, training=True)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(CONFIG['num_labels'], kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
    # Softmax is kept in float32 explicitly.
    outputs = layers.Activation('softmax', dtype='float32', name='predictions')(outputs)

    return models.Model(inputs, outputs)

# Reset Keras global state, then build the classifier and print its layer summary.
tf.keras.backend.clear_session() 
model = get_model()
model.summary()

In [None]:
# Labels for the run. Neither key is read by the cells visible in this notebook.
# NOTE(review): the group name says 512 while CONFIG uses 224x224 inputs -- confirm
# which resolution the loaded weights were trained at.
CONFIG['model_name'] = 'effnetb0_mixup'
CONFIG['group'] = 'Effnetb0-Mixup-512'

In [None]:
# Load the trained classifier weights into the model built by get_model().
modelpath = '/kaggle/input/effnetmodels/model-best.h5'
model.load_weights(modelpath)

In [None]:
# Write the classifier-sized PNGs into TEST_PATH (this uses whichever
# prepare_test_images definition was executed last).
# NOTE(review): study_list is not read by any later cell visible in this notebook.
study_list= prepare_test_images()

In [None]:
@tf.function
def decode_image(image):
    """Decode PNG-encoded bytes into a 3-channel image tensor.

    Args:
        image: Scalar string tensor holding the encoded PNG contents.

    Returns:
        The tensor produced by ``tf.image.decode_png(image, channels=3)``; no
        normalisation is applied.
    """
    # convert the compressed string to a 3D uint8 tensor
    image = tf.image.decode_png(image, channels=3)
    # Normalize image (disabled)
#     image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    # The leftover debug print() calls were removed: inside a tf.function they only
    # ran while the function was being traced and printed symbolic tensors.
    return image
def load_image(df_dict):
    """Read an image file and decode it with ``decode_image``.

    Args:
        df_dict: Despite its name, this value is handed straight to
            ``tf.io.read_file``, i.e. it is used as the file path.

    Returns:
        The decoded image tensor returned by ``decode_image``.
    """
    # Load image
    encoded = tf.io.read_file(df_dict)

#     # Parse label
#     label = df_dict['study_level']
#     label = tf.one_hot(indices=label, depth=CONFIG['num_labels'])

    return decode_image(encoded)

In [None]:
# Map each image id (file name without its 4-character extension) to the name of
# the study directory it was found under.
imageDict = {
    file[:-4]: study
    for study in os.listdir('../input/siim-covid19-detection/test')
    for _, __, files in os.walk('../input/siim-covid19-detection/test/'+study)
    for file in files
}

In [None]:
# Class names, indexed by the position of the corresponding model output.
# NOTE(review): the order must match the label encoding used when the weights were
# trained -- confirm against the training notebook.
output_arr = ['negative','typical','indeterminate','atypical']

In [None]:
# Classify every PNG in TEST_PATH and build one prediction string per image of the
# form "<class> <prob> 0 0 1 1" repeated for each class, separated by single spaces.
study_preds = []
for img in tqdm(os.listdir(TEST_PATH)):
    # File name without extension; image ids contain no '.' themselves.
    img_id = img.split(".")[0]

    image = tf.keras.preprocessing.image.load_img(os.path.join(TEST_PATH,img))
    x = tf.keras.preprocessing.image.img_to_array(image)
    # Add the batch dimension expected by model.predict.
    x = np.expand_dims(x, axis=0)
    output = model.predict(x)

    # Joined without the trailing space the previous concatenation left behind,
    # matching how the image-level prediction strings are formatted.
    output_str = ' '.join(
        label+' '+str(conf)+' 0 0 1 1'
        for label, conf in zip(output_arr, output[0])
    )
    study_preds.append([imageDict[img_id], img_id, output_str])

In [None]:
# One row per classified image; `study_df` is rebound here, replacing the
# sample-submission slice created near the top of the notebook.
study_df = pd.DataFrame(study_preds, columns =['study_id','image_id','PredictionString'])

In [None]:
# arr=[]
# study_df = 
# imageList = image_df.id
# for image in imageList:
#     arr.append(imageDict[image.split("_")[0]]+"_study")
# image_df.insert(0,"study_id",arr)

In [None]:
# study_df.to_csv('classification_EffNet_080721.csv')

In [None]:
# from IPython.display import FileLink
# FileLink(r'classification_EffNet_080721.csv')

In [None]:
# Add the submission id ("<image_id>_image") as the first column. The guard makes
# the cell safe to re-run: DataFrame.insert raises if the column already exists.
if 'id' not in image_df.columns:
    image_df.insert(0, 'id', image_df['image_id'] + '_image')

In [None]:
# Image-level part of the submission: just the id and its prediction string.
image_df_new = image_df[['id','PredictionString']]

In [None]:
# Add the submission id ("<study_id>_study") as the first column. The guard makes
# the cell safe to re-run: DataFrame.insert raises if the column already exists.
if 'id' not in study_df.columns:
    study_df.insert(0, 'id', study_df['study_id'] + '_study')

In [None]:
# Keep exactly one submission row per study. When a study has several images, the
# prediction of the first image encountered is kept and the others are dropped
# (the predictions are not merged or averaged).
study_df_new = (
    study_df[['id','PredictionString']]
    .drop_duplicates(subset='id', keep='first')
    .reset_index(drop=True)
)


In [None]:
# Stack study-level rows on top of image-level rows and write the submission file.
# pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.
# Note: study_df_new is rebound to the combined frame, so re-running this cell alone
# would add the image rows a second time.
study_df_new = pd.concat([study_df_new, image_df_new], ignore_index=True)
study_df_new.to_csv('/kaggle/working/submission.csv',index = False)

In [None]:
# Display the combined submission frame (study rows followed by image rows).
study_df_new


In [None]:
# Display the image-level submission rows.
image_df_new

In [None]:
# Remove the detector's output directory (PRED_PATH lives under runs/) from the
# working directory.
%rm -rf runs

In [None]:
# Remove a local yolov5 directory if present.
# NOTE(review): no visible cell creates ./yolov5 (detect.py is run from /kaggle/input),
# so this is presumably a leftover -- confirm.
%rm -rf yolov5