In [None]:
import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt
from PIL import Image
import SimpleITK as sitk
import cv2

import os
from os import listdir, mkdir
import glob
from tqdm.auto import tqdm

In [None]:
# Competition inputs: the directory that holds the test DICOM files and the
# sample submission whose `id` column drives the rest of the notebook.
test_path = '/kaggle/input/siim-covid19-detection/test/'
df = pd.read_csv('/kaggle/input/siim-covid19-detection/sample_submission.csv')

# Weights of the YOLOv5 detector, handed to detect.py further down.
detection_model_path = '/kaggle/input/image-detection-model-1/yolov5/kaggle-siim-covid/exp/weights/best.pt'

In [None]:
# Each id has the form "<code>_<level>"; the part after the underscore tells
# study-level rows apart from image-level rows.
df['file_level'] = df['id'].apply(lambda x : str(x).split('_')[1])

# Take explicit copies so the column assignments below write to independent
# frames rather than to filtered slices of `df` (avoids SettingWithCopyWarning).
df_study = df[df['file_level']=='study'].copy()
df_image = df[df['file_level']=='image'].copy()

# The part before the underscore is the study / image code.
df_study['study'] =  df_study['id'].apply(lambda x : str(x).split('_')[0])
df_image['image'] =  df_image['id'].apply(lambda x : str(x).split('_')[0])

### 判断提交或快存

In [None]:
# A submission frame of exactly 2477 rows switches on quick-save mode, in which
# later cells only process a handful of studies.
quick_save = df.shape[0] == 2477

In [None]:
if quick_save:
    # Keep only the five studies at positions 35-39. `.iloc` makes the positional
    # slice explicit and `.copy()` detaches the result, since later cells add
    # columns to df_study.
    df_study = df_study.iloc[35:40].copy()
    

In [None]:
# Map every image code found on disk to the study it belongs to.
# Test files are laid out as <test_path>/<study>/<series>/<image>.dcm.
image_study_dict = {}
for study_id in df_study['study']:
    for dcm_path in glob.glob(f'{test_path}{study_id}/*/*.dcm'):
        # Take the file name itself rather than a fixed '/'-split position, so the
        # lookup does not depend on how deep test_path sits in the filesystem.
        image_code = os.path.basename(dcm_path).split('.')[0]
        image_study_dict[image_code] = study_id
        

In [None]:
if quick_save:
    # Only keep images that belong to the studies retained for the quick run;
    # copy so the assignment below does not write into a filtered slice.
    df_image = df_image[df_image['image'].isin(image_study_dict.keys())].copy()

# Attach the owning study to every image. A missing image code raises KeyError,
# which surfaces files that were not found on disk.
df_image['study'] = df_image['image'].apply(lambda x:image_study_dict[x])

### 存储测试集图片路径

In [None]:
# Resolve the DICOM path of every image: <test_path>/<study>/<any series>/<image>.dcm.
# The first glob hit is used; an image with no match raises IndexError.
df_image['image_path'] = [
    glob.glob(test_path + study_id + '/*/' + image_code + '.dcm')[0]
    for study_id, image_code in tqdm(
        zip(df_image['study'], df_image['image']), total=len(df_image)
    )
]

In [None]:
# Output directory for the converted PNGs; exist_ok keeps the cell re-runnable.
os.makedirs('/kaggle/test', exist_ok=True)

In [None]:
def read_xray(path, fix_monochrome = True):
    """Load a DICOM file and return its first slice as an 8-bit grayscale array.

    Parameters
    ----------
    path : str
        Path of the DICOM file, handed to ``sitk.ReadImage``.
    fix_monochrome : bool
        When True, images whose Photometric Interpretation (tag ``0028|0004``)
        is MONOCHROME1 are inverted.

    Returns
    -------
    tuple
        ``(data, height, width)``: the uint8 array rescaled to 0-255, followed
        by ``data.shape[0]`` and ``data.shape[1]``.
    """
    dicom = sitk.ReadImage(path)

    # Work in float64 so the inversion and offset below cannot wrap around on
    # narrow integer pixel types.
    data = sitk.GetArrayFromImage(dicom)[0,:,:].astype(np.float64)

    # DICOM string values are padded to an even length, so the tag may come back
    # as "MONOCHROME1 "; strip it before comparing.
    if fix_monochrome and dicom.GetMetaData('0028|0004').strip() == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        # A constant image has peak 0; skipping the division keeps it all-zero
        # instead of producing NaNs.
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data,data.shape[0],data.shape[1]

def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn ``array`` into a PIL image bounded by a ``size`` x ``size`` square.

    With ``keep_ratio`` the image is shrunk in place via ``thumbnail`` (aspect
    ratio kept); otherwise it is resized to exactly ``(size, size)``.
    Returns the resulting PIL image.
    """
    target = (size, size)
    picture = Image.fromarray(array)

    if not keep_ratio:
        return picture.resize(target, resample)

    picture.thumbnail(target, resample)
    return picture

In [None]:
# Side length (pixels) of the PNGs written for the models.
IMG_SIZE = 640

# Columns filled in by the loop: original DICOM height/width and the PNG path.
df_image['dim_h'] = 0
df_image['dim_w'] = 0
df_image['path'] =  ''

# Convert every DICOM to an IMG_SIZE x IMG_SIZE PNG under /kaggle/test/ and
# remember the original dimensions.
for row_id in tqdm(df_image.index):
    pixels, height, width = read_xray(df_image.loc[row_id, 'image_path'])
    df_image.loc[row_id, 'dim_h'] = height
    df_image.loc[row_id, 'dim_w'] = width

    out_path = '/kaggle/test/' + df_image.loc[row_id, 'image'] + '.png'
    df_image.loc[row_id, 'path'] = out_path
    resize(pixels, size=IMG_SIZE).save(out_path)

In [None]:
# Make sure both dimension columns are plain 64-bit integers.
df_image = df_image.astype({'dim_h': 'int64', 'dim_w': 'int64'})

###  定义函数

In [None]:
# Install keras-applications and efficientnet from local copies under
# /kaggle/input (-q: quiet; --no-deps: do not pull in dependencies).
!pip install /kaggle/input/kerasapplications/keras-applications-master -q
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

import efficientnet.tfkeras as efn
import tensorflow as tf

def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when one resolves.

    Tries to resolve, connect to and initialise a TPU. If any of those steps
    raises ``ValueError``, the current default strategy is used instead. The
    replica count is printed either way.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # TPU set-up failed: fall back to whatever strategy is active.
        strategy = tf.distribute.get_strategy()

    replica_count = strategy.num_replicas_in_sync
    print(f"Running on {replica_count} replicas")
    return strategy


def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
    """Build the function that turns an image path into a float image tensor.

    The returned function reads the file, decodes it as a 3-channel PNG or JPEG
    according to ``ext``, scales pixel values by 1/255 and resizes to
    ``target_size``. With ``with_labels`` it takes ``(path, label)`` and passes
    the label through; otherwise it takes ``path`` only. An unsupported ``ext``
    raises ``ValueError`` when the decoder is called.
    """
    def decode(path):
        raw = tf.io.read_file(path)

        if ext in ['jpg', 'jpeg']:
            pixels = tf.image.decode_jpeg(raw, channels=3)
        elif ext == 'png':
            pixels = tf.image.decode_png(raw, channels=3)
        else:
            raise ValueError("Image extension not supported")

        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, target_size)

    def decode_with_labels(path, label):
        return decode(path), label

    if with_labels:
        return decode_with_labels
    return decode


def build_augmenter(with_labels=True):
    """Build the augmentation function: random left/right then up/down flip.

    With ``with_labels`` the returned function takes ``(img, label)`` and
    passes the label through unchanged; otherwise it takes ``img`` only.
    """
    def augment(img):
        flipped = tf.image.random_flip_left_right(img)
        return tf.image.random_flip_up_down(flipped)

    def augment_with_labels(img, label):
        return augment(img), label

    if with_labels:
        return augment_with_labels
    return augment


def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched, prefetched ``tf.data`` pipeline over image paths.

    Stages, in order: slice ``paths`` (and ``labels`` when given), decode,
    optionally cache, optionally augment, optionally repeat, optionally
    shuffle with buffer size ``shuffle``, then batch by ``bsize`` and prefetch.
    When ``decode_fn`` / ``augment_fn`` are None, defaults are built with
    ``build_decoder`` / ``build_augmenter``. ``cache_dir`` is created when it
    is non-empty and ``cache`` is True.
    """
    has_labels = labels is not None

    if cache is True and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    autotune = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    pipeline = tf.data.Dataset.from_tensor_slices(source)
    pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)
    if cache:
        pipeline = pipeline.cache(cache_dir)
    if augment:
        pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)
    if repeat:
        pipeline = pipeline.repeat()
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)

    return pipeline.batch(bsize).prefetch(autotune)

In [None]:
# Unused: name of a dataset that the (also disabled) GCS lookup below refers to.
#COMPETITION_NAME = "siim-cov19-test-img512-study-600"
# Pick the distribution strategy and scale the batch size with the number of
# replicas (16 images per replica).
strategy = auto_select_accelerator()
BATCH_SIZE = strategy.num_replicas_in_sync * 16
# Unused: GCS path lookup for the dataset named above.
#GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

### study predict

In [None]:
# PNG paths of all test images, in df_image row order.
test_paths = df_image['path'].to_numpy()

In [None]:
# Study-level classes, named explicitly rather than taken from a column position
# so that adding or removing an earlier column cannot shift the selection.
# NOTE(review): the order is assumed to match the model's output columns — confirm.
label_cols = ['negative', 'typical', 'indeterminate', 'atypical']

# Float placeholders; the ensemble cell below overwrites them with probabilities.
for label_col in label_cols:
    df_image[label_col] = 0.0

In [None]:
# Table of square input sizes; the next cell uses entry 8 (640) as the decode size.
IMSIZE = (224, 240, 260, 300, 380, 456, 512, 600, 640)

In [None]:
# Dataset of test PNGs decoded at IMSIZE[8] x IMSIZE[8], in file order
# (no shuffling, repeating, augmentation or caching).
side = IMSIZE[8]
test_decoder = build_decoder(with_labels=False, target_size=(side, side), ext='png')
dtest = build_dataset(
    test_paths,
    bsize=BATCH_SIZE,
    decode_fn=test_decoder,
    repeat=False,
    shuffle=False,
    augment=False,
    cache=False,
)

# The five saved study-classification models that make up the ensemble.
study_model_files = (
    '/kaggle/input/study-class-model-effnetb7/model0.h5',
    '/kaggle/input/study-class-model-effnetb7/model1.h5',
    '/kaggle/input/study-class-model-effnetb7/model2.h5',
    '/kaggle/input/study-class-model-effnetb7/model3.h5',
    '/kaggle/input/study-class-model-effnetb7/model4.h5',
)

with strategy.scope():
    models = [tf.keras.models.load_model(model_file) for model_file in study_model_files]

# Keep the individual names bound as well; a later cell deletes them by name.
models0, models1, models2, models3, models4 = models

# Average the predictions of all models and store them in the label columns.
fold_predictions = [model.predict(dtest, verbose=1) for model in models]
df_image[label_cols] = sum(fold_predictions) / len(models)

In [None]:
# Format each image's four study-class probabilities as
# "<class> <prob> 0 0 1 1" groups joined by single spaces.
study_classes = ('negative', 'typical', 'indeterminate', 'atypical')
for idx in df_image.index:
    groups = [f'{name} {df_image.loc[idx, name]} 0 0 1 1' for name in study_classes]
    df_image.loc[idx, 'study_pre'] = ' '.join(groups)

In [None]:
# Each study takes the prediction string of its first image in df_image
# (a study with no image raises IndexError).
study_strings = []
for study_id in df_study['study']:
    candidates = df_image.loc[df_image['study'] == study_id, 'study_pre']
    study_strings.append('' + candidates.iloc[0])
df_study['PredictionString'] = study_strings

In [None]:
# Keep only the two submission columns for the study-level rows.
df_study = df_study.loc[:, ['id', 'PredictionString']]

### opacity classes prediction

In [None]:
# Image-level ensemble whose averaged output is stored in the 'none' column.
test_paths = df_image['path'].values
df_image['none'] = 0
label_cols = 'none'

# Same size table as before, but entry 8 is now 512: these models decode at 512 x 512.
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600, 512)
side = IMSIZE[8]

test_decoder = build_decoder(with_labels=False, target_size=(side, side), ext='png')
dtest = build_dataset(
    test_paths,
    bsize=BATCH_SIZE,
    decode_fn=test_decoder,
    repeat=False,
    shuffle=False,
    augment=False,
    cache=False,
)

# The five saved models that make up this ensemble.
opacity_model_files = (
    '/kaggle/input/2-class-models-efficientnetb7/model0.h5',
    '/kaggle/input/2-class-models-efficientnetb7/model1.h5',
    '/kaggle/input/2-class-models-efficientnetb7/model2.h5',
    '/kaggle/input/2-class-models-efficientnetb7/model3.h5',
    '/kaggle/input/2-class-models-efficientnetb7/model4.h5',
)

with strategy.scope():
    models = [tf.keras.models.load_model(model_file) for model_file in opacity_model_files]

# Keep the individual names bound as well; the next cell deletes them by name.
models0, models1, models2, models3, models4 = models

# Average the predictions of all models.
fold_predictions = [model.predict(dtest, verbose=1) for model in models]
df_image[label_cols] = sum(fold_predictions) / len(models)

In [None]:
# Drop every reference to the loaded Keras models.
del models, models0, models1, models2, models3, models4

In [None]:
from numba import cuda
import torch
# Select GPU 0, close its CUDA context, then select the device again.
# NOTE(review): presumably this frees GPU memory still held by TensorFlow
# before the YOLOv5 step runs — confirm.
cuda.select_device(0)
cuda.close()
cuda.select_device(0)

### image predict

In [None]:
# Directory of converted PNGs; passed to detect.py as --source.
TEST_PATH = '/kaggle/test/'

In [None]:
import shutil, os

In [None]:
# copytree refuses to write into an existing destination, so skip the copy when
# this cell is re-run. detect.py is then launched from inside the working copy.
if not os.path.isdir('/kaggle/working/yolov5'):
    shutil.copytree('/kaggle/input/image-detection-model-1/yolov5', '/kaggle/working/yolov5')
os.chdir('/kaggle/working/yolov5')

In [None]:
# Run YOLOv5 inference on the PNGs in TEST_PATH with the trained weights.
# --save-txt/--save-conf produce the per-image label files (with confidences)
# that later cells read from runs/detect/exp/labels.
# NOTE(review): --exist-ok presumably reuses runs/detect/exp on re-runs — confirm.
!python detect.py --weights $detection_model_path\
                  --source $TEST_PATH\
                  --img {IMG_SIZE} \
                  --conf 0.10 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt --save-conf --exist-ok

In [None]:
# Directory holding one "<image>.txt" label file per image that got detections.
PRED_PATH = '/kaggle/working/yolov5/runs/detect/exp/labels'

In [None]:
# Stored as a set: the prediction loop tests "<id>.txt in prediction_files" once
# per image, which is O(1) on a set but a linear scan on a list.
prediction_files = set(os.listdir(PRED_PATH))
print('Number of test images predicted as opacity: ', len(prediction_files))

In [None]:
# The submission requires xmin, ymin, xmax, ymax format.
# YOLOv5 returns x_center, y_center, width, height
def correct_bbox_format(bboxes,img_w,img_h):
    """Convert normalised centre/size boxes to pixel corner boxes.

    ``bboxes`` holds ``[x_center, y_center, width, height]`` entries expressed
    as fractions of the image; ``img_w`` / ``img_h`` are the image dimensions
    in pixels. Returns a list of ``[xmin, ymin, xmax, ymax]`` lists.
    """
    def to_corners(box):
        # Scale to pixels first, then step half the extent either way.
        xc, yc = box[0]*img_w, box[1]*img_h
        w, h = box[2]*img_w, box[3]*img_h
        return [xc - w/2, yc - h/2, xc + w/2, yc + h/2]

    return [to_corners(box) for box in bboxes]

# Read the txt file generated by YOLOv5 during inference and extract
# confidence and bounding box coordinates.
def get_conf_bboxes(file_path):
    """Parse a YOLOv5 label file into class ids, confidences and boxes.

    Each non-empty line holds whitespace-separated numbers: the class id first,
    the confidence last, and the box values in between. Blank lines and
    surrounding whitespace are ignored.

    Returns ``(class_id, confidence, bboxes)``: three parallel lists, with every
    value parsed as ``float`` and each box being a list of floats.
    Raises ``ValueError`` if a field is not numeric.
    """
    class_id = []
    confidence = []
    bboxes = []
    with open(file_path, 'r') as file:
        for line in file:
            # split() with no argument tolerates trailing spaces and repeated
            # separators, and yields [] for a blank line.
            fields = line.split()
            if not fields:
                continue
            preds = [float(field) for field in fields]
            class_id.append(preds[0])
            confidence.append(preds[-1])
            bboxes.append(preds[1:-1])
    return class_id,confidence, bboxes

In [None]:
# Build the image-level prediction strings for the submission.
# Images with a YOLOv5 label file get one "opacity <conf> xmin ymin xmax ymax"
# group per detection (in original pixel coordinates); all others get the
# fixed "none" string.
no_finding = "none 1.0 0 0 1 1"
predictions = []

for idx in tqdm(df_image.index):
    record = df_image.loc[idx]
    label_name = f'{record.image}.txt'

    if label_name not in prediction_files:
        predictions.append(no_finding)
        continue

    _, scores, boxes = get_conf_bboxes(f'{PRED_PATH}/{label_name}')
    boxes = correct_bbox_format(boxes, record.dim_w, record.dim_h)
    predictions.append(' '.join(
        'opacity {:.6f} '.format(score) + ' '.join(map(str, box))
        for score, box in zip(scores, boxes)
    ))

df_image['PredictionString'] = predictions

In [None]:
# For images that have detections, append a "none" entry whose score is one
# minus the value in the 'none' column. Pure "none" rows are left untouched.
none_only = 'none 1.0 0 0 1 1'
for idx in df_image.index:
    current = df_image.loc[idx, 'PredictionString']
    if current == none_only:
        continue
    none_score = 1 - df_image.loc[idx, 'none']
    df_image.loc[idx, 'PredictionString'] = current + ' none {:.6f} 0 0 1 1'.format(none_score)

In [None]:
# Keep only the two submission columns for the image-level rows.
df_image = df_image.loc[:, ['id', 'PredictionString']]

In [None]:
# Stack study-level rows on top of image-level rows.
df_submission = pd.concat([df_study, df_image], axis=0)

In [None]:
# Write the submission file without the DataFrame index.
df_submission.to_csv('/kaggle/working/submission.csv',index=False)