In [None]:
import pandas as pd
from tqdm.auto import tqdm
import cv2
import os
from os import listdir
from os.path import isdir, join
import keras
import numpy as np

The training part of this pipeline is covered in [this tutorial](https://etrain.xyz/en/posts/siim-covid19-detection).

![EfficientNetV2](https://raw.githubusercontent.com/google/automl/master/efficientnetv2/g3doc/train_params.png)

### Convert to JPG

In [None]:
# Install pinned conda packages from the local `pydicom-conda-helper` dataset archives
# (each archive is passed to `conda install` by its file path).
# NOTE(review): presumably these provide the GDCM / libjpeg handlers pydicom needs to
# decode compressed DICOM pixel data — confirm.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
from PIL import Image

def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixel data as an 8-bit array.

    Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: Path of the DICOM file to read.
        voi_lut: When True, pass the raw pixel array through ``apply_voi_lut``
            (the VOI LUT, if provided by the DICOM device, transforms raw data
            into a "human-friendly" view). When False, use the raw pixel array.
        fix_monochrome: When True, invert the image if its
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``, since such
            X-rays may otherwise look inverted.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` with the pixel values min-max scaled
        to the range 0..255. A constant image (no dynamic range) is returned
        as all zeros.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Shift so the minimum is 0, then scale by the remaining peak value.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Every pixel has the same value: dividing by the peak would produce
        # NaN, and casting NaN to uint8 is undefined. Return a black image.
        return np.zeros(data.shape, dtype=np.uint8)

    data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [None]:
# Convert every test DICOM to a JPG in `save_dir` and record, per image, its id and
# the two dimensions of the converted pixel array.
image_id_arr = []
dim0_arr = []
dim1_arr = []

save_dir = '/kaggle/working/test/'

os.makedirs(save_dir, exist_ok=True)

for dirname, _, filenames in tqdm(os.walk('../input/siim-covid19-detection/test')):
    for file in filenames:
        # Split off the real extension instead of substituting the substring
        # 'dcm' wherever it occurs in the name, and skip anything that is not
        # a DICOM file so the reader is never handed a stray file.
        image_id, extension = os.path.splitext(file)
        if extension.lower() != '.dcm':
            continue

        xray = read_xray(os.path.join(dirname, file))
        im = Image.fromarray(xray)
        im.save(os.path.join(save_dir, image_id + '.jpg'))

        image_id_arr.append(image_id)
        dim0_arr.append(xray.shape[0])
        dim1_arr.append(xray.shape[1])

In [None]:
# One row per converted image: its id plus the two recorded array dimensions.
meta_columns = {
    'image_id': image_id_arr,
    'dim0': dim0_arr,
    'dim1': dim1_arr,
}
meta_test = pd.DataFrame(meta_columns)
meta_test

In [None]:
# Build the study -> image mapping: every directory directly under `test_path` is a
# study, and every file found anywhere beneath it is one of that study's images.
test_path = "/kaggle/input/siim-covid19-detection/test"
studies = [entry for entry in listdir(test_path) if isdir(join(test_path, entry))]

study_lst = []
image_lst = []
for study in studies:
    for _root, _subdirs, files in os.walk(join(test_path, study)):
        for name in files:
            # Study ids carry the "_study" suffix; the image id is the file
            # name up to its first dot.
            study_lst.append(study + "_study")
            image_lst.append(name.split(".")[0])

df = pd.DataFrame(list(zip(study_lst, image_lst)), columns=['StudyInstanceUID', 'image_id'])
df

In [None]:
# Attach each image's study id to its metadata row (join on image_id).
test_df = pd.merge(meta_test, df, on='image_id')
test_df

### Study level

In [None]:
import itertools

import matplotlib.pylab as plt
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub

# Report the TensorFlow / TF-Hub versions and the devices TensorFlow can see.
print('TF version:', tf.__version__)
print('Hub version:', hub.__version__)
print('Physical devices:', tf.config.list_physical_devices())

In [None]:
# Model configuration.
# Side length (in pixels) used for the square model input.
image_size = 480
# Local directory of the EfficientNetV2 feature-vector module loaded through TF-Hub.
hub_url = '/kaggle/input/efficientnetv2-tf-hub/efficientnetv2-l-21k-ft1k/feature-vector'

In [None]:
# Study-level classes; the model has one output unit per entry.
labels = ["negative", "typical", "indeterminate", "atypical"]

tf.keras.backend.clear_session()

# Frozen backbone loaded from `hub_url`, fed by a square RGB input and followed by
# an L2-regularised dense head with one sigmoid unit per label.
base_model = hub.KerasLayer(hub_url, trainable=False)
input_layer = tf.keras.layers.InputLayer(input_shape=[image_size, image_size, 3])
classifier_head = tf.keras.layers.Dense(
    len(labels),
    kernel_regularizer=tf.keras.regularizers.l2(0.0001),
    activation='sigmoid'
)
model = tf.keras.Sequential([input_layer, base_model, classifier_head])

# Restore the saved study-level weights into the assembled model.
model.load_weights('/kaggle/input/k/duythanhng/siim-covid-19-efficientnetv2/efficientnetv2-l-21k-ft1k-study-level.h5')

In [None]:
def get_image_name(image_id):
    """Return the JPG file name for ``image_id``: the id with ".jpg" appended."""
    return image_id + ".jpg"

# File name of each converted JPG, derived from the image id.
test_df["image"] = test_df["image_id"].apply(get_image_name)

# Give every label a column of zeros (one entry per row); these placeholder values
# are replaced by the model's scores further down.
zero_scores = [0] * len(test_df)
for label in labels:
    test_df[label] = zero_scores
test_df

In [None]:
def get_type(row):
    """Return the class index encoded by the label columns of ``row``.

    The result is the position in ``labels`` of the last label whose value in
    ``row`` equals 1, or 0 when no label is set to 1.
    """
    flagged = [name for name in labels if row[name] == 1]
    if not flagged:
        return 0
    return labels.index(flagged[-1])
# Store a per-row class index in "type". The label columns were all initialised to 0
# above, so get_type returns 0 for every row here.
# NOTE(review): presumably just a placeholder target so the generator has a "type"
# column to read — confirm.
test_df["type"] = test_df.apply(get_type, axis=1)

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that reads images from disk and yields ``(X, y)`` batches.

    Each image is resized so that its longer side equals ``dim[0]`` (keeping the
    aspect ratio) and then padded with black borders to ``dim[0] x dim[0]``.
    Because the padded image is written into a buffer of shape
    ``(*dim, n_channels)``, ``dim`` is expected to be square.

    Args:
        _X: Frame with an ``"image"`` column holding file names relative to
            ``image_path``; accessed positionally with ``.iloc``.
        _y: Frame with a ``"type"`` column holding integer class indices;
            accessed positionally with ``.iloc``.
        batch_size: Number of samples per batch.
        dim: Target ``(height, width)`` of each sample.
        n_channels: Number of channels in the batch buffer.
            NOTE(review): images are loaded with ``cv2.imread`` default flags,
            so this is expected to be 3 — confirm.
        n_classes: Number of classes used for the one-hot encoding of ``y``.
        image_path: Directory prefix joined with each file name.
        shuffle: Whether to reshuffle the sample order at the end of each epoch.
    """

    def __init__(self,
                 _X,
                 _y,
                 batch_size=32,
                 dim=(256,256),
                 n_channels=3,
                 n_classes=4,
                 image_path="",
                 shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.y = _y
        self.X = _X
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.image_path = image_path
        self.shuffle = shuffle
        self.img_indexes = np.arange(len(self.X))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch.

        Rounds up so that a trailing partial batch is served instead of being
        silently dropped (which would lose samples at prediction time).
        """
        return int(np.ceil(len(self.img_indexes) / self.batch_size))

    def __getitem__(self, index):
        """Generate the batch at position ``index`` as ``(X, y)``."""
        # Positions of this batch within the (possibly shuffled) epoch order;
        # the slice is naturally shorter for the last, partial batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temps = [self.img_indexes[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temps)
        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.X))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _letterbox(self, full_path):
        """Load ``full_path`` and return it resized and zero-padded to a square.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image.
        """
        im = cv2.imread(full_path)
        if im is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {full_path}")
        old_size = im.shape[:2]  # (height, width)

        desired_size = self.dim[0]
        ratio = float(desired_size)/max(old_size)
        # Clamp to 1 px so extremely elongated images never resize to 0.
        new_size = tuple(max(1, int(x*ratio)) for x in old_size)

        # cv2.resize takes (width, height).
        im = cv2.resize(im, (new_size[1], new_size[0]))

        # Split the remaining space evenly between opposite borders.
        delta_w = desired_size - new_size[1]
        delta_h = desired_size - new_size[0]
        top, bottom = delta_h//2, delta_h-(delta_h//2)
        left, right = delta_w//2, delta_w-(delta_w//2)
        color = [0, 0, 0]
        return cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)

    def __data_generation(self, list_IDs_temps):
        """Build one batch for the given sample positions.

        Returns:
            ``X``: float array of shape ``(n, *dim, n_channels)`` holding the
            8-bit images divided by the batch maximum (left as zeros when the
            whole batch is black).
            ``y``: one-hot labels with ``n_classes`` columns.
        """
        # Size the buffers by the actual number of samples so a partial batch
        # carries no uninitialised rows.
        n_samples = len(list_IDs_temps)
        X = np.empty((n_samples, *self.dim, self.n_channels), dtype=np.uint8)
        y = np.empty((n_samples), dtype=int)
        for i, ID in enumerate(list_IDs_temps):
            full_path = os.path.join(self.image_path, self.X["image"].iloc[ID])
            X[i,] = self._letterbox(full_path)
            y[i] = self.y["type"].iloc[ID]

        # Scale by the batch maximum; guard against an all-black (or empty)
        # batch, where dividing by the maximum would yield NaN.
        peak = np.max(X) if X.size else 0
        X = X / peak if peak > 0 else X.astype(float)
        y_convert = keras.utils.to_categorical(y, num_classes=self.n_classes)
        return X, y_convert

In [None]:
# Square input size fed to the generator.
dim = (image_size, image_size)

# Base generator settings.
params = {
    "dim": dim,
    "batch_size": 1,
    "n_classes": 4,
    "n_channels": 3,
}
# Settings for reading the converted JPGs from `save_dir` without shuffling.
params_valid = {
    "image_path": save_dir,
    "shuffle": False,
    **params,
}
test_generator = DataGenerator(test_df[["image"]], test_df[["type"]], **params_valid)

In [None]:
# Run the study-level classifier over every batch of the test generator.
# The cells below index the result as predicts[i, j]: row i per generated sample,
# column j per entry of `labels`.
predicts = model.predict(test_generator)

In [None]:
# Regroup the prediction matrix by label: confidences[label] collects that label's
# score for every sample, in sample order. A falsy score is stored as 0.0.
confidences = {}
for sample_scores in predicts:
    for column, name in enumerate(labels):
        score = sample_scores[column]
        confidences.setdefault(name, []).append(score if score else 0.0)

In [None]:
# Sanity check: number of scores collected for one label (one per predicted sample).
len(confidences['negative'])

In [None]:
# Sanity check: number of rows (images) in the test frame.
len(test_df)

In [None]:
# Overwrite each label column with the model's scores for that label. The assignment
# is positional, so the score lists must be in the same order as test_df's rows.
for label in labels:
    test_df[label] = confidences[label]

test_df

In [None]:
# Average the per-image scores within each study. Only the label columns are
# aggregated: test_df also holds string columns (image_id, image), and taking the
# mean over those raises a TypeError on recent pandas versions.
study_df = test_df.groupby('StudyInstanceUID')[labels].mean().reset_index()
study_df

In [None]:
def get_PredictionString(row):
    """Format the label scores of ``row`` as one prediction string.

    For every entry of ``labels`` the string contains
    ``"<label> <score with 5 decimals> 0 0 1 1"``; the entries are separated by
    single spaces and surrounding whitespace is stripped.
    """
    pieces = []
    for label in labels:
        conf = row[label]
        pieces.append(f'{label} {conf:0.5f} 0 0 1 1 ')
    return ''.join(pieces).strip()

# Turn the averaged scores into the study-level submission rows: build the
# prediction string, drop the raw score columns and rename the key column to "id".
study_df = (
    study_df
    .assign(PredictionString=study_df.apply(get_PredictionString, axis=1))
    .drop(columns=labels)
    .rename(columns={'StudyInstanceUID': 'id'})
)
study_df

### Image level

In [None]:
def convert_id(x):
    """Append the "_image" suffix to ``x``.

    ``x`` may be a string or anything that supports ``+`` with a string, such
    as a pandas Series of strings (the suffix is then added element-wise).
    """
    suffix = "_image"
    return x + suffix
# Image-level submission rows: every image id gets the "_image" suffix and the same
# constant prediction string.
image_df = (
    test_df[['image_id']]
    .apply(convert_id)
    .rename(columns={'image_id': 'id'})
)
image_df["PredictionString"] = ["none 1 0 0 1 1"] * len(test_df)
image_df

### Submission

In [None]:
# Stack the study-level rows on top of the image-level rows and write the result
# (without the index) as the submission file.
submission_parts = (study_df, image_df)
sub_df = pd.concat(submission_parts)
sub_df.to_csv('/kaggle/working/submission.csv', index=False)
print(sub_df.shape)
sub_df.head()

In [None]:
# Delete the directory that holds the converted test JPGs (the same location as
# `save_dir` used during conversion).
import shutil
shutil.rmtree('/kaggle/working/test')