In [None]:
# Install conda package archives (libjpeg-turbo, libgcc-ng, gdcm, conda, certifi, openssl)
# from the local pydicom-conda-helper input directory; `-y` answers every prompt automatically.
# NOTE(review): presumably these give pydicom a GDCM backend for compressed DICOM pixel data — confirm.
!conda install '/kaggle/input/pydicom-conda-helper/libjpeg-turbo-2.1.0-h7f98852_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-conda-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
import sys
# Put the local EfficientNet sources on the import path for this kernel session ...
sys.path.append('/kaggle/input/efficientnet-keras-dataset/efficientnet_kaggle')
# ... and also install them in editable mode (`-e`) with reduced pip output (`-q`).
! pip install -e /kaggle/input/efficientnet-keras-dataset/efficientnet_kaggle -q

# Packages

In [None]:
import os
from glob import glob
import shutil
from tqdm.notebook import tqdm
tqdm.pandas()
import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
import tensorflow as tf
import math

In [None]:
# --- Run configuration -------------------------------------------------------
# When True, only the first 100 test images are processed and predicted.
debug = False

# Input resolution; DIM aliases the first (and only) entry of IMG_SIZES.
IMG_SIZES = [[512, 512]]
DIM = IMG_SIZES[0]
dim = DIM[0]            # side length used when resizing the DICOM images

# Number of augmented passes over the test set that are averaged per model.
TTA = 3

# Keep the original aspect ratio when resizing (False -> force dim x dim).
aspect_ratio = False

# Names of the per-class prediction columns.
class_labels = ['0', '1', '2', '3']

# --- Augmentation ranges -----------------------------------------------------
# Colour jitter: (lower, upper) bounds for saturation/contrast, max delta for brightness.
sat = (0.7, 1.3)
cont = (0.8, 1.2)
bri = 0.1

# Scale factors for the random affine transform (rotation, shear, zoom, shift).
ROT_ = 0.0
SHR_ = 2.0
HZOOM_ = WZOOM_ = 8.0
HSHIFT_ = WSHIFT_ = 8.0

# Commit or Not

In [None]:
# Collect every DICOM file below the competition's test directory.
filepaths = glob('/kaggle/input/siim-covid19-detection/test/**/*dcm', recursive=True)

# One row per file. The ids are derived from path components:
#   last component (file name without '.dcm')  -> '<name>_image'
#   third component from the end               -> '<name>_study'
test_df = pd.DataFrame({'filepath': filepaths}).assign(
    image_id=lambda frame: frame.filepath.map(
        lambda p: p.split('/')[-1].replace('.dcm', '') + '_image'),
    study_id=lambda frame: frame.filepath.map(
        lambda p: p.split('/')[-3].replace('.dcm', '') + '_study'),
)
test_df.head()

In [None]:
# Output directory for the resized JPEG copies of the test images.
os.makedirs(name='/kaggle/working/test', exist_ok=True)

In [None]:
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2

import matplotlib.pyplot as plt
%matplotlib inline

# credit @raddar
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels as an 8-bit array.

    Args:
        path: Location of the DICOM file.
        voi_lut: When True, the pixel array is passed through ``apply_voi_lut``
            (VOI LUT, if provided by the device, maps raw data to a
            "human-friendly" view); otherwise the raw pixel array is used.
        fix_monochrome: When True and the file's ``PhotometricInterpretation``
            is ``"MONOCHROME1"``, the intensities are inverted.

    Returns:
        ``np.uint8`` array of the same shape as the pixel data, min-max scaled
        to the range 0..255. An image whose pixels are all equal yields all
        zeros.
    """
    # `dcmread` is pydicom's reader entry point; `read_file` is its legacy alias.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. Skip the division for a constant image, where
    # the maximum is 0 after the shift and dividing would only produce NaNs.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def resize_and_save(file_path):
    """Convert one DICOM file to a resized JPEG under /kaggle/working.

    The image is read with ``read_xray`` and resized according to the
    module-level ``aspect_ratio`` / ``dim`` settings: either scaled so its
    longer side equals ``dim`` (aspect ratio kept) or forced to ``dim x dim``.
    The JPEG is written to ``/kaggle/working/train`` when ``'train'`` occurs
    in ``file_path`` and to ``/kaggle/working/test`` otherwise.

    Args:
        file_path: '/'-separated path of the source ``.dcm`` file.

    Returns:
        Tuple ``(image_id, width, height)`` where ``image_id`` is the file
        name without ``.dcm`` plus ``'_image'`` and width/height are the
        dimensions of the image *before* resizing.

    Raises:
        OSError: If the JPEG could not be written.
    """
    split = 'train' if 'train' in file_path else 'test'
    base_dir = f'/kaggle/working/{split}'
    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs(base_dir, exist_ok=True)

    img = read_xray(file_path)
    h, w = img.shape[:2]  # orig hw
    if aspect_ratio:
        r = dim / max(h, w)  # scale factor that maps the longer side to `dim`
        interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
        if r != 1:
            img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=interp)
    else:
        # The flag must be passed by keyword: the third positional parameter of
        # cv2.resize is `dst`, so a positional flag is not used as interpolation.
        img = cv2.resize(img, (dim, dim), interpolation=cv2.INTER_AREA)

    # Same derivation as the `image_id` column of test_df, so the ids returned
    # here always match the ids used as the merge key.
    filename = file_path.split('/')[-1].replace('.dcm', '')
    out_path = os.path.join(base_dir, f'{filename}.jpg')
    if not cv2.imwrite(out_path, img):
        raise OSError(f'Could not write {out_path}')
    return filename + '_image', w, h


In [None]:
# Convert the test DICOMs to JPEG (only the first 100 in debug mode) and keep
# the (image_id, width, height) tuple reported for each file.
filepaths = test_df.filepath.iloc[:(100 if debug else test_df.shape[0])]
info = [resize_and_save(filepath) for filepath in tqdm(filepaths)]

In [None]:
# Turn the list of (image_id, width, height) tuples into three parallel sequences.
image_id, width, height = zip(*info)
df = pd.DataFrame(dict(image_id=image_id, width=width, height=height))

# Location of the JPEG written for each image id.
df['image_path'] = df.image_id.map(
    lambda x: '/kaggle/working/test/' + x.replace('_image', '') + '.jpg')

# Attach size and path to the test metadata, then add zero-filled class columns.
test_df = test_df.merge(df, on='image_id', how='left')
test_df.loc[:, class_labels] = 0
test_df.head()

In [None]:
import tensorflow.keras.backend as K
import math
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix used to transform pixel indices.

    ``rotation`` and ``shear`` are given in degrees. The result is the product
    ``(rotation @ shear) @ (zoom @ shift)`` of the four elementary matrices.
    """
    # Degrees -> radians.
    rot_rad = math.pi * rotation / 180.
    shr_rad = math.pi * shear / 180.

    def as_3x3(entries):
        # Nine entries in row-major order -> one 3x3 tensor.
        return tf.reshape(tf.concat([entries], axis=0), [3, 3])

    # Rotation.
    cos_r = tf.math.cos(rot_rad)
    sin_r = tf.math.sin(rot_rad)
    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')
    rotation_matrix = as_3x3([
        cos_r, sin_r, zero,
        -sin_r, cos_r, zero,
        zero, zero, one,
    ])

    # Shear.
    cos_s = tf.math.cos(shr_rad)
    sin_s = tf.math.sin(shr_rad)
    shear_matrix = as_3x3([
        one, sin_s, zero,
        zero, cos_s, zero,
        zero, zero, one,
    ])

    # Zoom.
    zoom_matrix = as_3x3([
        one / height_zoom, zero, zero,
        zero, one / width_zoom, zero,
        zero, zero, one,
    ])

    # Shift.
    shift_matrix = as_3x3([
        one, zero, height_shift,
        zero, one, width_shift,
        zero, zero, one,
    ])

    rot_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rot_shear, zoom_shift)


def transform(image, DIM=IMG_SIZES[0]):
    """Apply one random affine warp (rotation, shear, zoom, shift) to an image.

    The warp parameters are drawn from normal distributions scaled by the
    module-level ``ROT_``, ``SHR_``, ``HZOOM_``, ``WZOOM_``, ``HSHIFT_`` and
    ``WSHIFT_`` settings. Every destination pixel is mapped back to a source
    pixel with the matrix from ``get_mat`` and filled by gathering from
    ``image``.

    Args:
        image: A single image (not a batch) with 3 channels whose spatial size
            is ``DIM``.
        DIM: Two-element spatial size. When the two entries differ, the second
            axis is padded before the warp and cropped again afterwards.

    Returns:
        The warped image, reshaped to ``[*DIM, 3]``.

    NOTE(review): the warp is now applied for square ``DIM`` as well (the
    gathered pixels used to be discarded in that case). Re-validate scores that
    were obtained with the warp effectively disabled.
    """
    # fixed for non-square image thanks to Chris Deotte
    if DIM[0]!=DIM[1]:
        pad = (DIM[0]-DIM[1])//2
        image = tf.pad(image, [[0, 0], [pad, pad+1],[0, 0]])

    NEW_DIM = DIM[0]

    XDIM = NEW_DIM%2 #fix for size 331

    # Random warp parameters (one scalar-like tensor of shape [1] each).
    rot = ROT_ * tf.random.normal([1], dtype='float32')
    shr = SHR_ * tf.random.normal([1], dtype='float32')
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / WZOOM_
    h_shift = HSHIFT_ * tf.random.normal([1], dtype='float32')
    w_shift = WSHIFT_ * tf.random.normal([1], dtype='float32')

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift)

    # LIST DESTINATION PIXEL INDICES (centred on the image, homogeneous coords)
    x   = tf.repeat(tf.range(NEW_DIM//2, -NEW_DIM//2,-1), NEW_DIM)
    y   = tf.tile(tf.range(-NEW_DIM//2, NEW_DIM//2), [NEW_DIM])
    z   = tf.ones([NEW_DIM*NEW_DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )

    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clamp so every looked-up index stays inside the image.
    idx2 = K.clip(idx2, -NEW_DIM//2+XDIM+1, NEW_DIM//2)

    # FIND ORIGIN PIXEL VALUES
    idx3 = tf.stack([NEW_DIM//2-idx2[0,], NEW_DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))

    # Rebuild the image from the gathered pixels. This has to happen for square
    # inputs too; otherwise `d` is thrown away and the input comes back unchanged.
    image = tf.reshape(d,[NEW_DIM, NEW_DIM,3])
    if DIM[0]!=DIM[1]:
        # Remove the padding that was added before the warp.
        image = image[:, pad:DIM[1]+pad,:]
    image = tf.reshape(image, [*DIM, 3])

    return image

In [None]:
def auto_select_accelerator():
    """Return a TPU distribution strategy if a TPU can be resolved.

    When ``TPUClusterResolver`` (or any of the TPU setup calls) raises
    ``ValueError``, the current default strategy from
    ``tf.distribute.get_strategy()`` is returned instead. The number of
    replicas is printed in both cases.

    Returns:
        A ``tf.distribute`` strategy object.
    """
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        # Prefer the stable class; fall back to the deprecated experimental
        # alias on TensorFlow builds that do not expose it yet.
        strategy_cls = getattr(tf.distribute, "TPUStrategy", None)
        if strategy_cls is None:
            strategy_cls = tf.distribute.experimental.TPUStrategy
        strategy = strategy_cls(tpu)
        print("Running on TPU:", tpu.master())
    except ValueError:
        strategy = tf.distribute.get_strategy()
    print(f"Running on {strategy.num_replicas_in_sync} replicas")

    return strategy


def build_decoder(with_labels=True, target_size=(300, 300), ext='jpg'):
    """Create the function that turns an image path into a float image tensor.

    The returned function reads the file, decodes it as a 3-channel PNG or
    JPEG depending on ``ext``, casts to float32, resizes to ``target_size``
    with the 'area' method and divides by 255.

    Args:
        with_labels: If True the returned function takes ``(path, label)`` and
            passes the label through; otherwise it takes only ``path``.
        target_size: Size handed to ``tf.image.resize``.
        ext: 'png', 'jpg' or 'jpeg'. Any other value makes the decode function
            raise ``ValueError`` when it is called.
    """
    def decode(path):
        raw = tf.io.read_file(path)
        if ext in ('jpg', 'jpeg'):
            pixels = tf.image.decode_jpeg(raw, channels=3)
        elif ext == 'png':
            pixels = tf.image.decode_png(raw, channels=3)
        else:
            raise ValueError("Image extension not supported")

        pixels = tf.cast(pixels, tf.float32)
        pixels = tf.image.resize(pixels, target_size, method='area')
        return pixels / 255.0

    def decode_with_labels(path, label):
        return decode(path), label

    if with_labels:
        return decode_with_labels
    return decode


def build_augmenter(with_labels=True):
    """Create the per-image augmentation function.

    The augmentation applies, in this order: ``transform`` (with the
    module-level ``DIM``), a random horizontal flip, and random saturation,
    contrast and brightness using the module-level ``sat``, ``cont`` and
    ``bri`` settings.

    Args:
        with_labels: If True the returned function takes ``(img, label)`` and
            passes the label through; otherwise it takes only ``img``.
    """
    def augment(img):
        warped = transform(img, DIM = DIM)
        flipped = tf.image.random_flip_left_right(warped)
        jittered = tf.image.random_saturation(flipped, lower=sat[0], upper=sat[1])
        jittered = tf.image.random_contrast(jittered, lower=cont[0], upper=cont[1])
        return tf.image.random_brightness(jittered, max_delta=bri)

    def augment_with_labels(img, label):
        return augment(img), label

    if with_labels:
        return augment_with_labels
    return augment


def build_dataset(paths, labels=None, bsize=32, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024, 
                  cache_dir=""):
    """Assemble the ``tf.data`` input pipeline for a list of image paths.

    Pipeline order: slice -> decode -> (cache) -> (augment) -> (repeat) ->
    (shuffle) -> batch -> prefetch. Stages in parentheses are optional.

    Args:
        paths: Image paths.
        labels: Optional labels; when given, elements are ``(path, label)``.
        bsize: Batch size.
        cache: Whether to cache the decoded images.
        decode_fn: Decode function; defaults to ``build_decoder(...)``.
        augment_fn: Augment function; defaults to ``build_augmenter(...)``.
        augment: Whether to apply ``augment_fn``.
        repeat: Whether to repeat the dataset indefinitely.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        cache_dir: Cache location passed to ``Dataset.cache``; the directory is
            created when caching is enabled and the value is not empty.

    Returns:
        The batched, prefetched dataset.
    """
    if cache is True and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)

    has_labels = labels is not None
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    AUTO = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    ds = tf.data.Dataset.from_tensor_slices(source)
    ds = ds.map(decode_fn, num_parallel_calls=AUTO)
    if cache:
        ds = ds.cache(cache_dir)
    if augment:
        ds = ds.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        ds = ds.repeat()
    if shuffle:
        ds = ds.shuffle(shuffle)

    return ds.batch(bsize).prefetch(AUTO)

In [None]:
# Distribution strategy for this session (TPU when one can be resolved).
strategy = auto_select_accelerator()
# Global batch size: 32 samples for each replica.
BATCH_SIZE = 32 * strategy.num_replicas_in_sync

# DataLoader

In [None]:
# JPEG paths to predict on (first 100 only in debug mode).
test_paths = test_df.image_path.iloc[:(100 if debug else test_df.shape[0])]

# Unlabelled decoder at the model's input size.
test_decoder = build_decoder(with_labels=False, target_size=DIM)

# Augmentation and repeat stay on so the test set can be iterated several
# times with different random augmentations; shuffling stays off so the
# prediction order follows `test_paths`.
dtest = build_dataset(
    test_paths,
    decode_fn=test_decoder,
    bsize=BATCH_SIZE,
    cache=False,
    augment=True,
    repeat=True,
    shuffle=False,
)

## Load model and submit

In [None]:
# EfficientNet constructors indexed by variant number (B0 .. B7).
EFNS = [efn.EfficientNetB0, efn.EfficientNetB1, efn.EfficientNetB2, efn.EfficientNetB3, 
        efn.EfficientNetB4, efn.EfficientNetB5, efn.EfficientNetB6, efn.EfficientNetB7]

def build_model(dim=IMG_SIZES[0], ef=0, weights='imagenet'):
    """Build the 4-class classifier on top of an EfficientNet backbone.

    Architecture: input -> EfficientNet (no top) -> global average pooling ->
    Dense(64, relu) -> Dense(4, softmax). The model is returned uncompiled.

    Args:
        dim: Two-element spatial size; the input shape is ``(*dim, 3)``.
        ef: Index into ``EFNS`` selecting the EfficientNet variant.
        weights: Passed to the backbone constructor as its ``weights``
            argument. Defaults to ``'imagenet'``; pass ``None`` to skip the
            pretrained initialisation, e.g. when saved weights are loaded
            right afterwards.

    Returns:
        A ``tf.keras.Model``.
    """
    inp = tf.keras.layers.Input(shape=(*dim,3))
    base = EFNS[ef](input_shape=(*dim,3),weights=weights,include_top=False)
    x = base(inp)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(64, activation = 'relu')(x)
    x = tf.keras.layers.Dense(4,activation='softmax')(x)
    model = tf.keras.Model(inputs=inp,outputs=x)
    return model

In [None]:
base_dir = '/kaggle/input/siim-covid-19-study-level-train-tpu'
model_paths = sorted(glob(os.path.join(base_dir, '*h5')))
# Fail early: with no weight files the fold average below would be NaN.
if not model_paths:
    raise FileNotFoundError(f'No *h5 weight files found in {base_dir}')

# Each model predicts TTA augmented passes over the test set. `steps` must be a
# whole number of batches, so round up to include the final partial batch; the
# surplus rows coming from the repeating dataset are sliced off after predict.
n_preds = TTA * len(test_paths)
steps = math.ceil(n_preds / BATCH_SIZE)

preds=[]
# Create the model's variables under the strategy scope so that it is also
# usable with a distributed (e.g. TPU) strategy.
with strategy.scope():
    model = build_model(dim=IMG_SIZES[0], ef=7)
for fold, model_path in enumerate(tqdm(model_paths)):
    print(f'Fold: {fold+1}')
    with strategy.scope():
        print('Loading Model...')
        model.load_weights(model_path)
    print('Predicting...')
    pred = model.predict(dtest, steps=steps, verbose=1)[:n_preds,:]
    # Rows arrive pass after pass (dataset is unshuffled), so reshape to
    # (TTA, n_images, n_classes) and average over the passes.
    pred = np.mean(pred.reshape(TTA, len(test_paths), -1), axis=0)
    preds.append(pred)
# Average the per-fold predictions.
preds = np.mean(preds, axis=0)

# Process Prediction

In [None]:
# Class index -> study-level class name, and the inverse lookup.
label2name = dict(enumerate(['negative', 'indeterminate', 'atypical', 'typical']))
name2label = {name: idx for idx, name in label2name.items()}

In [None]:
# Store the averaged predictions in the class columns (rows 0..99 in debug mode).
test_df.loc[:(99 if debug else test_df.shape[0]), class_labels] = preds

# Study-level score = mean of the image-level scores of that study.
study_df = (
    test_df.groupby(['study_id'])[class_labels]
    .mean()
    .reset_index()
    .rename(columns={'study_id': 'id'})
)
study_df.head()

In [None]:
def get_PredictionString(row, thr=0):
    """Format one row of class scores as a submission prediction string.

    For each class index 0..3 whose score ``row[str(index)]`` is greater than
    ``thr``, the output contains ``'<class name> <score, 2 decimals> 0 0 1 1'``.
    Entries are separated by single spaces; the result is empty when no score
    exceeds ``thr``.
    """
    scores = [(idx, row[str(idx)]) for idx in range(4)]
    entries = [
        f'{label2name[idx]} {conf:0.2f} 0 0 1 1'
        for idx, conf in scores
        if conf > thr
    ]
    return ' '.join(entries).strip()

In [None]:
# Replace the per-class score columns with the formatted prediction string.
study_df = study_df.assign(
    PredictionString=study_df.progress_apply(get_PredictionString, axis=1)
).drop(columns=class_labels)
study_df.head()

In [None]:
# Image-level rows: every image gets the same constant "none" prediction.
image_df = pd.DataFrame({'id': test_df.image_id.tolist()}).assign(
    PredictionString="none 1 0 0 1 1")
image_df.head()

In [None]:
# Submission = study-level rows followed by image-level rows, written without the index.
sub_df = pd.concat([study_df, image_df], axis=0)
sub_df.to_csv(path_or_buf='/kaggle/working/submission.csv', index=False)
print(sub_df.shape)
sub_df.head()

In [None]:
import shutil
# Delete the temporary JPEG copies of the test images.
shutil.rmtree(path='/kaggle/working/test')