# Imports

In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from glob import glob
from tqdm.notebook import tqdm
from ast import literal_eval
import os
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 22})
from sklearn.metrics import accuracy_score
from skimage import exposure
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model
import cv2
from matplotlib.patches import Rectangle
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, DenseNet121, Xception
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout,GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization, LeakyReLU, InputLayer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import models
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import tensorflow.keras.backend as K
from tensorflow.math import confusion_matrix

# Data manipulations

In [None]:
# Image-level annotation table; later cells read its 'id', 'boxes', 'label'
# and 'StudyInstanceUID' columns.
df_image = pd.read_csv(
    '../input/siim-covid19-detection/train_image_level.csv'
)
display(df_image.head(n=3))
print(df_image.shape)

In [None]:
# Study-level table; later cells read its 'id' column and the four
# one-hot appearance columns.
df_study = pd.read_csv(
    '../input/siim-covid19-detection/train_study_level.csv'
)
display(df_study.head(n=3))
print(df_study.shape)

In [None]:
# Sample submission file, loaded here only for a quick preview.
df_sampleSub = pd.read_csv(
    '../input/siim-covid19-detection/sample_submission.csv'
)
display(df_sampleSub.head(n=3))
print(df_sampleSub.shape)

In [None]:
# Strip the '_study' marker from the ids and expose them under the same
# column name the image table uses, so the two tables can be joined.
df_study['id'] = df_study['id'].str.replace('_study', '', regex=False)
df_study.rename(columns={'id': 'StudyInstanceUID'}, inplace=True)
df_study.head(3)

In [None]:
# Attach the study-level columns to every image row (default inner join
# on the shared study identifier).
df_train = pd.merge(df_image, df_study, on='StudyInstanceUID')
df_train.head(3)

In [None]:
# train_dir_jpg = '../input/covid-jpg-512/train'
# train_dir_origin ='../input/siim-covid19-detection/train'
# paths_original = []
# paths_jpg = []
# for _, row in tqdm(df_train.iterrows()):
#     image_id = row['id'].split('_')[0]
#     study_id = row['StudyInstanceUID']
#     image_path_jpg = glob(f'{train_dir_jpg}/{image_id}.jpg')
#     image_path_original = glob(f'{train_dir_origin}/{study_id}/*/{image_id}.dcm')
#     paths_jpg.append(image_path_jpg)
#     paths_original.append(image_path_original)

In [None]:
# df_train['path'] = paths_jpg
# df_train['origin'] = paths_original
# df_train.head(3)

In [None]:
# Collapse the four one-hot study columns into one string column.
# The dict order matches the original assignment order, so if a row had
# more than one flag set the later entry would still win.
study_label_by_column = {
    'Negative for Pneumonia': 'negative',
    'Typical Appearance': 'typical',
    'Indeterminate Appearance': 'indeterminate',
    'Atypical Appearance': 'atypical',
}
for flag_column, label_name in study_label_by_column.items():
    df_train.loc[df_train[flag_column] == 1, 'study_label'] = label_name
df_train.drop(columns=list(study_label_by_column), inplace=True)

# Turn the image id into the file name used in the JPG directory.
df_train['id'] = df_train['id'].str.replace('_image', '.jpg')

# The image-level label is the first whitespace-separated token of 'label'.
df_train['image_label'] = df_train['label'].str.split().map(lambda tokens: tokens[0])
df_train.head(3)

In [None]:
# Per-image size table; later cells use its 'id', 'dim0' and 'dim1' columns.
df_size = pd.read_csv(
    '../input/covid-jpg-512/size.csv'
)
df_size.head(3)

In [None]:
# Add the size columns to the training table, matching rows on the
# '.jpg' file name stored in 'id'.
df_train = pd.merge(df_train, df_size, on='id')
df_train.head(3)

# Visualization

In [None]:
# Show the first n training images in a 4x5 grid, titled with their study
# label and overlaid with their bounding boxes when present.
n = 20
train_dir = '../input/covid-jpg-512/train'
fig, axs = plt.subplots(4, 5, figsize=(20,20))
fig.subplots_adjust(hspace=.2, wspace=.2)
axs = axs.ravel()
for i in range(n):
    row = df_train.iloc[i]
    ax = axs[i]

    # cv2.imread returns BGR; matplotlib expects RGB.
    img = cv2.imread(os.path.join(train_dir, row['id']))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ax.imshow(img)

    # Title every panel exactly once, whether or not it has boxes.
    ax.set_title(row['study_label'])

    # Rows without annotations hold a non-string value in 'boxes'.
    if not isinstance(row['boxes'], str):
        continue

    # Box coordinates are scaled by (displayed size / recorded size);
    # x uses dim1 and y uses dim0, as in the original mapping.
    x_scale = img.shape[1] / row['dim1']
    y_scale = img.shape[0] / row['dim0']
    for box in literal_eval(row['boxes']):
        ax.add_patch(Rectangle(
            (box['x'] * x_scale, box['y'] * y_scale),
            box['width'] * x_scale,
            box['height'] * y_scale,
            fill=False, color='y', linewidth=2))

# PreProcessing

In [None]:
def preprocess_image(img):
    """Return `img` after histogram equalisation.

    Thin wrapper around `skimage.exposure.equalize_hist`; it is also passed
    to the image generators as their `preprocessing_function`.
    """
    return exposure.equalize_hist(img)

# Side-by-side comparison of one training image before (left) and after
# (right) histogram equalisation.
im = cv2.imread('../input/covid-jpg-512/train/007cf31356c6.jpg')
im2 = preprocess_image(im)
# The raw image is divided by 255 before being placed next to the
# equalised one.
res = np.concatenate((im / 255, im2), axis=1)
plt.imshow(res)
plt.show()

# ImageGenerators and Augmentations

In [None]:
img_size = 299
batch_size = 16

# Training generator: light augmentation plus histogram equalisation.
image_generator = ImageDataGenerator(
    preprocessing_function=preprocess_image,
    validation_split=0.2,
    horizontal_flip=True,
    zoom_range=0.1,
    brightness_range=[0.8, 1.1],
    fill_mode='nearest',
    # rotation_range=20,
    # shear_range=0.1,
)

# Validation generator: same split fraction and preprocessing, no augmentation.
image_generator_valid = ImageDataGenerator(
    preprocessing_function=preprocess_image,
    validation_split=0.2,
)

# Arguments common to both flows, so the two subsets are built from the
# same dataframe, directory, columns and image size.
flow_options = dict(
    dataframe=df_train,
    directory='../input/covid-jpg-512/train',
    x_col='id',
    y_col='study_label',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    seed=23,
)

train_generator = image_generator.flow_from_dataframe(
    subset='training',
    **flow_options,
)

# shuffle=False keeps the prediction order aligned with
# valid_generator.labels in the evaluation cell.
valid_generator = image_generator_valid.flow_from_dataframe(
    subset='validation',
    shuffle=False,
    **flow_options,
)

In [None]:
# Preview augmentations: one figure per sample j, each with five draws of
# that sample from the first training batch.
for j in range(2):
    # Batch 0 is requested again for every draw.
    # NOTE(review): presumably each request re-applies the random
    # augmentation to the same underlying image; confirm against the
    # generator's shuffling behaviour.
    aug_images = [train_generator[0][0][j] for _ in range(5)]
    fig, axes = plt.subplots(1, 5, figsize=(24,24))
    axes = axes.flatten()
    for img, ax in zip(aug_images, axes):
        ax.imshow(img)
        ax.axis('off')
    # Lay out each figure; a single trailing plt.tight_layout() would only
    # affect the last figure created.
    fig.tight_layout()
plt.show()

# Architecture

In [None]:
# Xception backbone with ImageNet weights and no classification top.
backbone_input_shape = (img_size, img_size, 3)
pre_model = Xception(
    include_top=False,
    weights='imagenet',
    input_shape=backbone_input_shape,
)
# Freeze the backbone; only layers added on top of it will be trained.
pre_model.trainable = False

In [None]:
# Classification head on top of the backbone output: global max pooling,
# two Dense+Dropout stages, then a 4-way softmax.
x = GlobalMaxPooling2D()(pre_model.output)
for units in (512, 128):
    x = Dense(units, activation='relu')(x)
    x = Dropout(0.2)(x)
output = Dense(4, activation='softmax')(x)

model = models.Model(inputs=pre_model.input, outputs=output)
model.summary()

In [None]:
# Render the model's layer graph; kept as the cell's final expression.
plot_model(model)

In [None]:
# def conv_block(f, size=3, stride=1):
#     return models.Sequential([
#         Conv2D(f, kernel_size = size, strides=stride, use_bias=False), 
#         BatchNormalization(),
#         LeakyReLU(alpha=0.1) 
#     ])
# def triple_conv(f):
#     return models.Sequential([
#         conv_block(f),
#         conv_block(f, size=1),  
#         conv_block(f)
#     ])
# def maxpool():
#     return MaxPooling2D(2, strides=2)
# model = models.Sequential ([
#     InputLayer(input_shape=(299, 299, 3)),
#     conv_block(8),
#     maxpool(),
#     conv_block(16),
#     maxpool(),
#     triple_conv(32),
#     maxpool(),
#     triple_conv(64),
#     maxpool(),
#     triple_conv(256),
#     conv_block(128, size=1),
#     conv_block(128),
#     conv_block(256),
#     Flatten(),
#     Dense(4, 'softmax')
# ])

# The custom CNN above is commented out, so `model` here is still the
# Xception-based model built earlier and this repeats its summary.
model.summary()

In [None]:
# Compile for single-label classification over the softmax outputs.
# `learning_rate` replaces the deprecated `lr` alias, which newer Keras
# optimizers reject. `metrics` is passed as a list, the documented form;
# the history keys stay 'categorical_accuracy' / 'val_categorical_accuracy'.
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# All three callbacks monitor the validation loss (lower is better).

# Stop after 5 epochs without improvement and roll back to the best weights.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=1e-4,
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.1 after 2 stagnant epochs, down to 1e-6.
rlr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=2,
    min_delta=1e-4,
    min_lr=1e-6,
    verbose=1,
)

# Keep only the best model seen so far on disk.
ckp = ModelCheckpoint(
    'model.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)

history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=30,
    callbacks=[es, rlr, ckp],
    verbose=1,
)

# NOTE(review): this resets Keras' global state while `model` is still used
# by the evaluation cell that follows; confirm this ordering is intended.
K.clear_session()

In [None]:
# Evaluate on the validation generator: integer class labels against the
# arg-max of the predicted class probabilities.
actual = valid_generator.labels
class_probabilities = model.predict(valid_generator)
preds = class_probabilities.argmax(axis=1)

acc = accuracy_score(y_true=actual, y_pred=preds)
cfmx = confusion_matrix(labels=actual, predictions=preds)

# The printed label says "Test", but the data is the validation subset.
print('Test Accuracy:', acc)
print('Confusion matrix:', cfmx)

In [None]:
# Training curves: loss on the top axis, accuracy on the bottom axis.
# Training values are solid black, validation values dashed red.
hist = pd.DataFrame(history.history)
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12,12))

curve_panels = (
    (ax1, 'loss', 'training loss', 'val_loss', 'validation loss'),
    (ax2, 'categorical_accuracy', 'training accuracy',
     'val_categorical_accuracy', 'validation accuracy'),
)
for axis, train_key, train_label, valid_key, valid_label in curve_panels:
    hist[train_key].plot(ax=axis, c='k', label=train_label)
    hist[valid_key].plot(ax=axis, c='r', linestyle='--', label=valid_label)
    axis.legend()
plt.show()