In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation,Dropout,Conv2D, MaxPooling2D,BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
import time
import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import os
import cv2
import shutil
from pandas import DataFrame
from sklearn.metrics import confusion_matrix, classification_report
# Set pandas' 'display.width' option to 150 for the rest of the notebook.
pd.set_option('display.width', 150)

In [None]:
# Folder holding the images of each class in the COVID-19 Radiography dataset.
covid_dir='../input/covid19-radiography-dataset/COVID-19_Radiography_Dataset/COVID-19_Radiography_Dataset/COVID'
lung_opacity_dir='../input/covid19-radiography-dataset/COVID-19_Radiography_Dataset/COVID-19_Radiography_Dataset/Lung_Opacity'
normal_dir='../input/covid19-radiography-dataset/COVID-19_Radiography_Dataset/COVID-19_Radiography_Dataset/Normal'
pneumonia_dir='../input/covid19-radiography-dataset/COVID-19_Radiography_Dataset/COVID-19_Radiography_Dataset/Viral Pneumonia'
# dirlist and classes are parallel: classes[k] is the label given to every entry of dirlist[k].
dirlist=[covid_dir, lung_opacity_dir, normal_dir, pneumonia_dir]
classes=['covid', 'lung_opacity', 'normal', 'pneumonia']

# Build two parallel lists: one file path and one class label per directory entry.
filepaths=[]
labels=[]
for class_dir, class_name in zip(dirlist, classes):
    # os.listdir returns entries in arbitrary order. Sorting makes the row order
    # reproducible, which matters because later cells drop the *first* rows of a class.
    for fname in sorted(os.listdir(class_dir)):
        filepaths.append(os.path.join(class_dir, fname))
        labels.append(class_name)
print ('filepaths: ', len(filepaths), '   labels: ', len(labels))

In [None]:
# Assemble the (file_paths, labels) table that the rest of the notebook works on.
Fseries=pd.Series(filepaths, name='file_paths')
Lseries=pd.Series(labels, name='labels')
# Concatenating the two named Series side by side already yields a frame with
# columns 'file_paths' and 'labels'. No fixed-size reshape is involved, so this
# works for any number of images.
df=pd.concat([Fseries,Lseries], axis=1)
# Per-class image counts.
print(df['labels'].value_counts())

In [None]:
# Show the first rows of the file_paths / labels table.
df.head()

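**The dataset is imbalanced. Because the goal is to detect COVID, we add pneumonia images from another dataset and remove images from the lung-opacity and normal classes, so that each class ends up with the same number of images as the COVID class.**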

In [None]:
# Per-class image counts, read from the data instead of being hard-coded so the
# balancing cells keep working if the dataset changes.
label_counts = df['labels'].value_counts()
normal_count = int(label_counts.get('normal', 0))
lung_opacity_count = int(label_counts.get('lung_opacity', 0))
covid_count = int(label_counts.get('covid', 0))

# argmax over a boolean mask returns the position of the FIRST True value, so
# despite the "_max_index" names these are the positions of the first row of
# each class.
normal_image_max_index = (df.labels.values == 'normal').argmax()
print(normal_image_max_index)
lung_opacity_max_index = (df.labels.values == 'lung_opacity').argmax()
print(lung_opacity_max_index)

In [None]:
# Down-sample 'normal' and 'lung_opacity' to the COVID count by removing the
# first (class_count - covid_count) rows of each class.
# All row labels are collected first and removed with ONE drop call; calling
# df.drop once per row builds a new frame on every iteration.
rows_to_drop = list(range(normal_image_max_index,
                          normal_image_max_index + (normal_count - covid_count)))
rows_to_drop += list(range(lung_opacity_max_index,
                           lung_opacity_max_index + (lung_opacity_count - covid_count)))
# drop() matches on index labels, exactly as the per-row version did.
df = df.drop(rows_to_drop)

df['labels'].value_counts()

In [None]:
# Collect the extra pneumonia images from the separate chest-xray-pneumonia dataset.
# NOTE: this rebinds `filepaths` and `labels`, which an earlier cell used for the
# radiography dataset.
# glob returns matches in arbitrary order; sorting keeps the row order (and so
# which rows get trimmed later) reproducible.
filepaths = sorted(glob.glob('../input/chest-xray-pneumonia/chest_xray/train/PNEUMONIA/*.jpeg'))
# Every file found by this pattern gets the same class label.
labels = ['pneumonia'] * len(filepaths)
print ('filepaths: ', len(filepaths), '   labels: ', len(labels))

In [None]:
# Table of the extra pneumonia images, using the same column names as `df`
# ('file_paths', 'labels') so the two frames can be concatenated row-wise.
fseries = pd.Series(filepaths, name='file_paths', dtype='str')
lseries = pd.Series(labels, name='labels', dtype='str')
# The named Series become the columns directly; no fixed-size reshape is
# involved, so this works for any number of files.
extra_df = pd.concat([fseries, lseries], axis=1)
extra_df.head()

In [None]:
# Append the extra pneumonia rows below the existing ones.
# ignore_index=True renumbers the rows 0..n-1 in the same step, without adding
# the old labels as an extra 'index' column the way a bare reset_index() does.
df=pd.concat([df,extra_df], axis=0, ignore_index=True)

In [None]:
# Only the last expression of a cell is displayed, so the class counts have to
# be printed explicitly or they are silently discarded.
print(df['labels'].value_counts())
df.head()

In [None]:
# Trim 'pneumonia' down to the COVID count (covid_count comes from an earlier cell).
# The class size is read from the data instead of being hard-coded.
pneumonia_count = int((df['labels'] == 'pneumonia').sum())
# argmax over a boolean mask gives the position of the FIRST pneumonia row
# (despite the "_max_index" name).
pneumonia_max_index=(df.labels.values == 'pneumonia').argmax()
print(pneumonia_max_index)
# Remove the surplus rows, starting at the first pneumonia row, with a single
# drop call instead of one call (and one frame copy) per row.
# drop() matches on index labels, so this relies on df having a 0..n-1 index
# at this point, i.e. position == label.
pneumonia_excess = pneumonia_count - covid_count
df = df.drop(list(range(pneumonia_max_index, pneumonia_max_index + pneumonia_excess)))

In [None]:
# Print the number of rows per label, then show the first rows of the frame.
balanced_counts = df.labels.value_counts()
print(balanced_counts)
df.head()

In [None]:
# Size passed as `target_size` to every flow_from_dataframe call below.
target_size=(299,299)
# Batch size passed to every flow_from_dataframe call below.
batch_size=64

In [None]:
# Fixed seed so the train/test partition is identical on every run.
SPLIT_SEED = 42
# Fraction of train_df held out for validation.
VALIDATION_FRACTION = 0.1
preprocess = tf.keras.applications.inception_resnet_v2.preprocess_input

# Training generator: random rotation/zoom augmentation plus model preprocessing.
train_datagen = ImageDataGenerator(rotation_range=20, zoom_range=0.2, preprocessing_function=preprocess, validation_split=VALIDATION_FRACTION)
# Validation generator: same validation_split, so subset='validation' selects
# the slice of train_df that subset='training' leaves out, but WITHOUT
# augmentation, so val_loss is measured on unmodified images.
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess, validation_split=VALIDATION_FRACTION)
# Test generator: preprocessing only.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess)

# Seeded, stratified split: 95% train(+validation), 5% test, with the class
# proportions preserved in both parts.
train_df, test_df = train_test_split(df, train_size=0.95, shuffle=True, random_state=SPLIT_SEED, stratify=df['labels'])

# Arguments shared by all three iterators.
flow_kwargs = dict(x_col='file_paths', y_col='labels', target_size=target_size, batch_size=batch_size, color_mode='rgb', class_mode='categorical')
train_set = train_datagen.flow_from_dataframe(train_df, shuffle=True, subset='training', **flow_kwargs)
# Evaluation data is not shuffled: order does not affect the metrics, and a
# fixed order keeps predictions aligned with the iterator's `classes` attribute
# (needed for a confusion matrix / classification report).
valid_set = valid_datagen.flow_from_dataframe(train_df, shuffle=False, subset='validation', **flow_kwargs)
test_set = test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_kwargs)
# Pull one batch of test images and one-hot labels for ad-hoc inspection.
test_x, test_y = next(test_set)

In [None]:
# InceptionResNetV2 backbone built with include_top=False for 299x299
# three-channel input.
base_model = tf.keras.applications.InceptionResNetV2(
    input_shape=(299, 299, 3),
    include_top=False,
)

In [None]:
# Print the layer-by-layer summary of the backbone.
base_model.summary()

In [None]:
# Classifier: backbone -> global average pooling -> 256-unit ReLU layer ->
# batch norm -> dropout -> 4-way softmax (one output per class label).
model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(4, activation='softmax'),
])
lr=0.001
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=lr), metrics=['accuracy'])

In [None]:
# Settings for the two val_loss-driven callbacks.
patience = 1        # passed as `patience` to ReduceLROnPlateau
stop_patience = 5   # passed as `patience` to EarlyStopping
factor = 0.5        # passed as `factor` to ReduceLROnPlateau

# Stops training based on val_loss; restore_best_weights=True is requested.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=stop_patience,
    restore_best_weights=True,
    verbose=1,
)
# Adjusts the learning rate based on val_loss.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=patience,
    factor=factor,
    verbose=1,
)
callbacks = [early_stopping, lr_on_plateau]

In [None]:
# Train for up to `epochs` epochs, validating on valid_set and running the
# configured callbacks; the returned History object is kept in `history`.
epochs = 20
history = model.fit(
    train_set,
    epochs=epochs,
    validation_data=valid_set,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Evaluate the trained model on the test generator; the result is shown as the
# cell output.
model.evaluate(test_set)