In [None]:
import os
import shutil
import warnings
warnings.filterwarnings("ignore")

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.utils import shuffle
from tensorflow.keras import backend as K
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.layers import Conv2D, MaxPooling2D,GlobalAveragePooling2D
from tensorflow.keras.layers import Activation, Dropout, BatchNormalization, Flatten, Dense, AvgPool2D,MaxPool2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array
from tqdm import tqdm
%matplotlib inline

In [None]:
# Read the metadata CSV and the dataset summary CSV in one pass, then preview
# the first rows of the metadata table.
df, summary = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/coronahack-chest-xraydataset/Chest_xray_Corona_Metadata.csv",
        "../input/coronahack-chest-xraydataset/Chest_xray_Corona_dataset_Summary.csv",
    )
)
df.head(5)

In [None]:
# Preview the first five rows of the dataset summary table.
summary.head(n=5)

In [None]:
# Number of rows per value of the Label column (missing values are not counted).
df["Label"].value_counts()

In [None]:
# Number of rows per value of Label_2_Virus_category (missing values are not counted).
df["Label_2_Virus_category"].value_counts()

In [None]:
# Number of rows per value of Label_1_Virus_category (missing values are not counted).
df["Label_1_Virus_category"].value_counts()

In [None]:
# Number of rows per value of Dataset_type; the notebook later filters on
# 'TRAIN' and 'TEST'.
df["Dataset_type"].value_counts()

In [None]:
# Split the metadata rows by the Dataset_type column.
dataset_type = df['Dataset_type']
train_data = df[dataset_type.eq('TRAIN')]
test_data = df[dataset_type.eq('TEST')]

In [None]:
def create_directory(base_dir='../working', splits=('train', 'test'), labels=('Pnemonia', 'Normal')):
    """Create the ``<base_dir>/<split>/<label>`` folder tree for the image copies.

    Called without arguments it creates the same four folders as before:
    ``../working/train/Pnemonia``, ``../working/train/Normal``,
    ``../working/test/Pnemonia`` and ``../working/test/Normal``.

    Parameters
    ----------
    base_dir : str, optional
        Root folder under which the tree is created.
    splits : iterable of str, optional
        Names of the first-level sub-folders.
    labels : iterable of str, optional
        Names of the class sub-folders created inside every split.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If a folder cannot be created for a reason other than already
        existing (e.g. missing permissions).
    """
    for split in splits:
        for label in labels:
            # exist_ok=True keeps re-runs idempotent: an existing folder no
            # longer aborts creation of the remaining ones, and real errors
            # are not silently swallowed by a bare `except`.
            os.makedirs(os.path.join(base_dir, split, label), exist_ok=True)

In [None]:
# Preview the first TEST rows whose Label is 'Pnemonia' (the label string
# used throughout this notebook).
test_data.loc[test_data['Label'] == 'Pnemonia'].head()

In [None]:
# Preview the first TRAIN rows whose Label is 'Pnemonia' (the label string
# used throughout this notebook).
train_data.loc[train_data['Label'] == 'Pnemonia'].head()

In [None]:
# Build the ../working/{train,test}/{Pnemonia,Normal} folders that the copy
# cells below write into.
create_directory()

In [None]:
# Copy the TRAIN images into one folder per class under ../working/train.
source_train = "../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train"
train_pnemonia = '../working/train/Pnemonia/'
train_normal = '../working/train/Normal/'

# File names of the rows labelled 'Pnemonia' and of the rows labelled 'Normal'.
move_train_pnemonia = train_data.loc[train_data['Label'] == 'Pnemonia', 'X_ray_image_name'].values
move_train_normal = train_data.loc[train_data['Label'] == 'Normal', 'X_ray_image_name'].values

# 'Pnemonia' files are copied first, then 'Normal' files.
for file_names, target_dir in (
    (move_train_pnemonia, train_pnemonia),
    (move_train_normal, train_normal),
):
    for file_name in file_names:
        shutil.copy(os.path.join(source_train, file_name), target_dir)
    

In [None]:
# Copy the TEST images into one folder per class under ../working/test.
source_test = "../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/test"
test_pnemonia = '../working/test/Pnemonia/'
test_normal = '../working/test/Normal/'

# File names of the rows labelled 'Pnemonia' and of the rows labelled 'Normal'.
move_test_pnemonia = test_data.loc[test_data['Label'] == 'Pnemonia', 'X_ray_image_name'].values
move_test_normal = test_data.loc[test_data['Label'] == 'Normal', 'X_ray_image_name'].values

# 'Pnemonia' files are copied first, then 'Normal' files.
for file_names, target_dir in (
    (move_test_pnemonia, test_pnemonia),
    (move_test_normal, test_normal),
):
    for file_name in file_names:
        shutil.copy(os.path.join(source_test, file_name), target_dir)

In [None]:
# Load one training image with the Keras loader and render it.
img = load_img("../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train/1-s2.0-S0140673620303706-fx1_lrg.jpg")
plt.imshow(img)

In [None]:
# Read the same image with OpenCV and display the array it returns.
sample_pixels = cv2.imread("../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train/1-s2.0-S0140673620303706-fx1_lrg.jpg")
sample_pixels

In [None]:
# Read the image with OpenCV and display the shape of the returned array.
sample_image = cv2.imread("../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/train/1-s2.0-S0140673620303706-fx1_lrg.jpg")
sample_image.shape

In [None]:
# Training generator: rescales pixels to [0, 1], applies random augmentation
# and reserves 20% of the images for the 'validation' subset.
train_datagen = ImageDataGenerator(
    rescale=1/255,
    # rotation_range is expressed in degrees; the previous value of 0.2 was
    # effectively "no rotation" (it mirrored the fractional zoom_range).
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True,
    # NOTE(review): upside-down chest X-rays are unlikely at inference time;
    # consider disabling vertical_flip. Kept to preserve the original setup.
    vertical_flip=True,
    fill_mode='nearest',
    validation_split=0.2,
)

# Test generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1/255)

In [None]:
# Options shared by every directory iterator: 200x200 inputs, batches of 50,
# a single 0/1 label per image.
flow_options = dict(target_size=(200, 200), batch_size=50, class_mode='binary')

train_gen = train_datagen.flow_from_directory("../working/train/",
                                              subset='training', **flow_options)

# NOTE(review): the validation subset comes from train_datagen, so validation
# images are randomly augmented as well - confirm this is intended.
valid_gen = train_datagen.flow_from_directory("../working/train/",
                                              subset='validation', **flow_options)

# shuffle=False keeps the batches in the same order as test_gen.labels.
# flow_from_directory shuffles by default, which made every comparison of
# model.predict(test_gen) against test_gen.labels meaningless.
test_gen = test_datagen.flow_from_directory("../working/test/",
                                            shuffle=False, **flow_options)

In [None]:
# Show the class-name -> integer-label mapping used by the training generator.
train_gen.class_indices

In [None]:
# Baseline CNN: four Conv2D + MaxPool2D stages followed by a dense head with a
# single sigmoid output for the binary label.
model = Sequential([
    Conv2D(16, (3, 3), activation='relu', input_shape=(200, 200, 3)),
    MaxPool2D(2, 2),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(2, 2),
    Flatten(),
    # The hidden layer used to have a single ReLU unit, squeezing all features
    # through one scalar (and outputting a constant once that unit dies).
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Stop as soon as val_loss stops improving and roll back to the best epoch, so
# the model evaluated afterwards is not the one from the degraded last epoch.
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1,
                                             restore_best_weights=True)

# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases reject.
model.compile(optimizer=RMSprop(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_gen, validation_data=valid_gen, epochs=10,
                    callbacks=[callbacks], verbose=1)

In [None]:
# Display one image from the test folder.
test_dataset = "../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/test"
img2 = load_img("../input/coronahack-chest-xraydataset/Coronahack-Chest-XRay-Dataset/Coronahack-Chest-XRay-Dataset/test/IM-0001-0001.jpeg")
plt.imshow(img2)

    

In [None]:
# Evaluate the baseline CNN on the test generator.
# Images and labels are taken from the same batches, so predictions and ground
# truth stay aligned even when the generator shuffles its samples. Comparing
# model.predict(test_gen) with test_gen.labels is only valid for an unshuffled
# generator.
batch_labels, batch_probs = [], []
for batch_index in range(len(test_gen)):
    batch_images, labels = test_gen[batch_index]
    batch_labels.append(labels)
    batch_probs.append(model.predict(batch_images, verbose=0))

y_test = np.concatenate(batch_labels).astype("int32")
# Sigmoid output: probabilities above 0.5 are mapped to class 1.
pred = (np.concatenate(batch_probs) > 0.5).astype("int32")

print('Classification report:\n', classification_report(y_test, pred))
print('Accuracy score:\n', accuracy_score(y_test, pred))

In [None]:
# Sequential.predict_classes() was removed in TensorFlow 2.6. For a single
# sigmoid output it was equivalent to thresholding predict() at 0.5, which is
# what this cell does instead.
# Labels are collected from the same batches as the predictions so both stay
# aligned regardless of the generator's shuffle setting.
class_labels, class_probs = [], []
for batch_index in range(len(test_gen)):
    batch_images, labels = test_gen[batch_index]
    class_labels.append(labels)
    class_probs.append(model.predict(batch_images, verbose=0))

y_true_class = np.concatenate(class_labels).astype("int32")
pred_class = (np.concatenate(class_probs) > 0.5).astype("int32")

print('Classification report:\n', classification_report(y_true_class, pred_class))
print('Accuracy Score:\n', accuracy_score(y_true_class, pred_class))

In [None]:
# Transfer learning: ResNet50 convolutional base with ImageNet weights and
# without its classification head.
INPUT_SHAPE = (200, 200, 3)

base_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=INPUT_SHAPE,
)

# Freeze the pretrained weights so they are not updated during training.
base_model.trainable = False
base_model.summary()

In [None]:
# Classification head on top of the ResNet50 base: global average pooling,
# one hidden dense layer with dropout, and a sigmoid output for the binary label.
# NOTE(review): the generators feed pixels rescaled to [0, 1], while the
# ImageNet weights are normally paired with
# tf.keras.applications.resnet50.preprocess_input - confirm the input scaling.
model2 = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    # The hidden layer had no activation, so the whole head collapsed into a
    # single linear map before the sigmoid; ReLU makes it non-linear.
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model2.summary()

In [None]:

# Stop after two epochs without val_loss improvement and roll back to the best
# epoch, so the model evaluated afterwards is not the one from the last
# (degraded) epoch.
callbacks2 = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2,
                                              restore_best_weights=True)

model2.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])

In [None]:
# Train model2 for at most 10 epochs, monitored by the callbacks2 early stopping.
model2.fit(
    train_gen,
    epochs=10,
    validation_data=valid_gen,
    verbose=1,
    callbacks=[callbacks2],
)

In [None]:
# Evaluate the ResNet50-based model on the test generator.
# Images and labels are taken from the same batches, so predictions and ground
# truth stay aligned even when the generator shuffles its samples. Comparing
# model2.predict(test_gen) with test_gen.labels is only valid for an
# unshuffled generator.
batch_labels2, batch_probs2 = [], []
for batch_index in range(len(test_gen)):
    batch_images, labels = test_gen[batch_index]
    batch_labels2.append(labels)
    batch_probs2.append(model2.predict(batch_images, verbose=0))

y_test2 = np.concatenate(batch_labels2).astype("int32")
# Sigmoid output: probabilities above 0.5 are mapped to class 1.
predictions = (np.concatenate(batch_probs2) > 0.5).astype("int32")

print('Classification report:\n', classification_report(y_test2, predictions))
print('Accuracy score:\n', accuracy_score(y_test2, predictions))