In [1]:
import os

In [2]:
# Kaggle API credentials must never be hardcoded in a notebook: anything
# committed here is readable by everyone with access to the file.
# NOTE(review): the API key that used to be embedded in this cell has been
# exposed and should be revoked from the Kaggle account settings.
from getpass import getpass

# The Kaggle CLI accepts either the two environment variables below or a
# ~/.kaggle/kaggle.json token file; only prompt when neither is available.
_kaggle_token_file = os.path.join(os.path.expanduser('~'), '.kaggle', 'kaggle.json')
if not os.path.isfile(_kaggle_token_file):
    for _credential in ('KAGGLE_USERNAME', 'KAGGLE_KEY'):
        if not os.environ.get(_credential):
            # getpass keeps the typed value out of the saved cell output.
            os.environ[_credential] = getpass(f'{_credential}: ')

In [3]:
# Download the COVID-19 Radiography Database archive with the Kaggle CLI.
# The CLI authenticates with the Kaggle credentials configured in the
# preceding cell.
!kaggle datasets download -d tawsifurrahman/covid19-radiography-database

In [4]:
!unzip covid19-radiography-database.zip

In [5]:
import glob
import random
import matplotlib.pyplot as plt
import cv2

In [6]:
# Per-class image folders produced by extracting the dataset archive.
covid = r'./COVID-19_Radiography_Dataset/COVID'
lung_opacity = r'./COVID-19_Radiography_Dataset/Lung_Opacity'
normal = r'./COVID-19_Radiography_Dataset/Normal'
viral_pneumonia = r'./COVID-19_Radiography_Dataset/Viral Pneumonia'

# One glob pattern per class; later cells reuse this list.
filepath_type = [covid+"/*.png",  lung_opacity+"/*.png", normal+"/*.png", viral_pneumonia+"/*.png"]
random_images = []

# Pick one random image per class.
for path in filepath_type:
    images = glob.glob(path)
    if not images:
        # random.choice([]) would raise an uninformative IndexError.
        raise FileNotFoundError(
            f"No PNG images matched {path!r}; was the dataset downloaded and extracted?"
        )
    random_images.append(random.choice(images))

# Show the four samples in a 2x2 grid, each titled with its file name.
_, axs = plt.subplots(2, 2, figsize=(15,15))
axs = axs.flatten()
for img, ax in zip(random_images, axs):
    pixels = cv2.imread(img)
    if pixels is None:
        # cv2.imread signals an unreadable file by returning None.
        raise OSError(f"Could not read image {img!r}")
    ax.set_title(os.path.basename(img))
    # OpenCV loads channels as BGR while matplotlib expects RGB.
    ax.imshow(cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB))
plt.show()

In [7]:
# Count the images available for each class and plot the counts as bars.
class_names = []
num_of_images = []
for pattern in filepath_type:
    # The third path component of the pattern is the class folder name.
    class_names.append(pattern.split("/")[2])
    num_of_images.append(len(glob.glob(pattern)))

plt.bar(class_names, num_of_images)
plt.show()

In [8]:
from os import makedirs

In [9]:
# Build the output tree: <dataset_home>/<split>/<label>/ for every
# combination of split and class label.
dataset_home = './dataset_covid/'
subdirs = ['train/', 'test/']
labeldirs = ['covid/', 'lung_opacity/', 'normal/', 'viral_pneumonia/']

target_dirs = [
    dataset_home + subdir + labldir
    for subdir in subdirs
    for labldir in labeldirs
]
for newdir in target_dirs:
    # exist_ok=True makes this cell safe to re-run.
    makedirs(newdir, exist_ok=True)

In [10]:
from pathlib import Path
from shutil import copyfile
import os

In [11]:
# Fraction of images held out from training (copied under 'test/', which is
# later passed to model.fit as validation data).
val_ratio = 0.25
src_directories = [covid, lung_opacity, normal, viral_pneumonia]

# File-name prefix -> label subdirectory under each split.
label_dir_by_prefix = {
    'COVID': 'covid/',
    'Lung_Opacity': 'lung_opacity/',
    'Normal': 'normal/',
    'Viral Pneumonia': 'viral_pneumonia/',
}

# A dedicated, seeded generator plus a sorted file list makes the split
# reproducible: re-running this cell sends every image to the same split
# instead of scattering copies of one image across both train and test.
split_rng = random.Random(42)

for path in filepath_type:
    for image in sorted(glob.glob(path)):
        src = Path(image)
        if split_rng.random() < val_ratio:
            dst_dir, other_dir = 'test/', 'train/'
        else:
            dst_dir, other_dir = 'train/', 'test/'
        filename = os.path.basename(src)
        # Files whose name matches no known prefix are skipped.
        for prefix, label_dir in label_dir_by_prefix.items():
            if filename.startswith(prefix):
                # Drop any copy left in the other split by an earlier run so
                # the same image never appears in both train and test.
                stale = dataset_home + other_dir + label_dir + filename
                if os.path.exists(stale):
                    os.remove(stale)
                copyfile(src, dataset_home + dst_dir + label_dir + filename)
                break


In [12]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [13]:
# Spatial size (two equal sides, in pixels) that images are resized to
# before being fed to the network.
image_side = 256
input_shape = (image_side, image_side)

In [14]:
# Both generators scale pixel values by the same factor.
pixel_scale = 1.0/255.0

# Held-out images are only rescaled.
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Training images are additionally augmented with random transforms.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation)

# Options shared by the two directory iterators.
flow_options = dict(
    class_mode='categorical',
    batch_size=64,
    target_size=input_shape,
)
train_it = train_datagen.flow_from_directory('./dataset_covid/train/', **flow_options)
# Shuffling is switched off for the held-out iterator.
test_it = test_datagen.flow_from_directory('./dataset_covid/test/', shuffle=False, **flow_options)

In [15]:
from sklearn.utils import class_weight
import numpy as np

In [16]:
# Compute 'balanced' per-class weights from the training labels.
train_labels = train_it.classes
label_ids = np.unique(train_labels)

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=label_ids,
    y=train_labels,
)

# Map each class index to its weight, then display the mapping.
cw_dict = {label_id: weight for label_id, weight in zip(label_ids, class_weights)}
cw_dict

In [17]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPooling2D, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50

In [18]:
# ResNet50 backbone with no pretrained weights (weights=None) and without
# its classification top; inputs are 3-channel images of size input_shape.
height, width = input_shape
base_model = ResNet50(weights=None, include_top=False, input_shape=(height, width, 3))

# Classification head: global average pooling -> dropout -> 4-way softmax.
pooled = GlobalAveragePooling2D()(base_model.output)
regularized = Dropout(0.7)(pooled)
predictions = Dense(4, activation='softmax')(regularized)

model = Model(inputs=base_model.input, outputs=predictions)

In [19]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

In [20]:
# Stop training once val_loss has not improved by at least min_delta for
# 8 epochs. restore_best_weights=True rolls the model back to the epoch with
# the best val_loss; without it the model that gets saved afterwards keeps
# the weights from the last, already-degraded epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=8, verbose=1, min_delta=1e-4,
                           restore_best_weights=True)
# Multiply the learning rate by 0.1 after 4 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4, verbose=1, min_delta=1e-4)
callbacks_list = [early_stop, reduce_lr]
adam = Adam(learning_rate=0.0001)

# Categorical cross-entropy matches the one-hot labels produced by the
# class_mode='categorical' iterators.
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [21]:
# model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only appends a stray "None" to the output.
model.summary()

In [22]:
# Train for up to 100 epochs on the training iterator, validating on the
# held-out iterator, with per-class loss weights and the configured callbacks.
history = model.fit(
    train_it,
    validation_data=test_it,
    class_weight=cw_dict,
    epochs=100,
    callbacks=callbacks_list,
)

In [26]:
# Write the trained model to an HDF5 file in the working directory.
model_output_path = './CovidModelwithLRDecay_kaggle.h5'
model.save(model_output_path)

In [27]:
from IPython.display import FileLink
# Link to the file actually written by model.save(); the previous target
# ('./CovidModelwithLRDecay.h5') is never created by this notebook.
FileLink(r'./CovidModelwithLRDecay_kaggle.h5')