In [0]:
#https://arxiv.org/ftp/arxiv/papers/2003/2003.13145.pdf

In [3]:
#Setup Kaggle DIR; copy json file ; chmod
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!ls ~/.kaggle

kaggle.json


In [4]:

#Install kaggle packages
!pip install -q kaggle
!pip install -q kaggle-cli

[K     |████████████████████████████████| 81kB 5.6MB/s 
[K     |████████████████████████████████| 5.3MB 25.9MB/s 
[K     |████████████████████████████████| 112kB 59.4MB/s 
[K     |████████████████████████████████| 51kB 8.5MB/s 
[K     |████████████████████████████████| 112kB 58.8MB/s 
[?25h  Building wheel for kaggle-cli (setup.py) ... [?25l[?25hdone
  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [5]:
#!kaggle datasets list
!kaggle datasets list -s tawsifurrahman/covid19-radiography-database

ref                                          title                          size  lastUpdated          downloadCount  
-------------------------------------------  -----------------------------  ----  -------------------  -------------  
tawsifurrahman/covid19-radiography-database  COVID-19 Radiography Database   1GB  2020-03-28 23:36:00             96  


In [6]:
!kaggle datasets download -d tawsifurrahman/covid19-radiography-database

Downloading covid19-radiography-database.zip to /content
100% 1.15G/1.15G [00:20<00:00, 45.2MB/s]
100% 1.15G/1.15G [00:20<00:00, 60.1MB/s]


In [0]:
%%capture
!unzip covid19-radiography-database.zip

In [0]:
%%capture
!pip install efficientnet

In [0]:
import glob
import pandas as pd
import os
import numpy as np

In [0]:

covid=os.listdir('/content/COVID-19 Radiography Database/COVID-19')
normal=os.listdir('/content/COVID-19 Radiography Database/NORMAL')
pneumonia=os.listdir('/content/COVID-19 Radiography Database/Viral Pneumonia')


In [11]:
cov=list(zip(covid,['covid']*(len(covid))))
covid_df = pd.DataFrame(cov,columns=['file','label']) 
covid_df.head()

Unnamed: 0,file,label
0,COVID-19 (48).png,covid
1,COVID-19(178).png,covid
2,COVID-19(180).png,covid
3,COVID-19 (19).png,covid
4,COVID-19(155).png,covid


In [12]:
norm=list(zip(normal,['normal']*(len(normal))))
normal_df = pd.DataFrame(norm,columns=['file','label']) 
normal_df.head()

Unnamed: 0,file,label
0,NORMAL (110).png,normal
1,NORMAL (360).png,normal
2,NORMAL (131).png,normal
3,NORMAL (662).png,normal
4,NORMAL (1009).png,normal


In [13]:
pneumonia=list(zip(pneumonia,['pneumonia']*(len(pneumonia))*2))
pneumonia_df = pd.DataFrame(pneumonia,columns=['file','label']) 
pneumonia_df.head()

Unnamed: 0,file,label
0,Viral Pneumonia (1067).png,pneumonia
1,Viral Pneumonia (1094).png,pneumonia
2,Viral Pneumonia (1287).png,pneumonia
3,Viral Pneumonia (1218).png,pneumonia
4,Viral Pneumonia (876).png,pneumonia


In [14]:
dataframe=pd.concat([covid_df,normal_df,pneumonia_df])
dataframe.head()

Unnamed: 0,file,label
0,COVID-19 (48).png,covid
1,COVID-19(178).png,covid
2,COVID-19(180).png,covid
3,COVID-19 (19).png,covid
4,COVID-19(155).png,covid


In [15]:
df = pd.get_dummies(dataframe['label'])
df.head()

Unnamed: 0,covid,normal,pneumonia
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [16]:
df = pd.concat([dataframe, df], axis=1)
df.head()

Unnamed: 0,file,label,covid,normal,pneumonia
0,COVID-19 (48).png,covid,1,0,0
1,COVID-19(178).png,covid,1,0,0
2,COVID-19(180).png,covid,1,0,0
3,COVID-19 (19).png,covid,1,0,0
4,COVID-19(155).png,covid,1,0,0


In [0]:
df=df.sample(frac=1)

In [0]:
!mkdir data

In [0]:
!cp -r /content/COVID-19\ Radiography\ Database/COVID-19/* data


In [0]:
!cp -r /content/COVID-19\ Radiography\ Database/NORMAL/* data


In [0]:
!cp -r /content/COVID-19\ Radiography\ Database/Viral\ Pneumonia/* data


In [0]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [0]:
from keras_preprocessing.image import ImageDataGenerator

data_gen= ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=.1,
    rescale=1/255,
    fill_mode='nearest',
    shear_range=0.1,
    brightness_range=[0.5, 1.5])

In [0]:
img_shape=300
batch_size=16

In [0]:
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPool2D, BatchNormalization,Input,MaxPooling2D,GlobalMaxPooling2D,concatenate
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
import tensorflow as tf

In [31]:
from sklearn.model_selection import KFold
import gc

from sklearn.metrics import classification_report,accuracy_score,f1_score
classification_reports=[]
kf = KFold(n_splits=5)
history=[]
accuracy=[]
f1_scores=[]
fold=0
for train_index, test_index in kf.split(df):
  train=df.iloc[train_index,:]
  val=df.iloc[test_index,:]
  print('-----------fold {}--------------'.format(fold))
  train_generator=data_gen.flow_from_dataframe(train,directory='data',
                                                      target_size=(img_shape,img_shape),
                                                      x_col="file",
                                                      y_col=['covid','normal','pneumonia'],
                                                      class_mode='raw',
                                                      shuffle=True,
                                                      batch_size=batch_size)

  val_generator=data_gen.flow_from_dataframe(val,directory='data',
                                                        target_size=(img_shape,img_shape),
                                                        x_col="file",
                                                        y_col=['covid','normal','pneumonia'],
                                                        class_mode='raw',
                                                        shuffle=False,
                                                        batch_size=batch_size)
  train_generator.next()[0].shape,train_generator.next()[1].shape
  import efficientnet.tfkeras as efn
  model =efn.EfficientNetB4(weights ='noisy-student', include_top=False, input_shape = (img_shape,img_shape,3))
    
  x = model.output
  x = GlobalAveragePooling2D()(x)
  x = Dropout(0.3)(x)
  x = Dense(128, activation="relu")(x)
  x = Dropout(0.3)(x)
  x = Dense(64, activation="relu")(x)
  predictions = Dense(3, activation="softmax")(x)
  model = Model(inputs=model.input, outputs=predictions)

  def custom_loss(y_true, y_pred):
      return tf.keras.losses.categorical_crossentropy(y_true, y_pred, label_smoothing=0.1)
  model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])

  results = model.fit(train_generator,epochs=15,
                              steps_per_epoch=train_generator.n/batch_size,
                              validation_data=val_generator,
                             validation_steps=val_generator.n/batch_size,
                              callbacks=[ReduceLROnPlateau(monitor='val_loss', factor=0.3,patience=2, min_lr=0.000001)])
  val_generator.reset()
  y_pred=model.predict(val_generator,steps=val_generator.n/batch_size,verbose=1)
  y_pred=y_pred.round().astype(int)
  y_true=val.iloc[:,2::]
  classification_reports.append(classification_report(y_true, y_pred,target_names=['covid','normal','pneumonia']))#
  accuracy.append(accuracy_score(y_true,y_pred))
  f1_scores.append(f1_score(y_true,y_pred,average='macro'))
  fold+=1
  del model
  tf.keras.backend.clear_session()
  gc.collect()
  

-----------fold 0--------------
Found 2324 validated image filenames.
Found 581 validated image filenames.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


  _warn_prf(average, modifier, msg_start, len(result))


-----------fold 1--------------
Found 2324 validated image filenames.
Found 581 validated image filenames.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
-----------fold 2--------------
Found 2324 validated image filenames.
Found 581 validated image filenames.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
-----------fold 3--------------
Found 2324 validated image filenames.
Found 581 validated image filenames.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
-----------fold 4--------------
Found 2324 validated image filenames.
Found 581 validated image filenames.
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoc

In [32]:
accuracy

[0.9759036144578314,
 0.9672977624784854,
 0.9845094664371773,
 0.9759036144578314,
 0.9621342512908778]

In [33]:
f1_scores

[0.9846841011524033,
 0.9697631088730342,
 0.9856448236696611,
 0.9762670447641546,
 0.9711372989150767]

In [34]:
np.mean(f1_scores)

0.9774992754748659

In [37]:
for i in range(5):
  print('--------------FOLD {}------------------'.format(i))
  print(classification_reports[i])

--------------FOLD 0------------------
              precision    recall  f1-score   support

       covid       1.00      1.00      1.00        35
      normal       0.98      0.98      0.98       276
   pneumonia       0.98      0.97      0.98       270

   micro avg       0.98      0.98      0.98       581
   macro avg       0.99      0.98      0.98       581
weighted avg       0.98      0.98      0.98       581
 samples avg       0.98      0.98      0.98       581

--------------FOLD 1------------------
              precision    recall  f1-score   support

       covid       0.98      0.96      0.97        54
      normal       0.96      0.98      0.97       267
   pneumonia       0.98      0.95      0.97       260

   micro avg       0.97      0.97      0.97       581
   macro avg       0.97      0.97      0.97       581
weighted avg       0.97      0.97      0.97       581
 samples avg       0.97      0.97      0.97       581

--------------FOLD 2------------------
             

In [0]:
print(classification_reports[0])