# Installing dependencies

In [0]:
! pip install -q kaggle

In [0]:
from google.colab import files

files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"pimonteiro","key":"9cc54ee38fc47b5b0f1048204170263a"}'}

In [0]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets list

In [0]:
! pip install livelossplot

# Preparation

## Prepare Covid-Net dataset

In [1]:
!unzip drive/My\ Drive/data.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data/train/f2f3fb4b-6060-426a-81d2-b84c1ec588bd.png  
  inflating: data/train/f2f6de7e-9fbe-4016-9c43-7031e6382baf.png  
  inflating: data/train/f30e0c38-d83e-4ce4-bfcd-c72211d1fd56.png  
  inflating: data/train/f316fb8c-af2d-42ec-aba0-6761643a7c3e.png  
  inflating: data/train/f32a14a8-be06-42f0-b757-984c98f7e556.png  
  inflating: data/train/f32bd2b1-9ed2-454b-9c4d-edd8e25b6fc7.png  
  inflating: data/train/f33e64a8-868a-4dbf-a88e-105db80d5550.png  
  inflating: data/train/f33f9fff-1560-4ee7-8d8e-35de9bf457f2.png  
  inflating: data/train/f34ab641-fbf5-450e-a8d6-40f8095fdcb4.png  
  inflating: data/train/f35df256-c4bf-4767-b9ab-bdf429af9903.png  
  inflating: data/train/f363306d-1c02-4182-9118-17e28d66d265.png  
  inflating: data/train/f36bbc73-8624-4599-82b2-4c3f882e117e.png  
  inflating: data/train/f3700f5c-2540-4b61-aa4c-1ea915725b37.png  
  inflating: data/train/f37f1b3d-eeb0-4a82-837e-8246e400e810.png

## Kaggle Chest Xray-pneumonia

In [0]:
! kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

Downloading chest-xray-pneumonia.zip to /content
100% 2.28G/2.29G [00:23<00:00, 52.9MB/s]
100% 2.29G/2.29G [00:23<00:00, 105MB/s] 


In [0]:
! unzip chest-xray-pneumonia.zip

# Developing model

## Importing data

In [4]:
import pandas as pd
from keras.utils import to_categorical
import numpy as np

np.random.seed(7)

train_data = pd.read_csv('drive/My Drive/train_split_v3.txt', header=None, sep=' ', names=['id','image', 'result', 'type1', 'type2'])
print(train_data.head())


test_data = pd.read_csv('drive/My Drive/test_split_v3.txt', header=None, sep=' ', names=['id','image', 'result', 'type1', 'type2'])
test_data.head(1)

image_height = 224 #150
image_width = 224 #150
batch_size = 32 #10
no_of_epochs  = 40 #10

  id                                              image     result type1 type2
0  3  SARS-10.1148rg.242035193-g04mr34g0-Fig8a-day0....  pneumonia    PA   NaN
1  3  SARS-10.1148rg.242035193-g04mr34g0-Fig8b-day5....  pneumonia    PA   NaN
2  3  SARS-10.1148rg.242035193-g04mr34g0-Fig8c-day10...  pneumonia    PA   NaN
3  7  SARS-10.1148rg.242035193-g04mr34g04a-Fig4a-day...  pneumonia    PA   NaN
4  7  SARS-10.1148rg.242035193-g04mr34g04b-Fig4b-day...  pneumonia    PA   NaN


In [7]:
from keras.preprocessing.image import ImageDataGenerator

train_generator = ImageDataGenerator(
    rescale=1/255,
    rotation_range=15,
    shear_range=0.2,
    zoom_range=0.2
)

test_generator = ImageDataGenerator(
    rescale=1/255
)

train_set = train_generator.flow_from_dataframe(
    dataframe=train_data,
    x_col='image',
    directory="data/train/",
    y_col='result',
    shuffle=True,
    target_size=(image_width, image_height),
    batch_size = batch_size,
    class_mode='categorical'
)

test_set = test_generator.flow_from_dataframe(
    dataframe=test_data,
    x_col='image',
    directory="data/test/",
    y_col='result',
    shuffle=True,
    target_size=(image_width, image_height),
    batch_size = batch_size,
    class_mode='categorical'
)


Found 13640 validated image filenames belonging to 3 classes.
Found 1510 validated image filenames belonging to 3 classes.


## Analysing Data

In [0]:
import seaborn as sns
import cv2

f,ax = plt.subplots(2,3,figsize=(15,9))
samples_normal = train_data[train_data['result'] == 'normal']['image']

for i in range(0,6):
    im = cv2.imread(PATH+'/train/' + samples_normal.iloc[i])
    ax[i//3,i%3].imshow(im)
    ax[i//3,i%3].axis('off')
f.suptitle('Pulmões Normais')
plt.show()

In [0]:
sns.set_style('whitegrid')
grafico=sns.barplot(x=['Treino-Normal','Treino-Pneumonia','Treino-Covid','Teste-Normal','Teste-Pneumonia','Teste-Covid'],
                    y=[train_data[train_data['result'] == 'normal'].shape[0],
                       train_data[train_data['result'] == 'pneumonia'].shape[0],
                       train_data[train_data['result'] == 'COVID-19'].shape[0],
                       train_data[test_data['result'] == 'normal'].shape[0],
                       train_data[test_data['result'] == 'pneumonia'].shape[0],
                       train_data[test_data['result'] == 'COVID-19'].shape[0],
                    ]) 
grafico.set_title('Data Set')
plt.show()

## Importing pre-trained model

In [0]:
import tensorflow as tf
import numpy as np
import pandas as pd
import tensorflow.keras as keras
import matplotlib.pyplot as plt


model = keras.applications.VGG16(weights='imagenet',include_top=True)
model.summary()

In [0]:
target_size = max(model.layers[0].input_shape)
print(target_size)

(None, 224, 224, 3)


## Creating from scratch

In [0]:
from keras.layers import Conv2D, Dropout, MaxPooling2D, Flatten, Dense
from keras.models import Sequential

model = Sequential()
model.add(Conv2D(32,(3,3),input_shape=(image_height,image_width,3),activation='relu'))
model.add(Conv2D(32,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(Conv2D(64,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(128,(3,3),activation='relu'))
model.add(Conv2D(128,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(units=128,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=3,activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

In [0]:
from livelossplot import PlotLossesKeras
from keras.callbacks import ReduceLROnPlateau

reduce_learning_rate = ReduceLROnPlateau(monitor='loss',
                                         factor=0.1,
                                         patience=2,
                                         cooldown=2,
                                         min_lr=0.00001,
                                         verbose=1)
plotlosses = PlotLossesKeras()

callbacks = [reduce_learning_rate, plotlosses]


history = model.fit_generator(
    train_set,
    epochs = no_of_epochs,
    validation_data=test_set,
    callbacks=callbacks
)

Epoch 1/40