In [1]:
import warnings
warnings.filterwarnings("ignore")

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalAveragePooling2D,Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D,Activation
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam

import tensorflow as tf
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

In [2]:
# Target size passed to every generator (`target_size`) and used for the
# spatial part of both models' `input_shape`.
im_shape = (200,200)

# Dataset split directories (Kaggle input layout).
TRAINING_DIR = '../input/chest-xray-pneumoniacovid19tuberculosis/train'
TEST_DIR = '../input/chest-xray-pneumoniacovid19tuberculosis/test'
VAL_DIR = '../input/chest-xray-pneumoniacovid19tuberculosis/val'

seed = 10
BATCH_SIZE = 16

# Seed every random number generator the notebook relies on. Passing `seed`
# to the generators only fixes their shuffle order; weight initialisation and
# dropout draw from the Python / NumPy / TensorFlow global generators.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [3]:
# Augmented variant, currently disabled:
# data_generator = ImageDataGenerator(
#        validation_split=0.2, rotation_range=5, width_shift_range=0.05,
#        height_shift_range=0.05, preprocessing_function=preprocess_input,
#        zoom_range=0.05, horizontal_flip=True, fill_mode='nearest')

# Generator factories without augmentation: both apply the InceptionResNetV2
# `preprocess_input` to every image and are configured with a 20% validation
# split.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
)

val_data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
)

In [4]:
# Training generator. The dataset already ships separate train / val / test
# folders, so no `subset` is requested: every image under TRAINING_DIR is used
# (with subset="training" 20% of the training images were never seen).
train_generator = data_generator.flow_from_directory(
    TRAINING_DIR, target_size=im_shape, shuffle=True, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE)

# Validation generator. Uses the whole VAL_DIR folder; with
# subset="validation" only 20% of that folder was kept.
validation_generator = val_data_generator.flow_from_directory(
    VAL_DIR, target_size=im_shape, shuffle=False, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE)

# Test generator. Not shuffled, so predictions line up with
# `test_generator.classes`.
test_data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
test_generator = test_data_generator.flow_from_directory(
    TEST_DIR, target_size=im_shape, shuffle=False, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE)

# All three splits must agree on the label -> index mapping, otherwise the
# metrics below would compare mismatched classes.
assert validation_generator.class_indices == train_generator.class_indices, \
    'validation and training folders define different classes'
assert test_generator.class_indices == train_generator.class_indices, \
    'test and training folders define different classes'

nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
nb_test_samples = test_generator.samples
classes = list(train_generator.class_indices.keys())
print('Classes: '+str(classes))
num_classes  = len(classes)

In [5]:
# Preview nine training images drawn from the generator on a 3x3 grid.
plt.figure(figsize=(15,15))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    batch = next(train_generator)[0]
    # The generator applies InceptionResNetV2 `preprocess_input`, which scales
    # pixels to [-1, 1]. Map them back to [0, 255] before casting; multiplying
    # by 255 and casting to uint8 wraps around and garbles the picture.
    # A distinct name is used so the imported `image` module is not shadowed.
    preview = np.clip((batch[0] + 1.0) * 127.5, 0, 255).astype('uint8')
    plt.imshow(preview)
plt.show()

In [6]:
# import os
# imageSize=150
# train_dir = "../input/chest-xray-pneumoniacovid19tuberculosis/train/"
# test_dir =  "../input/chest-xray-pneumoniacovid19tuberculosis/test/"
# # ['DME', 'CNV', 'NORMAL', '.DS_Store', 'DRUSEN']
# from tqdm import tqdm
# def get_data(folder):
#     """
#     Load the data and labels from the given folder.
#     """
#     X = []
#     y = []
#     for folderName in os.listdir(folder):
#         if not folderName.startswith('.'):
#             if folderName in ['COVID19']:
#                 label = 0
#             elif folderName in ['NORMAL']:
#                 label = 1
#             elif folderName in ['PNEUMONIA']:
#                 label = 2
#             elif folderName in ['TURBERCULOSIS']:
#                 label = 3
#             else:
#                 label = 4
#             for image_filename in tqdm(os.listdir(folder + folderName)):
#                 img_file = cv2.imread(folder + folderName + '/' + image_filename)
#                 if img_file is not None:
#                     img_file = skimage.transform.resize(img_file, (imageSize, imageSize, 3))
#                     img_arr = np.asarray(img_file)
#                     X.append(img_arr)
#                     y.append(label)
#     X = np.asarray(X)
#     y = np.asarray(y)
#     return X,y
# #X_train, y_train = get_data(train_dir) # Un-comment to use full dataset: Step 1 of 2
# X_test, y_test= get_data(test_dir)

# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X_test, y_test, test_size=0.2) # Re-comment to use full dataset: Step 2 of 2

# # Encode labels to hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
# from keras.utils.np_utils import to_categorical
# y_trainHot = to_categorical(y_train, num_classes = 4)
# y_testHot = to_categorical(y_test, num_classes = 4)

In [7]:
# import pandas as pd
# map_characters = {0: 'COVID19', 1: 'NORMAL', 2: 'PNEUMONIA', 3: 'TURBERCULOSIS'}
# dict_characters=map_characters
# import seaborn as sns
# df = pd.DataFrame()
# df["labels"]=y_train
# lab = df['labels']
# dist = lab.value_counts()
# sns.countplot(lab)
# print(dict_characters)

In [8]:
# from imblearn.over_sampling import RandomOverSampler
# from imblearn.under_sampling import RandomUnderSampler
# # Deal with imbalanced class sizes below
# # Make data 1D for compatibility with the resampling methods
# X_trainShape = X_train.shape[1]*X_train.shape[2]*X_train.shape[3]
# X_testShape = X_test.shape[1]*X_test.shape[2]*X_test.shape[3]
# X_trainFlat = X_train.reshape(X_train.shape[0], X_trainShape)
# X_testFlat = X_test.reshape(X_test.shape[0], X_testShape)
# Y_train = y_train
# Y_test = y_test
# #ros = RandomOverSampler(ratio='auto')
# ros = RandomUnderSampler(sampling_strategy='auto')
# X_trainRos, Y_trainRos = ros.fit_resample(X_trainFlat, Y_train)
# X_testRos, Y_testRos = ros.fit_resample(X_testFlat, Y_test)
# # Encode labels to hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
# Y_trainRosHot = to_categorical(Y_trainRos, num_classes = 4)
# Y_testRosHot = to_categorical(Y_testRos, num_classes = 4)
# # Make Data 2D again
# for i in range(len(X_trainRos)):
#     height, width, channels = imageSize,imageSize,3
#     X_trainRosReshaped = X_trainRos.reshape(len(X_trainRos),height,width,channels)
# for i in range(len(X_testRos)):
#     height, width, channels = imageSize,imageSize,3
#     X_testRosReshaped = X_testRos.reshape(len(X_testRos),height,width,channels)
# # Plot Label Distribution
# dfRos = pd.DataFrame()
# dfRos["labels"]=Y_trainRos
# labRos = dfRos['labels']
# distRos = lab.value_counts()
# sns.countplot(labRos)
# print(dict_characters)

In [9]:
# class_weight1 = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(Y_train), y=Y_train)
# print("Old Class Weights: ",class_weight1)
# class_weight1 = class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(Y_trainRos), y=Y_trainRos)
# print("Old Class Weights: ",class_weight1)

In [10]:
# Baseline CNN: three Conv -> MaxPool stages (16, 32, 64 filters) followed by
# a 100-unit dense layer and a softmax over the dataset classes. Dropout (0.2)
# is applied after the second pooling stage and after the dense layer.
modelcnn = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu',
           input_shape=(im_shape[0], im_shape[1], 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.2),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])
modelcnn.summary()

# Labels are one-hot encoded (class_mode='categorical'), hence the
# categorical cross-entropy loss.
modelcnn.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

In [11]:
epochs = 10

# Callbacks: write 'modelcnn.h5' whenever validation loss improves, and stop
# training once it has not improved for 5 consecutive epochs.
callbacks_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='modelcnn.h5',
        monitor='val_loss', save_best_only=True, verbose=1),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=5,verbose=1)
]

# Training.
# len(generator) is the number of batches in one full pass, including the
# final partial batch. Floor division (`samples // BATCH_SIZE`) drops that
# batch and evaluates to 0 validation steps whenever the validation set holds
# fewer images than one batch.
history = modelcnn.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        callbacks = callbacks_list,
        validation_data=validation_generator,
        verbose = 1,
        validation_steps=len(validation_generator))

In [30]:
# Accumulators for the scores reported by the evaluation cell; each starts
# out as an empty list.
val_loss, val_accuracy = [], []
test_loss, test_accuracy = [], []

In [13]:
from tensorflow.keras.models import load_model

# The in-memory model (weights from the last trained epoch) is evaluated.
# To score the best checkpoint written by ModelCheckpoint instead, uncomment:
#modelcnn = load_model('modelcnn.h5')

# `Model.evaluate` accepts generators directly; `evaluate_generator` is
# deprecated. verbose=0 keeps the output identical to the old call.
score = modelcnn.evaluate(validation_generator, verbose=0)
val_loss.append(score[0])
val_accuracy.append(score[1])
print('\n\nVal loss:', score[0])
print('Val accuracy:', score[1])

score = modelcnn.evaluate(test_generator, verbose=0)
print('\nTest loss:', score[0])
print('Test accuracy:', score[1])

test_loss.append(score[0])
test_accuracy.append(score[1])

In [14]:
import itertools

def plot_confusion_matrix(cm, classes, normalize=True, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on a new 10x10 figure.

    Parameters
    ----------
    cm : array-like, square
        Confusion matrix. Rows are drawn along the "True label" axis and
        columns along the "Predicted label" axis.
    classes : sequence of str
        Tick labels, in the same order as the rows/columns of `cm`.
    normalize : bool, default True
        When True every row is divided by its sum and rounded to two
        decimals; rows that sum to zero are reported as 0.0.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None. The figure is left as the current pyplot figure.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so that the colours, the printed numbers and
    # the text-colour threshold all describe the same matrix.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # `where` skips empty rows, leaving the zeros from `out` in place
        # instead of producing NaN through a division by zero.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        cm = np.around(cm, decimals=2)

    plt.figure(figsize=(10,10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # White text on dark cells, black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called last so the axis labels are included in the layout computation.
    plt.tight_layout()

In [15]:
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix and classification report for the baseline CNN.
# `Model.predict` accepts generators directly; `predict_generator` is
# deprecated. The test generator is not shuffled, so row i of `Y_pred`
# corresponds to `test_generator.classes[i]`.
Y_pred = modelcnn.predict(test_generator, verbose=0)
y_pred = np.argmax(Y_pred, axis=1)
target_names = classes

# Confusion matrix
cm = confusion_matrix(test_generator.classes, y_pred)
plot_confusion_matrix(cm, target_names, normalize=False, title='Confusion Matrix')
print('Classification Report')
print(classification_report(test_generator.classes, y_pred, target_names=target_names))

## **Transfer Learning**

In [16]:
# Augmented variant, currently disabled:
# data_generator = ImageDataGenerator(
#        validation_split=0.2, rotation_range=5, width_shift_range=0.05,
#        height_shift_range=0.05, preprocessing_function=preprocess_input,
#        shear_range=0.05,
#        zoom_range=0.05, horizontal_flip=True, fill_mode='nearest')

# Generator factories without augmentation: both apply the InceptionResNetV2
# `preprocess_input` to every image and are configured with a 20% validation
# split.
data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
)

val_data_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    validation_split=0.2,
)

In [17]:
# Training generator. The dataset already ships separate train / val / test
# folders, so no `subset` is requested: every image under TRAINING_DIR is used
# (with subset="training" 20% of the training images were never seen).
train_generator = data_generator.flow_from_directory(
    TRAINING_DIR, target_size=im_shape, shuffle=True, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE)

# Validation generator. Uses the whole VAL_DIR folder; with
# subset="validation" only 20% of that folder was kept.
validation_generator = val_data_generator.flow_from_directory(
    VAL_DIR, target_size=im_shape, shuffle=False, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE)

# Test generator. Not shuffled, so predictions line up with
# `test_generator.classes`.
test_data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
test_generator = test_data_generator.flow_from_directory(
    TEST_DIR, target_size=im_shape, shuffle=False, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE)

# All three splits must agree on the label -> index mapping, otherwise the
# metrics below would compare mismatched classes.
assert validation_generator.class_indices == train_generator.class_indices, \
    'validation and training folders define different classes'
assert test_generator.class_indices == train_generator.class_indices, \
    'test and training folders define different classes'

nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
nb_test_samples = test_generator.samples
classes = list(train_generator.class_indices.keys())
print('Classes: '+str(classes))
num_classes  = len(classes)

In [18]:
# Preview nine training images drawn from the generator on a 3x3 grid.
plt.figure(figsize=(15,15))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    batch = next(train_generator)[0]
    # The generator applies InceptionResNetV2 `preprocess_input`, which scales
    # pixels to [-1, 1]. Map them back to [0, 255] before casting; multiplying
    # by 255 and casting to uint8 wraps around and garbles the picture.
    # A distinct name is used so the imported `image` module is not shadowed.
    preview = np.clip((batch[0] + 1.0) * 127.5, 0, 255).astype('uint8')
    plt.imshow(preview)
plt.show()

In [19]:
# Pretrained InceptionResNetV2 backbone (ImageNet weights, classifier head
# removed) sized for the notebook's input images.
base_model = InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(im_shape[0], im_shape[1], 3),
)

# Keep the pretrained weights fixed; only the new head below is trained.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

# New classification head: flatten -> 120-unit ReLU layer -> softmax.
x = Flatten()(base_model.output)
x = Dense(120, activation='relu')(x)
predictions = Dense(
    num_classes,
    activation='softmax',
    kernel_initializer='random_uniform',
)(x)

modeltrf = Model(inputs=base_model.input, outputs=predictions)

optimizer = Adam()
modeltrf.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [20]:
epochs = 10

# Callbacks: write 'modeltrf.h5' whenever validation loss improves.
# NOTE(review): patience equals the number of epochs, so EarlyStopping cannot
# trigger in this run; lower it (the CNN run uses 5) if early stopping is
# wanted.
callbacks_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='modeltrf.h5',
        monitor='val_loss', save_best_only=True, verbose=1),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,verbose=1)
]

# len(generator) is the number of batches in one full pass, including the
# final partial batch. Floor division (`samples // BATCH_SIZE`) drops that
# batch and evaluates to 0 validation steps whenever the validation set holds
# fewer images than one batch.
history = modeltrf.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        callbacks = callbacks_list,
        validation_data=validation_generator,
        verbose = 1,
        validation_steps=len(validation_generator))

In [21]:
# from tensorflow.keras.models import load_model
# Load the best saved model
# model = load_model('modeltrf.h5')

In [22]:
# Score the transfer-learning model on the validation generator.
# `Model.evaluate` accepts generators directly; `evaluate_generator` is
# deprecated. verbose=0 keeps the output identical to the old call.
score = modeltrf.evaluate(validation_generator, verbose=0)
print('Val loss:', score[0])
print('Val accuracy:', score[1])

In [23]:
# Score the transfer-learning model on the test generator.
# `Model.evaluate` accepts generators directly; `evaluate_generator` is
# deprecated. verbose=0 keeps the output identical to the old call.
score = modeltrf.evaluate(test_generator, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [24]:
# Confusion matrix and classification report for the transfer-learning model.
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# `Model.predict` accepts generators directly; `predict_generator` is
# deprecated. The test generator is not shuffled, so row i of `Y_pred`
# corresponds to `test_generator.classes[i]`.
Y_pred = modeltrf.predict(test_generator, verbose=0)
y_pred = np.argmax(Y_pred, axis=1)
target_names = classes

# Confusion matrix
cm = confusion_matrix(test_generator.classes, y_pred)
plot_confusion_matrix(cm, target_names, normalize=False, title='Confusion Matrix')
print('Classification Report')
print(classification_report(test_generator.classes, y_pred, target_names=target_names))

In [25]:
# Persist the transfer-learning model in two parts: the architecture as JSON
# and the weights as an HDF5 file.
model_json = modeltrf.to_json()
with open("lungs_model.json", mode="w") as architecture_file:
    architecture_file.write(model_json)

modeltrf.save_weights("lungs_model.h5")
print("Model Saved to the disk")

In [26]:
from tensorflow.keras.preprocessing import image
def predict(img_name):
    """
    Show an image and print the class `modeltrf` predicts for it.

    Parameters
    ----------
    img_name : str
        Path of the image file to classify.

    Notes
    -----
    Reads the module-level `modeltrf`, `im_shape`, `preprocess_input` and
    `class_names`. `class_names` is looked up when the function is called and
    is assumed to be ordered like the model's output indices.
    """
    # Resize to the size the model was built for rather than a literal.
    img=image.load_img(img_name,target_size=im_shape)
    pixels=image.img_to_array(img)
    plt.imshow(pixels.astype('uint8'))
    plt.show()
    # The training generators fed the network `preprocess_input`-scaled
    # pixels, so the same scaling must be applied here; raw [0, 255] values
    # are outside the range the model was trained on.
    batch=preprocess_input(np.expand_dims(pixels, axis=0))
    prediction=modeltrf.predict(batch)
    output=np.argmax(prediction)
    print(class_names[output])

In [27]:
import os
# `flow_from_directory` assigns label indices to the class sub-directories in
# alphanumeric order, while `os.listdir` returns entries in arbitrary order.
# Sort, and keep directories only, so that `class_names[i]` is the class the
# model outputs at index i.
class_names = sorted(
    entry for entry in os.listdir(TRAINING_DIR)
    if os.path.isdir(os.path.join(TRAINING_DIR, entry))
)
print(class_names)

In [28]:
# Try the helper on a single image from the NORMAL folder of the validation split.
predict('../input/chest-xray-pneumoniacovid19tuberculosis/val/NORMAL/NORMAL2-IM-1437-0001.jpeg') 

In [29]:
import cv2
# Classify one test image loaded through OpenCV.
img_path = '../input/chest-xray-pneumoniacovid19tuberculosis/test/PNEUMONIA/person103_bacteria_488.jpeg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError('Could not read image: ' + img_path)
# OpenCV decodes to BGR; the training generators fed the model RGB images.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img,(200,200))
img = np.reshape(img,[1,200,200,3]).astype('float32')
# Apply the same scaling the generators applied during training.
img = preprocess_input(img)

predict_x=modeltrf.predict(img)
# Stored under its own name: assigning to `classes` would overwrite the list
# of label names that the report cells read.
predicted_class=np.argmax(predict_x,axis=1)

print (predicted_class)