# Xception para diagnóstico de IDC em imagens histológicas:

## Autor: Otávio A M Guerra

## Import das bibliotecas necessárias:

In [1]:
#!pip install -U tensorboardcolab
#!pip install image-classifiers
import cv2
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorboardcolab import *
from keras.utils import Sequence
from google.colab import drive
drive.mount('/content/drive')

# Import das bibliotecas de construcao da rede neural
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report
import matplotlib
matplotlib.use("Agg")

Using TensorFlow backend.


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## Definindo função que lerá as imagens dos arquivos csv em batches para entrada na rede:

In [0]:
def csv_image_generator(inputPath, bs, mode="train", aug=None):
	# Abre o arquivo csv para leitura
	f = open(inputPath, "r")

	# Loop infinito
	while True:
		# Inicializa vetores de imagens e classes
		images = []
		labels = []

		# Loop ate atingir o BatchSize
		while len(images) < bs:
			# Lê a proxima linha do CSV
			line = f.readline()

			# Se a linha tiver vazia isso indica que chegamos ao
			# fim do arquivo
			if line == "":
				# Reseta o ponteiro do arquivo para o inicio
				# e lê novamente a primeira linha
				f.seek(0)
				line = f.readline()

				# Se estamos avaliando entao temos que sair do loop
				# para garantirmos que nao avaliaremos o modelo 2 vezes
				# em amostras do inicio do arquivo
				if mode == "eval":
					break

			# Extrai a classe da imagem e constroi a mesma
			line = line.strip().split(",")
			label = int(line[0])
			image = np.array([int(x) for x in line[1:]], dtype="uint8")
			image = image.reshape((50, 50, 3))
			image = cv2.resize(image,(75, 75), interpolation = cv2.INTER_CUBIC)

			# Atualiza as listas do Batch
			images.append(image)
			labels.append(label)

		# Trata parametro de Data Augmentation
		if aug is not None:
			(images, labels) = next(aug.flow(np.array(images),
				labels, batch_size=bs))

		# yield the batch to the calling function
		yield (np.array(images), labels)

## Definindo variaveis e caminhos dos arquivos:

In [0]:
# Path dos CSV's
TRAIN_CSV = "/content/drive/My Drive/TCC/Projeto/dataset/IDC_training.csv"
TEST_CSV = "/content/drive/My Drive/TCC/Projeto/dataset/IDC_test.csv"
VAL_CSV = "/content/drive/My Drive/TCC/Projeto/dataset/IDC_validation.csv"

# Numero de Epocas e Batch Size
NUM_EPOCHS = 10
BS = 128

# Taxa de Aprendizado (Learning Rate)
INIT_LR = 0.0001

# Inicializa variaveis de tamanho dos conjuntos
NUM_TRAIN_IMAGES = 0
NUM_TEST_IMAGES = 0
NUM_VAL_IMAGES = 0

# Abre o csv de treinamento e define um conjunto com as classes
# e as labels do conjunto de treino e teste
f = open(TRAIN_CSV, "r")
labels = set()
testLabels = []
trainLabels = []

# loop no CSV de treinamento
for line in f:
    # Extrai a label da classe, atualiza a lista de labels e incrementa
    # o numero total de imagens de treino
    label = line.strip().split(",")[0]
    labels.add(label)
    trainLabels.append(label)
    NUM_TRAIN_IMAGES += 1

# Fecha CSV de treino e abre CSV de teste
f.close()
f = open(TEST_CSV, "r")

# loop no CSV de teste
for line in f:
	# Extrai a label da classe, atualiza a lista de labels e incrementa
	# o numero total de imagens de teste
	label = int(line.strip().split(",")[0])
	testLabels.append(label)
	NUM_TEST_IMAGES += 1

# Fecha o CSV de Teste
f.close()

## Construindo o "gerador" de imagens:

In [0]:
aug = ImageDataGenerator(rotation_range=20, zoom_range=0.15,
	width_shift_range=0.2, height_shift_range=0.2, shear_range=0.15,
	horizontal_flip=True, fill_mode="nearest")

#test_aug = ImageDataGenerator(rescale = 1./255)

# Inicializa geradores de Treino e Teste
trainGen = csv_image_generator(TRAIN_CSV, BS, mode="train", aug=aug)
testGen = csv_image_generator(TEST_CSV, BS, mode="train", aug=None)

## Lidando com o desbalanceamento do conjunto de treinamento:

In [0]:
from keras.utils import np_utils
trainLabels = np_utils.to_categorical(trainLabels)
classTotals = trainLabels.sum(axis=0)
classWeight = classTotals.max() / classTotals

## Importação e definição do modelo Xception:

In [7]:
from keras import applications
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, EarlyStopping
from contextlib import redirect_stdout

img_width, img_height = 75, 75 # Dimensoes das imagens de entrada


model = applications.xception.Xception(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))

# "Congelando" os 10 primeiros layers
#for layer in model.layers[:10]:
#    layer.trainable = False


# Adicionando layers FC 
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation="sigmoid")(x)

# Criando o modelo final 
model_final = Model(input = model.input, output = predictions)

# Resumo da Arquitetura do modelo
print(model_final.summary())

# Salva o .summary() do modelo em um arquivo texto
with open('Xception_summary.txt', 'w') as f:
    with redirect_stdout(f):
        model_final.summary()

# Compilando o modelo final
model_final.compile(loss = "binary_crossentropy", optimizer = optimizers.Adam(lr=INIT_LR, decay=INIT_LR / NUM_EPOCHS), metrics=["accuracy"])

W0731 23:42:11.541077 140343710115712 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0731 23:42:11.616895 140343710115712 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0731 23:42:11.640299 140343710115712 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0731 23:42:11.705992 140343710115712 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0731 23:42:11.707362 1403437101

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels_notop.h5


W0731 23:42:35.063285 140343710115712 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0731 23:42:35.219017 140343710115712 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0731 23:42:35.235427 140343710115712 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 75, 75, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 37, 37, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 37, 37, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 37, 37, 32)   0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

## Definindo callbacks para monitorar e salvar o modelo:

In [0]:
checkpoint = ModelCheckpoint("Xception_1.h5", monitor='val_acc', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=5, verbose=1, mode='auto')

## Treinando o modelo e realizando predições no conjunto de teste:

In [9]:
print("[START] Iniciando treinamento...\n")
H = model_final.fit_generator(
	trainGen,
	steps_per_epoch=NUM_TRAIN_IMAGES // BS,
	validation_data=testGen,
	validation_steps=NUM_TEST_IMAGES // BS,
	epochs=NUM_EPOCHS,
    callbacks = [checkpoint, early],
    class_weight=classWeight)

[START] Iniciando treinamento...

Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.87059, saving model to Xception_1.h5
Epoch 2/10

Epoch 00002: val_acc improved from 0.87059 to 0.88752, saving model to Xception_1.h5
Epoch 3/10

Epoch 00003: val_acc improved from 0.88752 to 0.88992, saving model to Xception_1.h5
Epoch 4/10

Epoch 00004: val_acc improved from 0.88992 to 0.89553, saving model to Xception_1.h5
Epoch 5/10

Epoch 00005: val_acc did not improve from 0.89553
Epoch 6/10

Epoch 00006: val_acc improved from 0.89553 to 0.90044, saving model to Xception_1.h5
Epoch 7/10

Epoch 00007: val_acc did not improve from 0.90044
Epoch 8/10

Epoch 00008: val_acc improved from 0.90044 to 0.90054, saving model to Xception_1.h5
Epoch 9/10

Epoch 00009: val_acc improved from 0.90054 to 0.90325, saving model to Xception_1.h5
Epoch 10/10

Epoch 00010: val_acc improved from 0.90325 to 0.90345, saving model to Xception_1.h5


## Avaliando o melhor modelo encontrado no processo de treinamento:

In [0]:
from keras.models import load_model

# Re-inicializa o gerador de teste para avaliacao
testGen = csv_image_generator(TEST_CSV, BS,
	mode="eval", aug=None)

# Carrega o melhor modelo
best_model = load_model('Xception_1.h5')

# Realiza predicoes no conjunto de teste
predIdxs = best_model.predict_generator(testGen,
	steps=(NUM_TEST_IMAGES // BS) + 1)

## Plot da curva de aprendizado  e métricas de classificação:

In [0]:
def get_label(proba):
    """ Função que transforma probabilidades em classe predita de acordo com
        limiar de 0.5 definido"""
    if proba > 0.5:
        return 1
    else:
        return 0

predLabels = [get_label(i) for i in list(predIdxs)]

In [12]:
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
print("[Métricas] Avaliando a rede no conjunto de Teste...\n")
print(classification_report(testLabels,predLabels))
print("\n")
print("Acuracia Balanceada: {}\n".format(balanced_accuracy_score(testLabels,predLabels)))
print("ROC-AUC: {}".format(roc_auc_score(testLabels,predLabels)))

[Métricas] Avaliando a rede no conjunto de Teste...

              precision    recall  f1-score   support

           0       0.88      0.94      0.91      5000
           1       0.93      0.87      0.90      5000

    accuracy                           0.90     10000
   macro avg       0.91      0.90      0.90     10000
weighted avg       0.91      0.90      0.90     10000



Acuracia Balanceada: 0.9036

ROC-AUC: 0.9036


In [0]:
# Plot de Loss e Acuracia de treino e teste:
N = 10
plt.style.use("ggplot")
plt.figure(figsize=(15,10))
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="test_loss")
plt.plot(np.arange(0, N), H.history["acc"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_acc"], label="test_acc")
plt.title("Loss e Acurácia no Conjunto de Treino e Teste")
plt.xlabel("Epoca #")
plt.ylabel("Loss/Acurácia")
plt.legend(loc="lower left")
plt.savefig("plot.png")