In [1]:
from random import randint
import os
import shutil
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, Input
from tensorflow.keras.layers import Flatten, MaxPool2D, AvgPool2D
from tensorflow.keras.layers import BatchNormalization, Reshape, UpSampling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2 
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import Model, load_model

seed = 123

In [2]:
# ao chegar aqui, as imagens já precisam estar pré processadas
# criar nova pasta similar à train_main, mas os dados de seus subdiretorios possuirao imagens pré processadas

def segmentation(path):
    
    img = cv2.imread(path)
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # noise removal
    kernel = np.ones((3,3),np.uint8)
    opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 2)
    # sure background area
    sure_bg = cv2.dilate(opening,kernel,iterations = 3)
    res = cv2.bitwise_and(img,img,mask = sure_bg)
    
    return res

def bgr_CLAHE(img):
    
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lab_planes = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit = 2.0,tileGridSize = (6, 6))
    lab_planes[0] = clahe.apply(lab_planes[0])
    lab = cv2.merge(lab_planes)
    img = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
    
    return img

In [3]:
# Caminhos dos  csvs:
treino_2018 ="/media/leandro/84EE-B5FB/isic_2018_treino.csv"
treino_2017 ="/media/leandro/84EE-B5FB/isic_2017_treino.csv"
teste = "/media/leandro/84EE-B5FB/isic_2017_teste.csv"
validacao = "/media/leandro/84EE-B5FB/isic_2017_validacao.csv"


In [4]:
# ------- VALIDAÇÃO (extração e pré processamento)

# tratando
df_validacao_2017 = pd.read_csv(validacao)

df_validacao_2017.drop('seborrheic_keratosis', axis = 1, inplace = True)
df_validacao_2017.columns = ['img', 'pos']
df_validacao_2017['img'] = df_validacao_2017['img'].apply(lambda x: x + '.jpg')

In [5]:
df_validacao_2017.head()

Unnamed: 0,img,pos
0,ISIC_0001769.jpg,0.0
1,ISIC_0001852.jpg,0.0
2,ISIC_0001871.jpg,0.0
3,ISIC_0003462.jpg,0.0
4,ISIC_0003539.jpg,0.0


In [6]:
df_validacao_2017.shape

(150, 2)

In [7]:
diretorio_val = '/media/leandro/84EE-B5FB/isic_2017_validacao/isic_2017_validacao/'
pos = '/media/leandro/84EE-B5FB/validacao_main/pos'
neg = '/media/leandro/84EE-B5FB/validacao_main/neg'

In [8]:
# # dado que possua o dataframe com as imagens de treino, esta célula filtra imagens por ano, classe e aloca
# # dados em um diretorio possuindo dois sub diretorios (um para classe positiva e outro para negativa)

# neg_files = df_validacao_2017.loc[df_validacao_2017['pos'] == 0]['img'].tolist() 
# pos_files = df_validacao_2017.loc[df_validacao_2017['pos'] == 1]['img'].tolist()

# pos_files = [i for i in os.listdir(diretorio_val) if i in pos_files]

# neg_files = [i for i in os.listdir(diretorio_val) if i in neg_files]

# for f in pos_files:
#     shutil.copy(diretorio_val + f, pos)
    

# for f in neg_files:
#     shutil.copy(diretorio_val + f, neg)
    

In [9]:
# ------- TREINO 2017

df_treino_2017 = pd.read_csv(treino_2017)

# adicionando extensão aos nomes dos arquivos
df_treino_2017['image_id'] = df_treino_2017['image_id'].apply(lambda x: x + '.jpg')

# criando flag para nevus
df_treino_2017['nevus'] = ((df_treino_2017.melanoma == 0) & 
                           (df_treino_2017.seborrheic_keratosis == 0)).astype(float)

df_treino_2017.drop('seborrheic_keratosis', axis = 1, inplace = True)

# as imagens precisam ser ou nevus ou melanoma
df_treino_2017 = df_treino_2017.loc[(df_treino_2017['melanoma'] == 1) | (df_treino_2017['nevus'] == 1)]

# ------- TREINO 2018

df_treino_2018 = pd.read_csv(treino_2018)

# tratando nomes (adicionando extensões aos nomes) e filtrando series 
df_treino_2018 = df_treino_2018[['image', 'MEL', 'NV']]
df_treino_2018['image'] = df_treino_2018['image'].apply(lambda x: x + '.jpg')

# imagens precisam ser ou nevus ou melanoma
df_treino_2018 = df_treino_2018.loc[(df_treino_2018['MEL'] == 1) | (df_treino_2018['NV'] == 1)]

# ------- BALANCEAMENTO

# unificar nomes das series, para eventual concat
df_treino_2018.columns = ['img', 'pos', 'neg']
df_treino_2017.columns = ['img', 'pos', 'neg']

# separando casos positivos e negativos de cada ano
pos_2018 = df_treino_2018.loc[df_treino_2018['pos'] == 1]
pos_2017 = df_treino_2017.loc[df_treino_2017['pos'] == 1]

neg_2018 = df_treino_2018.loc[df_treino_2018['neg'] == 1]
neg_2017 = df_treino_2017.loc[df_treino_2017['neg'] == 1]

# juntando casos positivos e negativos de todos os anos
full_pos = pd.concat([pos_2017, pos_2018], axis = 0)
full_neg = pd.concat([neg_2017, neg_2018], axis = 0)

# amostrando aleatoriamente, n dados negativos, estes consistirão nas observações negativas (a amostragem
# é feita para separar um número de imagens equivalente ao número de imagens positivas que temos à disposição)

full_neg = full_neg.sample(n = full_pos.shape[0], random_state = seed)

full_pos = full_pos[['img', 'pos']]
full_neg = full_neg[['img', 'pos']]

filenames = pd.concat([full_pos, full_neg]).reset_index().drop('index', axis = 1)

In [10]:
# # dado que possua o dataframe com as imagens de treino, esta célula filtra imagens por ano, classe e aloca
# # dados em um diretorio possuindo dois sub diretorios (um para classe positiva e outro para negativa)

# neg_files = filenames.loc[filenames['pos'] == 0]['img'].tolist() 
# pos_files = filenames.loc[filenames['pos'] == 1]['img'].tolist()

# pos_files17 = [i for i in os.listdir('isic_2017_treino') if i in pos_files]
# pos_files18 = [i for i in os.listdir('isic_2018_treino') if i in pos_files]

# neg_files17 = [i for i in os.listdir('isic_2017_treino') if i in neg_files]
# neg_files18 = [i for i in os.listdir('isic_2018_treino') if i in neg_files]

# for f in pos_files17:
#     shutil.copy('isic_2017_treino/' + f, 'train_main/pos')
    
# for f in pos_files18:
#     shutil.copy('isic_2018_treino/' + f, 'train_main/pos')

# for f in neg_files17:
#     shutil.copy('isic_2017_treino/' + f, 'train_main/neg')
    
# for f in neg_files18:
#     shutil.copy('isic_2018_treino/' + f, 'train_main/neg')

In [11]:
# train_main_pos possui duas pastas 'pos' e 'neg', estas contém, respectivamente, os dados de treino positivos
# e negativos

In [12]:
# escolhendo valores para redimensionamento

In [13]:
diretorio_pos = "/media/leandro/84EE-B5FB/train_main/pos/"
diretorio_neg = "/media/leandro/84EE-B5FB/train_main/neg/"

In [14]:
dims = [cv2.imread(f'{diretorio_pos}{i}').shape for i in os.listdir(diretorio_pos)]
i = pd.Series([i[0] for i in dims])
j = pd.Series([i[1] for i in dims])

In [15]:
i.max(), j.max()

(4499, 6748)

In [16]:
i.mean(), j.mean()

(1859.4009661835748, 2698.723027375201)

In [17]:
# me parece melhor opção
i.median(), j.median()

dimensions = (450, 600)

# Processamento de imagens e encoder

In [68]:
gerador = ImageDataGenerator(
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    zoom_range = 0.2,
    fill_mode = 'nearest',
    horizontal_flip = True,
    vertical_flip = True,
    data_format = None)



In [69]:
diretorio_treino = "/media/leandro/84EE-B5FB/train_main/"
diretorio_teste ="/media/leandro/84EE-B5FB/isic_2017_teste/"
diretorio_val ="/media/leandro/84EE-B5FB/validacao_main/"

In [70]:
target_size = (50, 50)
batch_size = 100

gerador_treino = gerador.flow_from_directory(diretorio_treino, 
                                             target_size = target_size,
                                             batch_size = batch_size)

gerador_validacao = gerador.flow_from_directory(diretorio_val, 
                                                target_size = target_size,
                                                batch_size = batch_size)

gerador_teste = gerador.flow_from_directory(diretorio_teste, 
                                            target_size = target_size,
                                            batch_size = batch_size)

Found 2974 images belonging to 2 classes.
Found 150 images belonging to 2 classes.
Found 600 images belonging to 1 classes.


In [52]:
cnn_i, cnn_j, cnn_chnls = 50, 50, 3

cnn = Sequential()
cnn.add(Conv2D(50, (3, 3), input_shape = (cnn_i, cnn_j, cnn_chnls), activation = 'relu'))
cnn.add(Conv2D(50, (3, 3), activation = 'relu'))
cnn.add(MaxPool2D((2, 2)))
cnn.add(Conv2D(50, (3, 3), activation = 'relu'))
cnn.add(Conv2D(50, (3, 3), activation = 'relu'))
cnn.add(AvgPool2D((2, 2)))
cnn.add(Conv2D(50, (3, 3), activation = 'relu'))
cnn.add(BatchNormalization())
cnn.add(AvgPool2D((2, 2)))
cnn.add(Flatten())
cnn.add(Dense(25, activation = 'relu', kernel_regularizer = l2(0.05)))
cnn.add(Dropout(0.2))
cnn.add(Dense(25, activation = 'relu', kernel_regularizer = l2(0.05)))
cnn.add(BatchNormalization())
cnn.add(Dropout(0.2))
cnn.add(Dense(25, activation = 'relu', kernel_regularizer = l2(0.05)))
cnn.add(Dropout(0.2))
cnn.add(Dense(25, activation = 'relu', kernel_regularizer = l2(0.05)))
cnn.add(Dense(1, activation = 'sigmoid'))
        
cnn.compile(loss = 'binary_crossentropy', optimizer = RMSprop(learning_rate = 0.001))

es = EarlyStopping(monitor = 'loss', patience = 5) 

In [54]:
es = EarlyStopping(patience = 5, restore_best_weights = True)

cnn.fit(gerador_treino, 
             epochs = 1000, 
             validation_data = (gerador_validacao),
             steps_per_epoch = gerador_treino.samples//batch_size,
             validation_steps = gerador_validacao.samples//batch_size,
             callbacks = [es])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000


<tensorflow.python.keras.callbacks.History at 0x7ff7c04398b0>

# Autoencoder

In [119]:
#REDES (autoencoder e nn)

#auto encoder (pré processamento)

px_h, px_v, chnls = 50, 50, 3
ipt_e = Input(shape = (px_h, px_v, chnls))

encoder = Conv2D(4, (3, 3), input_shape = (px_h, px_v, chnls), activation = 'relu', padding = 'same')(ipt_e)
encoder = MaxPool2D((5, 5))(encoder)
encoder = Conv2D(8, (3, 3), activation = 'relu', padding = 'same')(encoder)
encoder = MaxPool2D((2, 2))(encoder)
encoder = Conv2D(16, (2, 2), activation = 'relu', padding = 'same')(encoder)
encoder = Flatten()(encoder)
encoder = Dense(32, name = 'gargalo')(encoder)

decoder = Dense(400)(encoder)
decoder = Reshape(target_shape = (5, 5, 16))(decoder)
decoder = Conv2D(8, (2, 2), activation = 'relu', padding = 'same')(decoder)
decoder = UpSampling2D((2, 2))(decoder)
decoder = Conv2D(4, (3, 3), activation = 'relu', padding = 'same')(decoder)
decoder = UpSampling2D((5, 5))(decoder)
decoder = Conv2D(4, (3, 3), activation = 'relu', padding = 'same')(decoder)
decoder = Conv2D(3, (3, 3), activation = 'sigmoid', padding = 'same')(decoder)

autoencoder = Model(inputs = ipt_e, outputs = decoder, name = 'autoencoder')
autoencoder.compile(loss = 'mean_squared_error')





In [120]:
autoencoder.summary()

Model: "autoencoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_32 (InputLayer)        [(None, 50, 50, 3)]       0         
_________________________________________________________________
conv2d_187 (Conv2D)          (None, 50, 50, 4)         112       
_________________________________________________________________
max_pooling2d_60 (MaxPooling (None, 10, 10, 4)         0         
_________________________________________________________________
conv2d_188 (Conv2D)          (None, 10, 10, 8)         296       
_________________________________________________________________
max_pooling2d_61 (MaxPooling (None, 5, 5, 8)           0         
_________________________________________________________________
conv2d_189 (Conv2D)          (None, 5, 5, 16)          528       
_________________________________________________________________
flatten_32 (Flatten)         (None, 400)               

In [124]:
es = EarlyStopping(patience = 5, restore_best_weights = True)

autoencoder.fit_generator(gerador_treino, 
             epochs = 1000, 
             validation_data = (gerador_validacao),
             steps_per_epoch = gerador_treino.samples//batch_size,
             validation_steps = gerador_validacao.samples//batch_size,
             callbacks = [es])

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/1000


InvalidArgumentError:  Incompatible shapes: [100,2] vs. [100,50,50,3]
	 [[node mean_squared_error/SquaredDifference (defined at <ipython-input-121-09a51d7f21b2>:3) ]] [Op:__inference_train_function_26059]

Function call stack:
train_function


In [None]:
es = EarlyStopping(monitor = 'val_loss', patience = 2)

autoencoder.fit(x_treino, x_treino,
                batch_size = 512,
                epochs = 20,
                validation_data = (x_valid, x_valid))