In [1]:
import datetime as dt
import wget
import os
import pandas as pd
import numpy as np
import funcoes as f
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from keras.regularizers import l2
from keras.utils import load_img
from keras.utils import img_to_array
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import absl.logging

from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

absl.logging.set_verbosity(absl.logging.ERROR)
tf.get_logger().setLevel('WARNING')

In [2]:
dataset = "dp03" # "dp1" , "dp3" ou "dp03"
seed = np.random.randint(0, 9999)
batch_size = 32
n_samples = 1000

In [3]:
if dataset == "dp3":
    img = load_img('./teste sanidade/dp3/DP3_class0/image1.png')
elif dataset == "dp1":
    img = load_img('./teste sanidade/dp1/DP1_class0/image1.png')
elif dataset == "dp03":
    img = load_img('./teste sanidade/dp03/classe1/image1.png')
img = img_to_array(img)
img_shape = img.shape
del img
img_shape

(64, 64, 3)

In [4]:
dados = image_dataset_from_directory(f"./teste sanidade/{dataset}/",
                                                   label_mode="binary",
                                                  image_size=img_shape[:2],
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  seed=seed)

Found 2000 files belonging to 2 classes.


In [5]:
# pega 300 elementos do dataset para treino e 100 pra validação
dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
train_data = dados.take((n_samples//batch_size))
dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
validation_data = dados.take((100//batch_size))
print(len(train_data), len(validation_data))

31 3


# CNN hinge loss e regularizador

In [6]:
########## modelo csvm direto no keras

def CNN_hinge(img_shape=img_shape):
    # define our MLP network
    model = keras.Sequential()
    #model.add(layers.Input(shape=(20, 180, 3)))
    model.add(layers.Input(shape=img_shape))
    
    # reescala os dados pra entre 0 e 1
    model.add(layers.Rescaling(1./255))
    
    # primeira convolucao
    model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
    model.add(layers.MaxPool2D(pool_size=(1, 2)))
    model.add(layers.Dropout(0.15))
    
    # segunda convolucao
    #model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))#, padding="same"))
    #model.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding="same"))
    #model.add(layers.MaxPool2D(pool_size=(2, 2)))
    #model.add(layers.Dropout(0.20))
    
    # terceira
    #model.add(layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding="same"))
    #model.add(layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding="same"))
    #model.add(layers.MaxPool2D(pool_size=(2, 2)))
    
    # FC_1024
    model.add(layers.Flatten(name="flatten"))
    model.add(layers.Dense(16, activation="relu"))
    model.add(layers.Dense(8, activation="relu"))
    model.add(layers.Dense(1, kernel_regularizer=l2(0.01))) # aqui mandam usar l2 0,01
    model.add(layers.Activation('linear')) # aqui mandam por linear
    #model.add(layers.Activation('relu')) # tentar com tanh 
    
    return model

In [7]:
########## modelo cnn-svm direto no keras

model = CNN_hinge()
model.compile(loss = "categorical_hinge", optimizer="Adam", metrics=["accuracy",
                                                                     keras.metrics.Precision(),
                                                                     keras.metrics.Recall(), 
                                                                     keras.metrics.AUC()])

# callbacks
# salva o melhor modelo na pasta "modelo"
checkpoint = ModelCheckpoint("modelo", monitor='val_accuracy', verbose=0, save_best_only=True, mode='max')
# para de treinar se a acuracia de treino parar de aumentar por 3 epochs
es = EarlyStopping(monitor='val_accuracy', patience=4)
callbacks_list = [checkpoint, es]

#treina
history = model.fit(train_data,  epochs=25, use_multiprocessing=True)#, validation_data=validation_data)#, callbacks=callbacks_list )

# carrega o melhor modelo treinado e avalia
#model = keras.models.load_model("modelo")
model.evaluate(validation_data)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


[1.0162702798843384, 0.5625, 0.0, 0.0, 0.5]

# CNN

In [8]:
def CNN(img_shape=img_shape):
    model = keras.Sequential()
    model.add(layers.Input(shape=img_shape))
    
    # reescala os dados pra entre 0 e 1
    model.add(layers.Rescaling(1./255))
    
    # convolucao
    model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
    model.add(layers.MaxPool2D(pool_size=(1, 2)))
    model.add(layers.Dropout(0.15))
    
    # classificação
    model.add(layers.Flatten(name="flatten"))
    model.add(layers.Dense(16, activation="relu"))
    model.add(layers.Dense(8, activation="relu"))
    # pra classificacao binaria parece que precisa usar sigmoid
    model.add(layers.Dense(1, activation="sigmoid"))
    return model

In [9]:
model = CNN()
model.compile(loss = "binary_crossentropy", optimizer="Adam", metrics=["accuracy",
                                                                     keras.metrics.Precision(),
                                                                     keras.metrics.Recall(), 
                                                                     keras.metrics.AUC()])

# callbacks
# salva o melhor modelo na pasta "modelo"
checkpoint = ModelCheckpoint("modelo", monitor='val_accuracy', verbose=0, save_best_only=True, mode='max')
# para de treinar se a acuracia de treino parar de aumentar por 3 epochs
es = EarlyStopping(monitor='val_accuracy', patience=4)
callbacks_list = [checkpoint, es]

#treina
history = model.fit(train_data,  epochs=25, use_multiprocessing=True)#, validation_data=validation_data)#, callbacks=callbacks_list )

# carrega o melhor modelo treinado e avalia
#model = keras.models.load_model("modelo")
model.evaluate(validation_data)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


[0.005977926775813103, 1.0, 1.0, 1.0, 0.9999999403953552]

# CSVM

Aqui preciso declarar os dados novamente com batch_size=1 pra poder extrair os dados mais facilmente

In [10]:
def CSVM(img_shape=img_shape):
    # define our MLP network
    model = keras.Sequential()
    #model.add(layers.Input(shape=(20, 180, 3)))
    model.add(layers.Input(shape=img_shape))
    model.add(layers.Rescaling(1./255))
    
    # primeira convolucao
    model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))#, padding="same"))
    model.add(layers.MaxPool2D(pool_size=(1, 2)))
    model.add(layers.Dropout(0.15))
    model.add(layers.Flatten())
    
    return model

In [11]:
dados = image_dataset_from_directory(f"./teste sanidade/{dataset}/",
                                                   label_mode="binary",
                                                  image_size=img_shape[:2],
                                                  batch_size=1,
                                                  shuffle=True,
                                                  seed=seed)

Found 2000 files belonging to 2 classes.


In [12]:
# pega 300 elementos do dataset para treino e 100 pra validação
dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
train_data = dados.take((n_samples//batch_size)*batch_size)
dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
validation_data = dados.take((100//batch_size)*batch_size)
print(len(train_data), len(validation_data))

992 96


In [13]:
model = CSVM(img_shape=img_shape)
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 64, 64, 3)         0         
                                                                 
 conv2d_2 (Conv2D)           (None, 62, 62, 32)        896       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 62, 31, 32)       0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 62, 31, 32)        0         
                                                                 
 flatten (Flatten)           (None, 61504)             0         
                                                                 
Total params: 896
Trainable params: 896
Non-trainable params: 0
________________________________________________________

In [14]:
# coloca os dados de treino da SVM em um numpy array

predictions = np.empty(shape=(len(train_data), list(model.output.shape)[1]), dtype="float16")#, dtype="float32")
labels =  np.empty(len(train_data), dtype="int").reshape(-1, 1)
i = 0
j = 0
print("Preparando dados")
for x, y in train_data:
    #print(y)
    if i == j:
        print("\r", round(j/len(train_data)*100, 1),"%", end="    ")
        j += 1000
    predictions[i] = (model(x, training=False).numpy()[0])
    labels[i] = y.numpy()[0]
    i += 1
labels = np.ravel(labels)
print("\r", "100", "%", "         ")

Preparando dados
 100 %          


In [15]:
modelo_svm = LinearSVC()
print("treinando modelo")
modelo_svm.fit(predictions, labels)
print("medindo score")
modelo_svm.score(predictions, labels)

treinando modelo




medindo score


1.0

In [16]:
# dados de teste pro csvm
predictions = np.empty(shape=(len(validation_data), list(model.output.shape)[1]), dtype="float16")#, dtype="float32")
labels =  np.empty(len(validation_data), dtype="int").reshape(-1, 1)
i = 0
j = 0
print("Preparando dados")
for x, y in validation_data:
    #print(y)
    if i == j:
        print("\r", round(j/len(validation_data)*100, 1),"%", end="    ")
        j += 1000
    predictions[i] = (model(x, training=False).numpy()[0])
    labels[i] = y.numpy()[0]
    i += 1
labels = np.ravel(labels)
print("\r", "100", "%", "         ")

Preparando dados
 100 %          


In [17]:
print("testando modelo")
modelo_svm.score(predictions, labels)

testando modelo


1.0

## Treina multiplas vezes

In [18]:
repeticoes = 10
scores_CNN_hinge = []
scores_CNN = []
scores_CSVM = []
resultado_linear = np.empty((repeticoes, 5))
resultado_poli = np.empty((repeticoes, 5))
resultado_rbf = np.empty((repeticoes, 5))
resultado_CNN_hinge = np.empty((repeticoes, 5))
resultado_CNN = np.empty((repeticoes, 5))

n_samples = 1000

# treina n vezes e salva scores nos arrays
for repeticao in range(repeticoes):
    print(repeticao)
    #cria dados
    seed = np.random.randint(0, 9999)
    dados = image_dataset_from_directory(f"./teste sanidade/{dataset}/",
                                                   label_mode="binary",
                                                  image_size=img_shape[:2],
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  seed=seed)
    
    # pega 300 elementos do dataset para treino e 100 pra validação
    dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
    train_data = dados.take((n_samples//batch_size))
    dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
    validation_data = dados.take((100//batch_size))
    
    # treina CNN hinge
    model = CNN_hinge()
    model.compile(loss = "categorical_hinge", optimizer="Adam", metrics=["accuracy",
                                                                     keras.metrics.Precision(),
                                                                     keras.metrics.Recall(), 
                                                                     keras.metrics.AUC()])
    model.fit(train_data,  epochs=25, use_multiprocessing=True, verbose=0)#, validation_data=validation_data, verbose=0)#, callbacks=callbacks_list)
    resultado_CNN_hinge[repeticao] = model.evaluate(validation_data)
    del model
    
    # treina CNN normal
    model = CNN()
    model.compile(loss = "binary_crossentropy", optimizer="Adam", metrics=["accuracy",
                                                                     keras.metrics.Precision(),
                                                                     keras.metrics.Recall(), 
                                                                     keras.metrics.AUC()])
    model.fit(train_data,  epochs=25, use_multiprocessing=True, verbose=0)#, validation_data=validation_data, verbose=0)#, callbacks=callbacks_list)
    resultado_CNN[repeticao] = model.evaluate(validation_data)
    del model
    
    # prepara os dados pra CSVM
    dados = image_dataset_from_directory(f"./teste sanidade/{dataset}/",
                                                   label_mode="binary",
                                                  image_size=img_shape[:2],
                                                  batch_size=1,
                                                  shuffle=True,
                                                  seed=seed)

    dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
    train_data = dados.take((n_samples//batch_size)*batch_size)
    dados = dados.shuffle(len(dados), seed=seed, reshuffle_each_iteration=True)
    validation_data = dados.take((100//batch_size)*batch_size)

    model = CSVM(img_shape=img_shape)

    # coloca os dados de treino da SVM em um numpy array

    predictions = np.empty(shape=((n_samples//batch_size)*batch_size, list(model.output.shape)[1]), dtype="float16")#, dtype="float32")
    labels =  np.empty((n_samples//batch_size)*batch_size, dtype="int").reshape(-1, 1)
    i = 0
    j = 0
    #print("Preparando dados")
    for x, y in train_data:
        #print(y)
        if i == j:
            #print("\r", round(j/((300//batch_size)*batch_size)*100, 1),"%", end="    ")
            j += 1000
        predictions[i] = (model(x, training=False).numpy()[0])
        labels[i] = y.numpy()[0]
        i += 1
    labels = np.ravel(labels)
    #print("\r", "100", "%", "         ")
    
    
     # treina CSVM
    #modelo_svm = LinearSVC()
    modelo_linear = SVC(kernel='linear')
    modelo_poli = SVC(kernel='poly')
    modelo_rbf = SVC(kernel='rbf')

    modelo_linear.fit(predictions, labels)
    modelo_poli.fit(predictions, labels)
    modelo_rbf.fit(predictions, labels)
    
    # prepara dados de teste pro csvm
    predictions = np.empty(shape=(((100//batch_size)*batch_size), list(model.output.shape)[1]), dtype="float16")#, dtype="float32")
    labels =  np.empty(((100//batch_size)*batch_size), dtype="int").reshape(-1, 1)
    i = 0
    j = 0
    #print("Preparando dados")
    for x, y in validation_data:
        #print(y)
        if i == j:
            #print("\r", round(j/((100//batch_size)*batch_size)*100, 1),"%", end="    ")
            j += 1000
        predictions[i] = (model(x, training=False).numpy()[0])
        labels[i] = y.numpy()[0]
        i += 1
    labels = np.ravel(labels)
    #print("\r", "100", "%", "         ")
    
    del model
    
    y_pred = modelo_linear.predict(predictions)
    y_true = labels
    
    resultado_linear[repeticao] = np.array([accuracy_score(y_true, y_pred),
                   precision_score(y_true, y_pred), 
                   recall_score(y_true, y_pred), 
                   f1_score(y_true, y_pred),
                   roc_auc_score(y_true, y_pred)
                  ])
    
    y_pred = modelo_poli.predict(predictions)
    y_true = labels
    
    resultado_poli[repeticao] = np.array([accuracy_score(y_true, y_pred),
                   precision_score(y_true, y_pred), 
                   recall_score(y_true, y_pred), 
                   f1_score(y_true, y_pred),
                   roc_auc_score(y_true, y_pred)
                  ])
    
    y_pred = modelo_rbf.predict(predictions)
    y_true = labels
    
    resultado_rbf[repeticao] = np.array([accuracy_score(y_true, y_pred),
                   precision_score(y_true, y_pred), 
                   recall_score(y_true, y_pred), 
                   f1_score(y_true, y_pred),
                   roc_auc_score(y_true, y_pred)
                  ])

0
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
1
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
2
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
3
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
4
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
5
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
6
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
7
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
8
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.
9
Found 2000 files belonging to 2 classes.
Found 2000 files belonging to 2 classes.


In [19]:
# transforma resultados da SVM em df
resultado_linear = pd.DataFrame(resultado_linear, columns=["accuracy", "precision", "recall", "f1-score", "AUC"])
resultado_poli = pd.DataFrame(resultado_poli, columns=["accuracy", "precision", "recall", "f1-score", "AUC"])
resultado_rbf = pd.DataFrame(resultado_rbf, columns=["accuracy", "precision", "recall", "f1-score", "AUC"]) 
# transforma resultados CNN em df e tira a loss
resultado_CNN = pd.DataFrame(resultado_CNN, columns=["loss", "accuracy", "precision", "recall", "AUC"]).drop(columns=["loss"])
resultado_CNN_hinge = pd.DataFrame(resultado_CNN_hinge, columns=["loss", "accuracy", "precision", "recall", "AUC"]).drop(columns=["loss"])
# calcula f1
resultado_CNN["f1-score"] = 2*(resultado_CNN["precision"]*resultado_CNN["recall"])/(resultado_CNN["precision"]+resultado_CNN["recall"])
resultado_CNN_hinge["f1-score"] = 2*(resultado_CNN_hinge["precision"]*resultado_CNN_hinge["recall"])/(resultado_CNN_hinge["precision"]+resultado_CNN_hinge["recall"])
# poe 0 nos nans
resultado_CNN.fillna(0, inplace=True)
resultado_CNN_hinge.fillna(0, inplace=True)
# reordena as colunas
colunas = ["accuracy", "precision", "recall", "f1-score", "AUC"]
resultado_CNN = resultado_CNN[colunas]
resultado_CNN_hinge = resultado_CNN_hinge[colunas]

In [20]:
display(resultado_CNN_hinge)
display(resultado_CNN)
display(resultado_linear)
display(resultado_poli)
display(resultado_rbf)

Unnamed: 0,accuracy,precision,recall,f1-score,AUC
0,0.979167,1.0,0.957447,0.978261,1.0
1,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0
3,0.46875,0.0,0.0,0.0,0.5
4,0.541667,0.0,0.0,0.0,0.5
5,0.447917,0.0,0.0,0.0,0.971698
6,1.0,1.0,1.0,1.0,1.0
7,0.489583,0.0,0.0,0.0,0.5
8,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0


Unnamed: 0,accuracy,precision,recall,f1-score,AUC
0,1.0,1.0,1.0,1.0,1.0
1,0.447917,0.0,0.0,0.0,0.5
2,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0
8,0.458333,0.0,0.0,0.0,0.5
9,1.0,1.0,1.0,1.0,1.0


Unnamed: 0,accuracy,precision,recall,f1-score,AUC
0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0


Unnamed: 0,accuracy,precision,recall,f1-score,AUC
0,1.0,1.0,1.0,1.0,1.0
1,1.0,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,1.0
3,1.0,1.0,1.0,1.0,1.0
4,1.0,1.0,1.0,1.0,1.0
5,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0
7,1.0,1.0,1.0,1.0,1.0
8,1.0,1.0,1.0,1.0,1.0
9,1.0,1.0,1.0,1.0,1.0


Unnamed: 0,accuracy,precision,recall,f1-score,AUC
0,0.770833,0.685714,1.0,0.813559,0.770833
1,0.697917,1.0,0.45283,0.623377,0.726415
2,1.0,1.0,1.0,1.0,1.0
3,0.927083,1.0,0.847826,0.917647,0.923913
4,0.635417,0.5625,1.0,0.72,0.656863
5,1.0,1.0,1.0,1.0,1.0
6,1.0,1.0,1.0,1.0,1.0
7,0.53125,0.53125,1.0,0.693878,0.5
8,1.0,1.0,1.0,1.0,1.0
9,0.916667,1.0,0.843137,0.914894,0.921569


In [21]:
resultados_finais = pd.DataFrame(columns=["accuracy", "precision", "recall", "f1-score", "AUC"])
resultados_finais.loc["CNN"] = resultado_CNN.mean()
resultados_finais.loc["CNN hinge"] = resultado_CNN_hinge.mean()
resultados_finais.loc["SVC linear"] = resultado_linear.mean()
resultados_finais.loc["SVC poli"] = resultado_poli.mean()
resultados_finais.loc["SVC rbf"] = resultado_rbf.mean()
resultados_finais

Unnamed: 0,accuracy,precision,recall,f1-score,AUC
CNN,0.890625,0.8,0.8,0.8,0.9
CNN hinge,0.792708,0.6,0.595745,0.597826,0.84717
SVC linear,1.0,1.0,1.0,1.0,1.0
SVC poli,1.0,1.0,1.0,1.0,1.0
SVC rbf,0.847917,0.877946,0.914379,0.868335,0.849959
