In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import pickle
from unicodedata import normalize
import os
import sys
import re
from tqdm import tqdm
from time import sleep
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import pylab as pl
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support
import transformers
import textclassification as tc
from joblib import Parallel, delayed
import multiprocessing
import time

In [5]:
def clean_text(x):
    if type(x) is str:
        pattern = r'[^a-zA-z0-9!.,?\s]'
        x = normalize('NFKD', x).encode('ASCII', 'ignore').decode('ASCII')
        x = re.sub(pattern, '', x)
        return x.lower()
    else:
        return ""

In [6]:
class MultiHeadSelfAttention(layers.Layer):
    def __init__(self, embed_dim, num_heads=8):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(
            query, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(
            key, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(
            value, batch_size
        )  # (batch_size, num_heads, seq_len, projection_dim)
        attention, weights = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output

In [7]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)



In [8]:
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [9]:
df            = pd.read_csv("../../data/marcapasso/train.csv")
dicionarioCsv = pd.read_csv("../../data/marcapasso/dicionario.csv")

print("Preparing texts...")
texts = [" " + tc.clean_text(text) + " " for text in df["texto"]]
    
dicionario = {}
for row in dicionarioCsv.itertuples():
    aux = []
    for diag in row:
        if type(diag) is str: aux.append(tc.clean_text(diag))
    dicionario[row[0]] = aux
    if row[0] == 9: break

Preparing texts...


In [30]:
print("Calculating scores...\n")


N_CORES = multiprocessing.cpu_count()

def return_scores(text, dicionario):
    scores = [max([tc.make_score(text, diag) for diag in dicionario[i]]) for i in range(10)]
    return scores

batch = 500
print("Working in batches of", batch)
with open('../processed_data/scores_marcapasso_tf.csv', 'w') as f:
    f.write("id, scorings\n")
    #     errors = []

    
startTime = time.time()
for i in range(0, len(texts), batch):
    print(i,"/",len(texts))
    startBatch = time.time()
    #try:
#     scores = [return_scores(text, dicionario) for text in texts[i:i+batch]]
    scores = Parallel(n_jobs = N_CORES)(delayed(return_scores)
                              (text, dicionario)
                    for text in texts[i:i+batch])
    
    if(i == 0): result = np.array(scores)
    else: result = np.concatenate((result, np.array(scores)), axis = 0)
    with open('../processed_data/scores_marcapasso_tf.csv', 'a') as f:
        for j in range(i,i+batch):
            f.write(str(j))
            f.write(',"')
            f.write(str(scores[j-i]))
            f.write('"\n')
#     except:
#         print("ERROR!!!!!")
#         errors.append([i, i+batch])
#     errors = np.array(errors)
#     np.save("errors.npy", errors)
    
    expectedTime = (((time.time() - startTime)/(i+batch)) * (len(texts))) - (time.time() - startTime)
    timeBatch    = time.time() - startBatch
    print("This batch has been done in", int(timeBatch/60), "minutes and", timeBatch%60,"seconds!")
    print("Expected time for ending is around", int(expectedTime/3600), "hours and ", int((expectedTime%3600)/60),"minutes!")
#         with open('../../data/resultados/scorings1.csv', 'a') as f:
#             for j in range(i,i+batch):
#                 f.write(str(db["ID_EXAME"][j]))
#                 f.write(',"')
#                 f.write(str(scores[j-i]))
#                 f.write('"\n')
print("Y of training data defined!!! Saving...")
np.save("results/score_marcapasso.npy", result)
print("Saved!")

Calculating scores...

Working in batches of 500
0 / 10483
This batch has been done in 0 minutes and 9.857285737991333 seconds!
Expected time for ending is around 0 hours and  3 minutes!
500 / 10483
This batch has been done in 0 minutes and 8.01957893371582 seconds!
Expected time for ending is around 0 hours and  2 minutes!
1000 / 10483
This batch has been done in 0 minutes and 11.06112289428711 seconds!
Expected time for ending is around 0 hours and  2 minutes!
1500 / 10483
This batch has been done in 0 minutes and 11.172068357467651 seconds!
Expected time for ending is around 0 hours and  2 minutes!
2000 / 10483
This batch has been done in 0 minutes and 10.840376138687134 seconds!
Expected time for ending is around 0 hours and  2 minutes!
2500 / 10483
This batch has been done in 0 minutes and 9.46355676651001 seconds!
Expected time for ending is around 0 hours and  2 minutes!
3000 / 10483
This batch has been done in 0 minutes and 12.912248134613037 seconds!
Expected time for ending i

IndexError: list index out of range

In [6]:
result = np.load("results/score_marcapasso.npy")

In [36]:
bin_res = np.array([[1 if i >= 70 else 0 for i in row] for row in result])
np.save("results/mp_bin_scores.npy", bin_res)
y_train = bin_res

In [13]:
y_train = np.load("results/mp_bin_scores.npy")

In [7]:
vocab_size = 10000  # Only consider the top 20k words
maxlen = 411
x_train = np.load("../data/x_train.npy", allow_pickle = True)
print(len(x_train), "Training sequences")
# print(len(x_val), "Validation sequences")
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
# x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

1807167 Training sequences


In [12]:
seq_size = 411
with open("../main/output/25_Jul_2020/tokenizer_10000.pickle", 'rb') as handle:
    tokenizer = pickle.load(handle)
x_train = tokenizer.texts_to_sequences(texts)

for row in x_train:
    while(len(row) < 366):
        row.append(0)
        
x_train = np.array(x_train)

In [10]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6776683403940868913
]


In [11]:
embed_dim = 32  # Embedding size for each token
num_heads = 8  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer
out_dim = 10
maxlen = 366
vocab_size = 10000

inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block1 = TransformerBlock(embed_dim, num_heads, ff_dim)
transformer_block2 = TransformerBlock(embed_dim, num_heads, ff_dim)
transformer_block3 = TransformerBlock(embed_dim, num_heads, ff_dim)
transformer_block4 = TransformerBlock(embed_dim, num_heads, ff_dim)
transformer_block5 = TransformerBlock(embed_dim, num_heads, ff_dim)
transformer_block6 = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block1(x)
x = transformer_block2(x)
x = transformer_block3(x)
x = transformer_block4(x)
x = transformer_block5(x)
x = transformer_block6(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
x = layers.Flatten()(x)
outputs = layers.Dense(out_dim, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [62]:
np.save("mp_train/x_train.npy", x_train)

In [57]:
#Okay, training
print("Okay let's train")
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
history = model.fit(
    x_train, y_train, batch_size=32, epochs=10, validation_split=0.1,
    callbacks = [tf.keras.callbacks.EarlyStopping(monitor='accuracy', mode='max', min_delta=1),
             tf.keras.callbacks.ModelCheckpoint(filepath='best_model_mp', monitor='val_accuracy', save_best_only=True)]
)

Okay let's train
Epoch 1/10

KeyboardInterrupt: 

In [29]:
model.save("last_model")

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: last_model/assets


In [4]:
vocab_size = 10000
maxlen = 411
with open("../main/output/25_Jul_2020/tokenizer_"+str(vocab_size)+".pickle", 'rb') as handle:
    tokenizer = pickle.load(handle)

In [5]:
print("Preparing texts...")
text        = [clean_text(t) for t in text]
test_X_temp = tokenizer.texts_to_sequences(text)
test_X      = keras.preprocessing.sequence.pad_sequences(test_X_temp, maxlen=maxlen)
test_Y      = np.load("../light_data/gold_labels.npy", allow_pickle = True)
valid       = np.load("../light_data/new_classes.npy")[:,1]
# test_Y = test_Y[:,0]
y_test      = np.array(np.array([[int(row[i]) for i in range(len(row)) if valid[i]] for row in test_Y]))

Preparing texts...


In [6]:
model = tf.keras.models.load_model('last_model')
model

<tensorflow.python.keras.engine.functional.Functional at 0x7fc3404a5b50>

In [2]:
import pandas as pd
pd.read_csv("../light_data/gold_mp.csv", sep = ';')

Unnamed: 0,id,real1,real2,real3,real4,real5,real6,real7,real8,real9,...,regex1,regex2,regex3,regex4,regex5,regex6,regex7,regex8,regex9,regex10
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,2,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,3,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,4,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,131,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
132,132,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
133,133,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
134,134,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
labels    = np.array(pd.read_csv("../light_data/gold_mp.csv", sep = ';'))[:,1:11]
test_text = pd.read_csv("../../data/marcapasso/texto.csv", sep = ';')["text"]
test_text = [tc.clean_text(t) for t in test_text]

In [32]:
test_text = tokenizer.texts_to_sequences(test_text)

for row in test_text:
    while(len(row) < 366):
        row.append(0)

x_test = np.array(test_text)

In [65]:
np.save("mp_train/x_test.npy", x_test)

In [35]:
y_score = model.predict(x_test, batch_size = 32)
np.save("predict_tf_best_mp.npy", y_score)

In [38]:
y_test = labels

In [68]:
np.save("mp_train/y_test.npy", y_test)

In [39]:
print("Calculating best candidates for threshold...")
n_class = 10
# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
thresholds = dict()
roc_auc = dict()
for i in range(n_class):
    fpr[i], tpr[i], thresholds[i] = metrics.roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = metrics.auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = metrics.roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])

Calculating best candidates for threshold...


In [40]:
print("Calculating thresholds...")
#Calculate limits by maximizing F1
limits = []
for j in tqdm(range(n_class)):
    bigf1 = 0
    for threshold in thresholds[j]:
        y_bin = []
        for row in y_score[:,j]:
            if row > threshold:
                y_bin.append(1)
            else:
                y_bin.append(0)
        y_bin = np.array(y_bin)
        precision, _, f1, _ = precision_recall_fscore_support(y_test[:,j], y_bin, average = 'binary')
        
        if(f1 > bigf1 and precision > 0):
            bigf1 = f1
            maxi = threshold
    limits.append(maxi)

  _warn_prf(average, modifier, msg_start, len(result))
 10%|█         | 1/10 [00:00<00:01,  7.15it/s]

Calculating thresholds...


100%|██████████| 10/10 [00:00<00:00, 17.64it/s]


In [42]:
ordem_classes_antigo = ['área_eletricamente_inativa',
       'Bloqueio_de_ramo_direito', 'Bloqueio_de_ramo_esquerdo',
       'Bloqueio_de_ramo_direito_e_bloqueio_divisional_anterossuperior_do_ramo_esquerdo',
       'Bloqueio_intraventricular_inespecífico',
       'Sobrecarga_ventricular_esquerda_(critérios_de_Romhilt-Estes)',
       'Sobrecarga_ventricular_esquerda_(critérios_de_voltagem)',
       'Fibrilação_atrial', 'Flutter_atrial',
       'Bloqueio_atrioventricular_de_2°_grau_Mobitz_I',
       'Bloqueio_atrioventricular_de_2°_grau_Mobitz_II',
       'Bloqueio_atrioventricular_2:1', 'Bloqueio_atrioventricular_avançado',
       'Bloqueio_atrioventricular_total',
       'Pré-excitação_ventricular_tipo_Wolff-Parkinson-White',
       'Sistema_de_estimulação_cardíaca_normofuncionante',
       'Sistema_de_estimulação_cardíaca_com_disfunção',
       'Taquicardia_atrial_multifocal', 'Taquicardia_atrial',
       'Taquicardia_supraventricular', 'Corrente_de_lesão_subendocárdica',
       'Alterações_primárias_da_repolarização_ventricular',
       'Extrassístoles_supraventriculares', 'Extrassístoles_ventriculares',
       'Bradicardia_sinusal',
       'ECG_dentro_dos_limites_da_normalidade_para_idade_e_sexo',
       'Alterações_da_repolarização_ventricular_atribuídas_à_ação_digitálica',
       'Alterações_inespecíficas_da_repolarização_ventricular',
       'Alterações_secundárias_da_repolarização_ventricular',
       'Arritmia_sinusal',
       'Ausência_de_sinal_eletrocardiográfico_que_impede_a_análise',
       'Interferência_na_linha_de_base_que_não_impede_a_análise_do_ECG',
       'Ausência_de_sinal_eletrocardiográfico_que_não_impede_a_análise',
       'Traçado_com_qualidade_técnica_insuficiente',
       'Possível_inversão_de_posicionamento_de_eletrodos',
       'Baixa_voltagem_em_derivações_precordiais',
       'Baixa_voltagem_em_derivações_periféricas',
       'Bloqueio_atrioventricular_de_1°_grau',
       'Bloqueio_de_ramo_direito_e_bloqueio_divisional_posteroinferior_do_ramo_esquerdo',
       'Bloqueio_divisional_anterossuperior_do_ramo_esquerdo',
       'Bloqueio_divisional_posteroinferior_do_ramo_esquerdo',
       'Desvio_do_eixo_do_QRS_para_direita',
       'Desvio_do_eixo_do_QRS_para_esquerda',
       'Dissociação_atrioventricular_isorrítmica',
       'Distúrbio_de_condução_do_ramo_direito',
       'Distúrbio_de_condução_do_ramo_esquerdo', 'Intervalo_PR_curto',
       'Intervalo_QT_prolongado', 'Isquemia_subendocárdica',
       'Progressão_lenta_de_R_nas_derivações_precordiais', 'Pausa_sinusal',
       'Corrente_de_lesão_subepicárdica',
       'Corrente_de_lesão_subepicárdica_-_provável_infarto_agudo_do_miocárdio_com_supradesnivelamento_de_ST',
       'Repolarização_precoce', 'Ritmo_atrial_ectópico',
       'Ritmo_atrial_multifocal', 'Ritmo_idioventricular_acelerado',
       'Ritmo_juncional', 'Síndrome_de_Brugada', 'Sobrecarga_atrial_direita',
       'Sobrecarga_atrial_esquerda', 'Sobrecarga_biatrial',
       'Sobrecarga_biventricular', 'Sobrecarga_ventricular_direita',
       'Sobrecarga_ventricular_esquerda(_critérios_de_voltagem)',
       'Taquicardia_sinusal', 'Taquicardia_ventricular_não_sustentada',
       'Taquicardia_ventricular_sustentada',
       'Suspeita_de_Síndrome_de_Brugada_repetir_V1-V2_em_derivações_superiores',
       'Taquicardia_juncional', 'Batimento_de_escape_atrial',
       'Batimento_de_escape_supraventricular', 'Batimento_de_escape_juncional',
       'Batimento_de_escape_ventricular']

ordem_classes = ['área_eletricamente_inativa',
 'Bloqueio_de_ramo_direito',
 'Bloqueio_de_ramo_esquerdo',
 'Sobrecarga_ventricular_esquerda_(critérios_de_Romhilt-Estes)',
 'Fibrilação_atrial',
 'Flutter_atrial',
 'Bloqueio_atrioventricular_de_2°_grau_Mobitz_I',
 'Pré-excitação_ventricular_tipo_Wolff-Parkinson-White',
 'Sistema_de_estimulação_cardíaca_normofuncionante',
 'Taquicardia_atrial_multifocal',
 'Taquicardia_supraventricular',
 'Alterações_primárias_da_repolarização_ventricular',
 'Extrassístoles_supraventriculares',
 'Extrassístoles_ventriculares',
 'Bradicardia_sinusal',
 'ECG_dentro_dos_limites_da_normalidade_para_idade_e_sexo',
 'Alterações_inespecíficas_da_repolarização_ventricular',
 'Alterações_secundárias_da_repolarização_ventricular',
 'Arritmia_sinusal',
 'Ausência_de_sinal_eletrocardiográfico_que_impede_a_análise',
 'Possível_inversão_de_posicionamento_de_eletrodos',
 'Bloqueio_atrioventricular_de_1°_grau',
 'Bloqueio_divisional_anterossuperior_do_ramo_esquerdo',
 'Bloqueio_divisional_posteroinferior_do_ramo_esquerdo',
 'Desvio_do_eixo_do_QRS_para_direita',
 'Desvio_do_eixo_do_QRS_para_esquerda',
 'Distúrbio_de_condução_do_ramo_direito',
 'Distúrbio_de_condução_do_ramo_esquerdo',
 'Intervalo_PR_curto',
 'Intervalo_QT_prolongado',
 'Isquemia_subendocárdica',
 'Progressão_lenta_de_R_nas_derivações_precordiais',
 'Ritmo_atrial_ectópico',
 'Sobrecarga_atrial_esquerda',
 'Taquicardia_sinusal']
ordem_classes_eng = ""

In [5]:
ordem_classes = ["Chagas Disease",
"Schemic Cardiomyopathy",
"Valvular Heart Disease",
"Hypertrophic Cardiomyopathy",
"Congenic Cardiopatics",
"Long QT Syndrome",
"Brugada Syndrome",
"Idiopathic Ventricular Fibrillation",
"Arrhythmogenic Dysplasia of VD", 
"Idiopathic Cardiomyopathy"]
ordem_classes = np.array(pd.read_csv("../../data/marcapasso/dicionario.csv"))[:,1][:10]

In [6]:
ordem_classes

array(['chagas', 'miocardiopatia isquêmica', 'cardiopatia valvar',
       'cardiomiopatia hipertrófica', 'cardiopatia congênita',
       'síndrome do QT longo', 'síndrome de Brugada',
       'fibrilação ventricular idiopática',
       'displasia arritmogênica do VD', 'miocardiopatia idiopática'],
      dtype=object)

In [44]:
from datetime import date

today = date.today()
date = today.strftime("%d_%b_%Y")
import os, sys
if not os.path.exists("results"):
    os.mkdir("results")
if not os.path.exists("results/"+date):
    os.mkdir("results/"+date)
# if not os.path.exists("results/"+date+"/ROC"):
#     os.mkdir("results/"+date+"/ROC")

In [56]:
print("Calculating results...")
#Function to turn score in binary
#Here the thresholds must be a list of 10 numbers
def getMetrics(y_test, y_score, thresholds):
    
    #First we turn into binary
    y_bin = []
    for j in range(len(y_score)):
        ans = []
        for i in range(n_class):
            if y_score[j][i] >= thresholds[i]:
#             if y_label[j][i]:
                ans.append(1)
            else:
                ans.append(0)
        y_bin.append(np.array(ans))
    y_bin = np.array(y_bin)
    np.save("bin_tf_last.npy", y_bin)
    
    #Then we calculate
    target_names = ["(" + ordem_classes[i] + ") Class" + str(i) for i in range(n_class)]
    precision = dict()
    recall = dict()
    f1 = dict()
    sup = dict()
    for i in range(n_class):
        precision[i], recall[i], f1[i], sup[i] = precision_recall_fscore_support(y_test[:,i], y_bin[:,i], average = 'binary')
    return precision, recall, f1, sup

#z = 0.1
#while(z < 1):
precision, recall, f1, _ = getMetrics(y_test, y_score, limits)
#   z += 0.1
f1 = f1.items()
df = pd.DataFrame(columns = ["Class", "Precision", "Recall", "F1", "Ocurrences"])
for row in f1:
    n = row[0]
    sup = y_test[:,n].sum()
    new_row = {'Class': str(ordem_classes[n]), 'Precision': precision[n], 'Recall': recall[n], 'F1': row[1], "Ocurrences": sup}
    df = df.append(new_row, ignore_index = True)
df = df.set_index("Class")
df.to_csv("results/"+date+"/resultLastModelMP.csv")

Calculating results...


In [42]:
len(np.load("bin_tf.npy")[0])

35

In [50]:
model.evaluate(test_X, test_Y)



[0.14547136425971985, 0.4534973204135895]