In [1]:
import os
import tensorflow as tf
import tensorflow_addons as tfa

import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix

import keras
from keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import Constant
from tensorflow.lookup import KeyValueTensorInitializer, StaticHashTable
from tensorflow.keras.layers import Input, LSTM, Embedding, TimeDistributed, Dropout, Bidirectional, Dense, Layer, InputSpec
from tensorflow_addons.text import crf_log_likelihood, viterbi_decode, crf_decode

import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping


from seqeval.metrics import classification_report
from seqeval.scheme import IOB2
from seqeval.scheme import IOB1


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


# Model

In [2]:
def embedding_layer(input_dim, output_dim, input_length, mask_zero):
    return Embedding(input_dim = input_dim, output_dim = output_dim, input_length = input_length, mask_zero = mask_zero)

def bilstm_crf(maxlen, n_tags, lstm_units, embedding_dim, n_words, mask_zero, training = True):
    """
    bilstm_crf - module to build BiLSTM-CRF model
    Inputs:
        - input_shape : tuple
            Tensor shape of inputs, excluding batch size
    Outputs:
        - output : tensorflow.keras.outputs.output
            BiLSTM-CRF output
    """
    input = Input(shape = (maxlen,))
    # Embedding layer
    embeddings = embedding_layer(input_dim = n_words, output_dim = embedding_dim, input_length = maxlen, mask_zero = mask_zero)
    output = embeddings(input)

    # BiLSTM layer
    output = Bidirectional(LSTM(units = lstm_units, return_sequences = True, recurrent_dropout = 0.1))(output)

    # Dense layer
    output = TimeDistributed(Dense(n_tags, activation = 'relu'))(output)

    output = CRF(n_tags, name = 'crf_layer')(output)
    return Model(input, output)

In [3]:
class CRF(Layer):
    def __init__(self,
                 output_dim,
                 sparse_target=True,
                 transitions=None,
                 **kwargs):
        """
        Args:
            output_dim (int): the number of labels to tag each temporal input.
            sparse_target (bool): whether the the ground-truth label represented in one-hot.
        Input shape:
            (batch_size, sentence length, output_dim)
        Output shape:
            (batch_size, sentence length, output_dim)
        """
        super(CRF, self).__init__(**kwargs)
        self.output_dim = int(output_dim)
        self.sparse_target = sparse_target
        self.input_spec = InputSpec(min_ndim=3)
        self.supports_masking = False
        self.sequence_lengths = None
        self.transitions = transitions

    def build(self, input_shape):
        assert len(input_shape) == 3
        f_shape = tf.TensorShape(input_shape)
        input_spec = InputSpec(min_ndim=3, axes={-1: f_shape[-1]})

        if f_shape[-1] is None:
            raise ValueError('The last dimension of the inputs to `CRF` '
                             'should be defined. Found `None`.')
        if f_shape[-1] != self.output_dim:
            raise ValueError('The last dimension of the input shape must be equal to output'
                             ' shape. Use a linear layer if needed.')
        self.input_spec = input_spec
        self.transitions = self.add_weight(name='transitions',
                                           shape=[self.output_dim, self.output_dim],
                                           initializer='glorot_uniform',
                                           trainable=True)
        self.built = True

    def compute_mask(self, inputs, mask=None):
        # Just pass the received mask from previous layer, to the next layer or
        # manipulate it if this layer changes the shape of the input
        return mask

    def call(self, inputs, sequence_lengths=None, training=None, **kwargs):
        sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
        if sequence_lengths is not None:
            assert len(sequence_lengths.shape) == 2
            assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
            seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
            assert seq_len_shape[1] == 1
            self.sequence_lengths = K.flatten(sequence_lengths)
        else:
            self.sequence_lengths = tf.ones(tf.shape(inputs)[0], dtype=tf.int32) * (
                tf.shape(inputs)[1]
            )

        viterbi_sequence, _ = crf_decode(sequences,
                                         self.transitions,
                                         self.sequence_lengths)
        output = K.one_hot(viterbi_sequence, self.output_dim)
        return K.in_train_phase(sequences, output)

    @property
    def loss(self):
        def crf_loss(y_true, y_pred):
            y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
            log_likelihood, self.transitions = crf_log_likelihood(
                y_pred,
                tf.cast(K.argmax(y_true), dtype=tf.int32) if self.sparse_target else y_true,
                self.sequence_lengths,
                transition_params=self.transitions,
            )
            return tf.reduce_mean(-log_likelihood)
        return crf_loss

    @property
    def accuracy(self):
        def viterbi_accuracy(y_true, y_pred):
            # -1e10 to avoid zero at sum(mask)
            mask = K.cast(
                K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
            shape = tf.shape(y_pred)
            sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
            y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
            if self.sparse_target:
                y_true = K.argmax(y_true, 2)
            y_pred = K.cast(y_pred, 'int32')
            y_true = K.cast(y_true, 'int32')
            corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
            return K.sum(corrects * mask) / K.sum(mask)
        return viterbi_accuracy

    def compute_output_shape(self, input_shape):
        tf.TensorShape(input_shape).assert_has_rank(3)
        return input_shape[:2] + (self.output_dim,)


    def get_config(self):
        config = super(CRF, self).get_config()
        config.update({
            'output_dim': self.output_dim,
            'sparse_target': self.sparse_target,
            'transitions': self.transitions.numpy()  # Convert the transitions to a NumPy array
        })
        return config
    
    @classmethod
    def from_config(cls, config):
        # Since 'transitions' is a NumPy array, we need to convert it back to a tensor
        transitions = tf.convert_to_tensor(config['transitions'])
        # Create a new instance of CRF with the saved configuration
        return cls(output_dim=config['output_dim'], sparse_target=config['sparse_target'], transitions=transitions)

In [None]:
"""
def viterbi_accuracy(y_true, y_pred):
    mask = K.cast(K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
    shape = tf.shape(y_pred)
    sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
    y_pred, _ = crf_decode(y_pred, K.zeros_like(y_pred), sequence_lengths)
    if K.ndim(y_true) == K.ndim(y_pred) - 1:
        y_true = K.expand_dims(y_true, K.ndim(y_pred) - 1)
    y_pred = K.cast(y_pred, 'int32')
    y_true = K.cast(y_true, 'int32')
    corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
    return K.sum(corrects * mask) / K.sum(mask)
    """


In [4]:
def viterbi_accuracy(y_true, y_pred):
    # -1e10 to avoid zero at sum(mask)
    mask = K.cast(
        K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
    shape = tf.shape(y_pred)
    sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
    y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
    if self.sparse_target:
        y_true = K.argmax(y_true, 2)
    y_pred = K.cast(y_pred, 'int32')
    y_true = K.cast(y_true, 'int32')
    corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
    return K.sum(corrects * mask) / K.sum(mask)

In [5]:
"""def crf_loss(y_true, y_pred):
    y_pred = tf.convert_to_tensor(y_pred, dtype=CRF(dtype='float32').dtype)
    log_likelihood, _ = tf.keras.layers.CRF(dtype='float32')(y_pred, y_true)
    return tf.reduce_mean(-log_likelihood)"""

In [5]:
def crf_loss(y_true, y_pred):
    y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
    log_likelihood, self.transitions = crf_log_likelihood(
        y_pred,
        tf.cast(K.argmax(y_true), dtype=tf.int32) if self.sparse_target else y_true,
        self.sequence_lengths,
        transition_params=self.transitions,
    )
    return tf.reduce_mean(-log_likelihood)

loaded_model = tf.keras.models.load_model('model_teste_load', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})

# Test Data

In [6]:
def to_tuples(data):
    iterator = zip(data["word"].values.tolist(),
                  data["tag"].values.tolist())
    return [(word, tag) for word, tag in iterator]

In [7]:
def build_vocab(data):
  all_words = list(set(data["word"].values))
  all_tags = list(set(data["tag"].values))

  word2index = {word: idx + 2 for idx, word in enumerate(all_words)}

  word2index["--UNKNOWN_WORD--"] = 0

  word2index["--PADDING--"] = 1

  index2word = {idx: word for word, idx in word2index.items()}

  tag2index = {tag: idx + 1 for idx, tag in enumerate(all_tags)}
  tag2index["--PADDING--"] = 0

  index2tag = {idx: word for word, idx in tag2index.items()}

  return word2index, index2word, tag2index, index2tag

In [8]:
def tokenize(reports, word2index, tag2index):
  contents = []
  labels = []
  for report in reports:
    content = []
    label = []
    for i in range(len(report)):
      word, tag = report[i]
      word_idx = word2index.get(word, 0)
      tag_idx = tag2index.get(tag, 0)
      content.append(word_idx)
      label.append(tag_idx)

    contents.append(content)
    labels.append(label)

  """
  padding the array with max_sentence_size
  pad_sequences(sequences, maxlen=None, dtype="int32", padding="pre", truncating="pre", value=0.0,):
  the maxlen argument if provided, or the length of the longest sequence in the list.
  """

  max_sentence_size = 512
  contents = tf.keras.preprocessing.sequence.pad_sequences(contents, maxlen=max_sentence_size, padding='post', value=1)
  labels = tf.keras.preprocessing.sequence.pad_sequences(labels, maxlen=max_sentence_size, padding='post')

  #max_sentence = len(contents[0])
  tag_size = len(tag2index)

  #print(max_sentence)

  labels_categorical = [tf.keras.utils.to_categorical(i, num_classes=tag_size) for i in labels]
  labels_categorical = np.asarray(labels_categorical)

  return contents, labels, labels_categorical, max_sentence_size

In [9]:
def number_to_word_test_sentences_and_tags(index2tag, index2word, X_test, y_test):

    test_sentences= []
    test_tags = []

    # Recupera os laudos e tags no formato word2index/tag2index
    for i in range(len(X_test)):
        aux_tag = []

        report = ""
        sentence = X_test[i]
        tags = y_test[i]

        # Recupera o laudo
        for j in range(len(sentence)):
            # Recupera a palavra
            word = sentence[j]
            # Recupera a tag
            tag = tags[j]
            """
            print(word)
            print(tag)
            """
            # A tag é predita em one-hot-enconding
            # int_tag é o inteiro que representa a tag
            # no dicionário index2tag
            int_tag = np.where(tag == int(1))
            """
            print(int_tag)
            """
            # Constrói o laudo ignorando as palavras "padding"
            # Constrói o array de tags do laudo
            if str(index2word[word]) != '--PADDING--':
                report = report + " " + str(index2word[word])
                aux_tag.append(index2tag[int(int_tag[0][0])])

        """
        print(report)
        print(aux_tag)
        """
        test_sentences.append(report)
        test_tags.append(aux_tag)

    return test_sentences, test_tags

In [11]:
def result_df_model_previous(test_sentences, test_tags, model, word2index, index2tag, MAX_SENTENCE):

    test_df = pd.DataFrame(columns = ['report', 'word', 'tag', 'tag_pred'])

    for i in range (len(test_sentences)):

        # Gera os laudos no formato index2word com o tamanho max_sentence

        #print("LAUDO " + str(i) + "____________________________________________________________________________________________")
        sentence = test_sentences[i]
        tags = test_tags[i]
        
        sentence = sentence.split()
        padded_sentence = sentence + [word2index["--PADDING--"]] * (MAX_SENTENCE - len(sentence))
        padded_sentence = [word2index.get(w, 0) for w in padded_sentence]

        # Faz a predição das tags das palavras
        pred = model.predict(np.array([padded_sentence]))
        pred = np.argmax(pred, axis=-1)

        #print(len(padded_sentence))
        #print(len(tags))
        #print(len(pred[0]))
        #print(pred[0])

        if i < 10:
            retval = ""
            for w, t, p in zip(sentence, tags, pred[0]):
                retval = retval + "{:25}: {:10} {:5}".format(w, t, index2tag[p]) + "\n"
                aux_dict = {'report': ('report_0' + str(i)), 'word': w, 'tag' : t, 'tag_pred' : index2tag[p]}
                df_new_row = pd.DataFrame([aux_dict])
                test_df = pd.concat([test_df, df_new_row])
                #test_df = test_df.append({'sentence': ('sentence_0' + str(i)), 'word': w, 'tag' : t, 'tag_pred' : index2tag[p]}, ignore_index = True)


        else:
            retval = ""
            for w, t, p in zip(sentence, tags, pred[0]):
                retval = retval + "{:25}: {:10} {:5}".format(w, t, index2tag[p]) + "\n"
                aux_dict = {'report': ('report_' + str(i)), 'word': w, 'tag' : t, 'tag_pred' : index2tag[p]}
                df_new_row = pd.DataFrame([aux_dict])
                test_df = pd.concat([test_df, df_new_row])
                #test_df = test_df.append({'sentence': ('sentence_0' + str(i)), 'word': w, 'tag' : t, 'tag_pred' : index2tag[p]}, ignore_index = True)

        #print(retval)

    return test_df

In [24]:
"""def result_df_model_previous(test_sentences_int ,test_sentences, test_tags, model, word2index, index2tag, MAX_SENTENCE):

    test_df = pd.DataFrame(columns = ['report', 'word', 'tag', 'tag_pred'])

    for i in range (len(test_sentences)):

        # Gera os laudos no formato index2word com o tamanho max_sentence

        #print("LAUDO " + str(i) + "____________________________________________________________________________________________")
        sentence = test_sentences[i]
        tags = test_tags[i]
        
        sentence = sentence.split()
        #padded_sentence = sentence + [word2index["--PADDING--"]] * (MAX_SENTENCE - len(sentence))
        #padded_sentence = [word2index.get(w, 0) for w in padded_sentence]

        # Faz a predição das tags das palavras
        #pred = model.predict(np.array([padded_sentence]))
        sentence_int = test_sentences_int[i]
        pred = model.predict(sentence_int )
        pred = np.argmax(pred, axis=-1)

        if i < 10:
            retval = ""
            for w, t, p in zip(sentence, tags, pred[0]):
                retval = retval + "{:25}: {:10} {:5}".format(w, t, index2tag[p]) + "\n"
                aux_dict = {'report': ('report_0' + str(i)), 'word': w, 'tag' : t, 'tag_pred' : index2tag[p]}
                df_new_row = pd.DataFrame([aux_dict])
                test_df = pd.concat([test_df, df_new_row])


        else:
            retval = ""
            for w, t, p in zip(sentence, tags, pred[0]):
                retval = retval + "{:25}: {:10} {:5}".format(w, t, index2tag[p]) + "\n"
                aux_dict = {'report': ('report_' + str(i)), 'word': w, 'tag' : t, 'tag_pred' : index2tag[p]}
                df_new_row = pd.DataFrame([aux_dict])
                test_df = pd.concat([test_df, df_new_row])

        #print(retval)

    return test_df"""

In [12]:
data = pd.read_csv('laudos_1_963_iob.csv', encoding= 'utf-8', index_col=0)

In [13]:
data_test = pd.read_csv('test_mod_df.csv', encoding= 'utf-8', index_col=0)

In [14]:
word2index, index2word, tag2index, index2tag = build_vocab(data)

In [15]:
reports = data_test.groupby("report").apply(to_tuples).tolist()

In [16]:
test_text_sequences, test_tag_sequences, test_tag_sequences_categorical, max_len = tokenize(reports, word2index, tag2index)

In [18]:
test_text_sequences

array([[4007, 3036, 4096, ...,    1,    1,    1],
       [4007, 3036, 4096, ...,    1,    1,    1],
       [4007, 3036, 4096, ...,    1,    1,    1],
       ...,
       [4007, 3036, 4096, ...,    1,    1,    1],
       [2775, 3015, 2267, ...,    1,    1,    1],
       [4007, 3036, 4096, ...,    1,    1,    1]])

In [19]:
test_sentences, test_tags = number_to_word_test_sentences_and_tags(index2tag, index2word, test_text_sequences, test_tag_sequences_categorical)

In [20]:
test_text_sequences[0]

array([4007, 3036, 4096, 2012, 1159, 2976,  771, 1459,  659, 3915, 2027,
       3622,  111, 4062, 1851, 1131,  526, 2329,  439, 2329, 1562,  864,
        815, 2148, 2267, 2291,  659,   95,  145, 1691, 3030, 2618, 1497,
         26, 3622, 4098,  824,  577, 2595, 1949,  119, 1331, 2267, 3217,
       2631, 1551, 1286, 3658, 3397, 1286, 3897,  779, 1286, 1537, 1691,
       4098,  824, 2595, 1949,  119, 1331, 2267, 3217, 2631, 3672, 1286,
       2601, 1691, 1467, 3389, 1694, 1427,  771, 3562, 1960, 1286, 1165,
       1286, 1694, 3389, 3343, 1765, 1286, 2495, 3134, 3776, 1723, 1286,
       2484, 1537, 1286, 1243, 1054,  659, 2000, 2733, 1286,  577, 1243,
       1289, 3931, 1851, 1623, 1286,  903, 1286, 1727,  709,  771, 3305,
       3220, 1691, 3820, 3296,  659, 2847, 2478,  216, 1286, 1474, 2876,
        864, 2161, 1691, 2725, 2156, 2078, 2714, 1691,  298, 1792, 3622,
       2440, 2368,  574, 1949, 3217,  887, 1551, 1286, 3658, 3891, 1286,
       3897,  779, 1286, 3380,  705, 2732, 1562,   

In [22]:
index2tag

{1: 'I-Tamanho',
 2: 'B-Localização',
 3: 'B-Calcificação',
 4: 'I-Bordas',
 5: 'B-Bordas',
 6: 'B-Achado',
 7: 'O',
 8: 'I-Calcificação',
 9: 'B-Atenuação',
 10: 'I-Localização',
 11: 'I-Achado',
 12: 'B-Tamanho',
 13: 'I-Atenuação',
 0: '--PADDING--'}

# Test

In [44]:
"""word2index, index2word, tag2index, index2tag = build_vocab(data)
reports = data.groupby("report").apply(to_tuples).tolist()
text_sequences, tag_sequences, tag_sequences_categorical, max_len = tokenize(reports, word2index, tag2index)
X_train, X_test, y_train, y_test = train_test_split(text_sequences, tag_sequences_categorical, test_size=0.1, random_state=1234)
test_sentences, test_tags = number_to_word_test_sentences_and_tags(index2tag, index2word, X_test, y_test)"""

# Load Models

In [15]:
print(os.getcwd())

c:\Users\tarci\OneDrive\Área de Trabalho\mestrado\bilstm_antigo


In [23]:
cur_dir = os.getcwd()

In [24]:
models_dir = cur_dir+'\\models\\'

In [26]:
loaded_model_00 = tf.keras.models.load_model(models_dir+'model_00', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
result_df_model_00 = result_df_model_previous(test_sentences, test_tags, loaded_model_00, word2index, index2tag, max_len)
result_df_model_00.to_csv("result_df_model_00.csv", encoding='utf-8')

ValueError: Unable to restore custom object of type _tf_keras_metric. Please make sure that any custom layers are included in the `custom_objects` arg when calling `load_model()` and make sure that all layers implement `get_config` and `from_config`.

In [19]:
loaded_model_00 = tf.keras.models.load_model(models_dir+'model_00', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_01 = tf.keras.models.load_model(models_dir+'model_01', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_02 = tf.keras.models.load_model(models_dir+'model_02', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_03 = tf.keras.models.load_model(models_dir+'model_03', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_04 = tf.keras.models.load_model(models_dir+'model_04', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_05 = tf.keras.models.load_model(models_dir+'model_05', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_06 = tf.keras.models.load_model(models_dir+'model_06', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_07 = tf.keras.models.load_model(models_dir+'model_07', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_08 = tf.keras.models.load_model(models_dir+'model_08', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_09 = tf.keras.models.load_model(models_dir+'model_09', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_10 = tf.keras.models.load_model(models_dir+'model_10', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_11 = tf.keras.models.load_model(models_dir+'model_11', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_12 = tf.keras.models.load_model(models_dir+'model_12', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_13 = tf.keras.models.load_model(models_dir+'model_13', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_14 = tf.keras.models.load_model(models_dir+'model_14', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_15 = tf.keras.models.load_model(models_dir+'model_15', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_16 = tf.keras.models.load_model(models_dir+'model_16', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_17 = tf.keras.models.load_model(models_dir+'model_17', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_18 = tf.keras.models.load_model(models_dir+'model_18', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_19 = tf.keras.models.load_model(models_dir+'model_19', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_20 = tf.keras.models.load_model(models_dir+'model_20', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_21 = tf.keras.models.load_model(models_dir+'model_21', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_22 = tf.keras.models.load_model(models_dir+'model_22', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_23 = tf.keras.models.load_model(models_dir+'model_23', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_24 = tf.keras.models.load_model(models_dir+'model_24', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_25 = tf.keras.models.load_model(models_dir+'model_25', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})
loaded_model_26 = tf.keras.models.load_model(models_dir+'model_26', custom_objects={'CRF': CRF, 'viterbi_accuracy': viterbi_accuracy, 'crf_loss': crf_loss})


# Result

In [49]:
result_df_model_00 = result_df_model_previous(test_sentences, test_tags, loaded_model_00, word2index, index2tag, max_len)
result_df_model_00.to_csv("result_df_model_00.csv", encoding='utf-8')

result_df_model_01 = result_df_model_previous(test_sentences, test_tags, loaded_model_01, word2index, index2tag, max_len)
result_df_model_01.to_csv("result_df_model_01.csv", encoding='utf-8')

result_df_model_02 = result_df_model_previous(test_sentences, test_tags, loaded_model_02, word2index, index2tag, max_len)
result_df_model_02.to_csv("result_df_model_02.csv", encoding='utf-8')

result_df_model_03 = result_df_model_previous(test_sentences, test_tags, loaded_model_03, word2index, index2tag, max_len)
result_df_model_03.to_csv("result_df_model_03.csv", encoding='utf-8')

result_df_model_04 = result_df_model_previous(test_sentences, test_tags, loaded_model_04, word2index, index2tag, max_len)
result_df_model_04.to_csv("result_df_model_04.csv", encoding='utf-8')

result_df_model_05 = result_df_model_previous(test_sentences, test_tags, loaded_model_05, word2index, index2tag, max_len)
result_df_model_05.to_csv("result_df_model_05.csv", encoding='utf-8')

result_df_model_06 = result_df_model_previous(test_sentences, test_tags, loaded_model_06, word2index, index2tag, max_len)
result_df_model_06.to_csv("result_df_model_06.csv", encoding='utf-8')

result_df_model_07 = result_df_model_previous(test_sentences, test_tags, loaded_model_07, word2index, index2tag, max_len)
result_df_model_07.to_csv("result_df_model_07.csv", encoding='utf-8')

result_df_model_08 = result_df_model_previous(test_sentences, test_tags, loaded_model_08, word2index, index2tag, max_len)
result_df_model_08.to_csv("result_df_model_08.csv", encoding='utf-8')

result_df_model_09 = result_df_model_previous(test_sentences, test_tags, loaded_model_09, word2index, index2tag, max_len)
result_df_model_09.to_csv("result_df_model_09.csv", encoding='utf-8')

result_df_model_10 = result_df_model_previous(test_sentences, test_tags, loaded_model_10, word2index, index2tag, max_len)
result_df_model_10.to_csv("result_df_model_10.csv", encoding='utf-8')

result_df_model_11 = result_df_model_previous(test_sentences, test_tags, loaded_model_11, word2index, index2tag, max_len)
result_df_model_11.to_csv("result_df_model_11.csv", encoding='utf-8')

result_df_model_12 = result_df_model_previous(test_sentences, test_tags, loaded_model_12, word2index, index2tag, max_len)
result_df_model_12.to_csv("result_df_model_12.csv", encoding='utf-8')

result_df_model_13 = result_df_model_previous(test_sentences, test_tags, loaded_model_13, word2index, index2tag, max_len)
result_df_model_13.to_csv("result_df_model_13.csv", encoding='utf-8')

result_df_model_14 = result_df_model_previous(test_sentences, test_tags, loaded_model_14, word2index, index2tag, max_len)
result_df_model_14.to_csv("result_df_model_14.csv", encoding='utf-8')

result_df_model_15 = result_df_model_previous(test_sentences, test_tags, loaded_model_15, word2index, index2tag, max_len)
result_df_model_15.to_csv("result_df_model_15.csv", encoding='utf-8')

result_df_model_16 = result_df_model_previous(test_sentences, test_tags, loaded_model_16, word2index, index2tag, max_len)
result_df_model_16.to_csv("result_df_model_16.csv", encoding='utf-8')

result_df_model_17 = result_df_model_previous(test_sentences, test_tags, loaded_model_17, word2index, index2tag, max_len)
result_df_model_17.to_csv("result_df_model_17.csv", encoding='utf-8')

result_df_model_18 = result_df_model_previous(test_sentences, test_tags, loaded_model_18, word2index, index2tag, max_len)
result_df_model_18.to_csv("result_df_model_18.csv", encoding='utf-8')

result_df_model_19 = result_df_model_previous(test_sentences, test_tags, loaded_model_19, word2index, index2tag, max_len)
result_df_model_19.to_csv("result_df_model_19.csv", encoding='utf-8')

result_df_model_20 = result_df_model_previous(test_sentences, test_tags, loaded_model_20, word2index, index2tag, max_len)
result_df_model_20.to_csv("result_df_model_20.csv", encoding='utf-8')

result_df_model_21 = result_df_model_previous(test_sentences, test_tags, loaded_model_21, word2index, index2tag, max_len)
result_df_model_21.to_csv("result_df_model_21.csv", encoding='utf-8')

result_df_model_22 = result_df_model_previous(test_sentences, test_tags, loaded_model_22, word2index, index2tag, max_len)
result_df_model_22.to_csv("result_df_model_22.csv", encoding='utf-8')

result_df_model_23 = result_df_model_previous(test_sentences, test_tags, loaded_model_23, word2index, index2tag, max_len)
result_df_model_23.to_csv("result_df_model_23.csv", encoding='utf-8')

result_df_model_24 = result_df_model_previous(test_sentences, test_tags, loaded_model_24, word2index, index2tag, max_len)
result_df_model_24.to_csv("result_df_model_24.csv", encoding='utf-8')

result_df_model_25 = result_df_model_previous(test_sentences, test_tags, loaded_model_25, word2index, index2tag, max_len)
result_df_model_25.to_csv("result_df_model_25.csv", encoding='utf-8')

result_df_model_26 = result_df_model_previous(test_sentences, test_tags, loaded_model_26, word2index, index2tag, max_len)
result_df_model_26.to_csv("result_df_model_26.csv", encoding='utf-8')



KeyboardInterrupt: 

# Seq Eval

In [None]:
model_name = []
results_by_model = []
results_by_model_by_tag = []

for filename in filenames:
    f = os.path.join(current_directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        print(f)

    # Transformar os dados
    data = pd.read_csv(f, index_col=0)

    # Converter as colunas para listas
    sentences = data.groupby("report").apply(to_tuples).tolist()
    texts, tags, tags_pred = tuple_2_list(sentences)
    
    print()
    print()
    print(filename)
    result_dict = classification_report(tags, tags_pred, mode="strict", scheme=IOB2, zero_division=False)
    print(result_dict)
    print()
    print()
