<a href="https://colab.research.google.com/github/rllima/IF704/blob/main/entity_recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#http://alexminnaar.com/2019/08/22/ner-rnns-tensorflow.html
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 


import numpy as np
import os
import time
import pandas as pd

print(tf.__version__)

Instructions for updating:
non-resource variables are not supported in the long term
2.6.0


In [2]:
!wget https://raw.githubusercontent.com/rllima/IF704/main/data/train.csv
!wget https://raw.githubusercontent.com/rllima/IF704/main/data/test.csv

--2021-08-18 20:45:22--  https://raw.githubusercontent.com/rllima/IF704/main/data/train.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 354161 (346K) [text/plain]
Saving to: ‘train.csv.2’


2021-08-18 20:45:22 (95.7 MB/s) - ‘train.csv.2’ saved [354161/354161]

--2021-08-18 20:45:22--  https://raw.githubusercontent.com/rllima/IF704/main/data/test.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 568765 (555K) [text/plain]
Saving to: ‘test.csv.2’


2021-08-18 20:45:22 (125 MB/s) - ‘test.csv.2’ saved [568765/568765]



In [3]:
from collections import defaultdict


In [4]:
def read_data(file_path):
    tokens = []
    tags = []
    
    tweet_tokens = []
    tweet_tags = []
    for line in open(file_path, encoding='utf-8'):
        line = line.strip()
        if not line:
            if tweet_tokens:
                tokens.append(tweet_tokens)
                tags.append(tweet_tags)
            tweet_tokens = []
            tweet_tags = []
        else:
            token, tag = line.split()
            # Replace all urls with <URL> token
            # Replace all users with <USR> token
            if token.find('http://') == 0 or token.find('https://') == 0:
                token = '<URL>'
            if token[0] == '@':
                token = '<USR>'
            
            tweet_tokens.append(token)
            tweet_tags.append(tag)
            
    return tokens, tags

Prepare dictionaries

To train a neural network, we will use two mappings:

{token} →
→
 {token id}: address the row in embeddings matrix for the current token;
{tag} →
→
 {tag id}: one-hot ground truth probability distribution vectors for computing the loss at the output of the network.
Now you need to implement the function build_dict which will return {token or tag} →
→
 {index} and vice versa.

In [5]:
def build_dict(tokens_or_tags, special_tokens):
    """
        tokens_or_tags: a list of lists of tokens or tags
        special_tokens: some special tokens
    """
    # Create a dictionary with default value 0
    tok2idx = defaultdict(lambda: 0)
    idx2tok = []
    
    # Create mappings from tokens (or tags) to indices and vice versa.
    # Add special tokens (or tags) to the dictionaries.
    # The first special token must have index 0.
    
    # Mapping tok2idx should contain each token or tag only once. 
    # To do so, you should extract unique tokens/tags from the tokens_or_tags variable
    # and then index them (for example, you can add them into the list idx2tok
    # and for each token/tag save the index into tok2idx).

    for twt in tokens_or_tags:
        for tok in twt:
            idx2tok.append(tok)
    idx2tok = list(set(idx2tok))
    idx2tok = special_tokens + idx2tok
    for i, v in enumerate(idx2tok):
        tok2idx[v] = i
    
    return tok2idx, idx2tok

In [6]:
train_tokens, train_tags = read_data('train.csv')
test_tokens, test_tags = read_data('test.csv')

In [7]:
special_tokens = ['<UNK>', '<PAD>']
special_tags = ['O']

# Create dictionaries 
token2idx, idx2token = build_dict(train_tokens, special_tokens)
tag2idx, idx2tag = build_dict(train_tags, special_tags)

In [8]:
len(set(idx2token)) == len(idx2token)

True

In [9]:
def words2idxs(tokens_list):
    return [token2idx[word] for word in tokens_list]

def tags2idxs(tags_list):
    return [tag2idx[tag] for tag in tags_list]

def idxs2words(idxs):
    return [idx2token[idx] for idx in idxs]

def idxs2tags(idxs):
    return [idx2tag[idx] for idx in idxs]

In [10]:
def batches_generator(batch_size, tokens, tags,
                      shuffle=True, allow_smaller_last_batch=True):
    """Generates padded batches of tokens and tags."""
    
    n_samples = len(tokens)
    if shuffle:
        order = np.random.permutation(n_samples)
    else:
        order = np.arange(n_samples)

    n_batches = n_samples // batch_size
    if allow_smaller_last_batch and n_samples % batch_size:
        n_batches += 1

    for k in range(n_batches):
        batch_start = k * batch_size
        batch_end = min((k + 1) * batch_size, n_samples)
        current_batch_size = batch_end - batch_start
        x_list = []
        y_list = []
        max_len_token = 0
        for idx in order[batch_start: batch_end]:
            x_list.append(words2idxs(tokens[idx]))
            y_list.append(tags2idxs(tags[idx]))
            max_len_token = max(max_len_token, len(tags[idx]))
            
        # Fill in the data into numpy nd-arrays filled with padding indices.
        x = np.ones([current_batch_size, max_len_token], dtype=np.int32) * token2idx['<PAD>']
        y = np.ones([current_batch_size, max_len_token], dtype=np.int32) * tag2idx['O']
        lengths = np.zeros(current_batch_size, dtype=np.int32)
        for n in range(current_batch_size):
            utt_len = len(x_list[n])
            x[n, :utt_len] = x_list[n]
            lengths[n] = utt_len
            y[n, :utt_len] = y_list[n]
        yield x, y, lengths

In [11]:
class BiLSTMModel():
    pass

In [12]:
def declare_placeholders(self):
    """Specifies placeholders for the model."""

    # Placeholders for input and ground truth output.
    self.input_batch = tf.placeholder(dtype=tf.int32, shape=[None, None], name='input_batch') 
    self.ground_truth_tags = tf.placeholder(dtype=tf.int32, shape=[None, None], name='ground_truth_tags') 
  
    # Placeholder for lengths of the sequences.
    self.lengths = tf.placeholder(dtype=tf.int32, shape=[None], name='lengths') 
    
    # Placeholder for a dropout keep probability. If we don't feed
    # a value for this placeholder, it will be equal to 1.0.
    self.dropout_ph = tf.placeholder_with_default(tf.cast(1.0, tf.float32), shape=[])
    
    # Placeholder for a learning rate (tf.float32).
    self.learning_rate_ph = tf.placeholder(dtype=tf.float32, shape=[])

In [13]:
BiLSTMModel.__declare_placeholders = classmethod(declare_placeholders)

Now, let us specify the layers of the neural network. 
First, we need to perform some preparatory steps:

Create embeddings matrix with tf.Variable. 
Specify its name (embeddings_matrix), type (tf.float32), and initialize with random values.
Create forward and backward LSTM cells. TensorFlow provides a number of RNN cells ready for you.
Wrap your cells with DropoutWrapper. Dropout is an important regularization technique for neural networks. Specify all keep probabilities using the dropout placeholder that we created before.
After that, you can build the computation graph that transforms an input_batch:

Look up embeddings for an input_batch in the prepared embedding_matrix.
Pass the embeddings through Bidirectional Dynamic RNN with the specified forward and backward cells. Use the lengths placeholder here to avoid computations for padding tokens inside the RNN.
Create a dense layer on top. Its output will be used directly in loss function.
Fill in the code below. In case you need to debug something, the easiest way is to check that tensor shapes of each step match the expected ones.

In [14]:
def build_layers(self, vocabulary_size, embedding_dim, n_hidden_rnn, n_tags):
    """Specifies bi-LSTM architecture and computes logits for inputs."""
    
    # Create embedding variable (tf.Variable) with dtype tf.float32
    initial_embedding_matrix = np.random.randn(vocabulary_size, embedding_dim) / np.sqrt(embedding_dim)
    embedding_matrix_variable = tf.Variable(initial_embedding_matrix, dtype=tf.float32)
    ######### YOUR CODE HERE #############
    
    # Create RNN cells (for example, tf.nn.rnn_cell.BasicLSTMCell) with n_hidden_rnn number of units 
    # and dropout (tf.nn.rnn_cell.DropoutWrapper), initializing all *_keep_prob with dropout placeholder.
    forward_cell = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(n_hidden_rnn),
                                                input_keep_prob=self.dropout_ph,
                                                output_keep_prob=self.dropout_ph,
                                                state_keep_prob=self.dropout_ph)
    ######### YOUR CODE HERE #############
    backward_cell = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(n_hidden_rnn),
                                                input_keep_prob=self.dropout_ph,
                                                output_keep_prob=self.dropout_ph,
                                                state_keep_prob=self.dropout_ph)
    ######### YOUR CODE HERE #############

    # Look up embeddings for self.input_batch (tf.nn.embedding_lookup).
    # Shape: [batch_size, sequence_len, embedding_dim].
    embeddings =  tf.nn.embedding_lookup(embedding_matrix_variable, self.input_batch)
    ######### YOUR CODE HERE #############
    
    # Pass them through Bidirectional Dynamic RNN (tf.nn.bidirectional_dynamic_rnn).
    # Shape: [batch_size, sequence_len, 2 * n_hidden_rnn]. 
    # Also don't forget to initialize sequence_length as self.lengths and dtype as tf.float32.
    (rnn_output_fw, rnn_output_bw), _ =  tf.nn.bidirectional_dynamic_rnn(
    forward_cell,
    backward_cell,
    embeddings,
    dtype=tf.float32,
    sequence_length=self.lengths)
    ######### YOUR CODE HERE #############
    rnn_output = tf.concat([rnn_output_fw, rnn_output_bw], axis=2)

    # Dense layer on top.
    # Shape: [batch_size, sequence_len, n_tags].   
    self.logits = tf.layers.dense(rnn_output, n_tags, activation=None)

In [15]:
BiLSTMModel.__build_layers = classmethod(build_layers)

To compute the actual predictions of the neural network, you need to apply softmax to the last layer and find the most probable tags with argmax.

In [16]:
def compute_predictions(self):
    """Transforms logits to probabilities and finds the most probable tags."""
    
    # Create softmax (tf.nn.softmax) function
    softmax_output = tf.nn.softmax(self.logits)
    
    # Use argmax (tf.argmax) to get the most probable tags
    # Don't forget to set axis=-1
    # otherwise argmax will be calculated in a wrong way
    self.predictions = tf.argmax(softmax_output, axis = -1)

In [17]:
BiLSTMModel.__compute_predictions = classmethod(compute_predictions)

During training we do not need predictions of the network, but we need a loss function. We will use cross-entropy loss, efficiently implemented in TF as cross entropy with logits. Note that it should be applied to logits of the model (not to softmax probabilities!). Also note, that we do not want to take into account loss terms coming from <PAD> tokens. So we need to mask them out, before computing mean.

In [18]:
def compute_loss(self, n_tags, PAD_index):
    """Computes masked cross-entopy loss with logits."""
    
    # Create cross entropy function function (tf.nn.softmax_cross_entropy_with_logits)
    ground_truth_tags_one_hot = tf.one_hot(self.ground_truth_tags, n_tags)
    loss_tensor =  tf.nn.softmax_cross_entropy_with_logits(labels=ground_truth_tags_one_hot,
                                                           logits=self.logits)
    
    mask = tf.cast(tf.not_equal(self.input_batch, PAD_index), tf.float32)
    # Create loss function which doesn't operate with <PAD> tokens (tf.reduce_mean)
    mask = tf.cast(tf.not_equal(loss_tensor, PAD_index), tf.float32)
    self.loss =  tf.reduce_mean(mask*loss_tensor)

In [19]:
BiLSTMModel.__compute_loss = classmethod(compute_loss)

The last thing to specify is how we want to optimize the loss. We suggest that you use Adam optimizer with a learning rate from the corresponding placeholder. You will also need to apply clipping to eliminate exploding gradients. It can be easily done with clip_by_norm function.

In [20]:
def perform_optimization(self):
    """Specifies the optimizer and train_op for the model."""
    
    # Create an optimizer (tf.train.AdamOptimizer)
    self.optimizer =  tf.train.AdamOptimizer(learning_rate=self.learning_rate_ph)
    ######### YOUR CODE HERE #############
    self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
    
    # Gradient clipping (tf.clip_by_norm) for self.grads_and_vars
    # Pay attention that you need to apply this operation only for gradients 
    # because self.grads_and_vars contains also variables.
    # list comprehension might be useful in this case.
    clip_norm = tf.cast(1.0, tf.float32)
    self.grads_and_vars = [(tf.clip_by_norm(g, clip_norm), v) for g,v in self.grads_and_vars]
    
    self.train_op = self.optimizer.apply_gradients(self.grads_and_vars)

In [21]:
BiLSTMModel.__perform_optimization = classmethod(perform_optimization)

In [22]:
def init_model(self, vocabulary_size, n_tags, embedding_dim, n_hidden_rnn, PAD_index):
    self.__declare_placeholders()
    self.__build_layers(vocabulary_size, embedding_dim, n_hidden_rnn, n_tags)
    self.__compute_predictions()
    self.__compute_loss(n_tags, PAD_index)
    self.__perform_optimization()

In [23]:
BiLSTMModel.__init__ = classmethod(init_model)

Train the network and predict tags
Session.run is a point which initiates computations in the graph that we have defined. To train the network, we need to compute self.train_op, which was declared in perform_optimization. To predict tags, we just need to compute self.predictions. Anyway, we need to feed actual data through the placeholders that we defined before.

In [24]:
def train_on_batch(self, session, x_batch, y_batch, lengths, learning_rate, dropout_keep_probability):
    feed_dict = {self.input_batch: x_batch,
                 self.ground_truth_tags: y_batch,
                 self.learning_rate_ph: learning_rate,
                 self.dropout_ph: dropout_keep_probability,
                 self.lengths: lengths}
    
    session.run(self.train_op, feed_dict=feed_dict)

In [25]:
BiLSTMModel.train_on_batch = classmethod(train_on_batch)

In [26]:
def predict_for_batch(self, session, x_batch, lengths):
    feed_dict = {self.input_batch: x_batch,
                self.lengths: lengths}
    predictions = session.run(self.predictions, feed_dict=feed_dict)
    return predictions

In [27]:
BiLSTMModel.predict_for_batch = classmethod(predict_for_batch)

In [28]:
def predict_tags(model, session, token_idxs_batch, lengths):
    """Performs predictions and transforms indices to tokens and tags."""
    
    tag_idxs_batch = model.predict_for_batch(session, token_idxs_batch, lengths)
    
    tags_batch, tokens_batch = [], []
    for tag_idxs, token_idxs in zip(tag_idxs_batch, token_idxs_batch):
        tags, tokens = [], []
        for tag_idx, token_idx in zip(tag_idxs, token_idxs):
            tags.append(idx2tag[tag_idx])
            tokens.append(idx2token[token_idx])
        tags_batch.append(tags)
        tokens_batch.append(tokens)
    return tags_batch, tokens_batch

In [40]:
from collections import OrderedDict

def _update_chunk(candidate, prev, current_tag, current_chunk, current_pos, prediction=False):
    if candidate == 'B-' + current_tag:
        if len(current_chunk) > 0 and len(current_chunk[-1]) == 1:
                current_chunk[-1].append(current_pos - 1)
        current_chunk.append([current_pos])
    elif candidate == 'I-' + current_tag:
        if prediction and (current_pos == 0 or current_pos > 0 and prev.split('-', 1)[-1] != current_tag):
            current_chunk.append([current_pos])
        if not prediction and (current_pos == 0 or current_pos > 0 and prev == 'O'):
            current_chunk.append([current_pos])
    elif current_pos > 0 and prev.split('-', 1)[-1] == current_tag:
        if len(current_chunk) > 0:
            current_chunk[-1].append(current_pos - 1)

def _update_last_chunk(current_chunk, current_pos):
    if len(current_chunk) > 0 and len(current_chunk[-1]) == 1:
        current_chunk[-1].append(current_pos - 1)

def _tag_precision_recall_f1(tp, fp, fn):
    precision, recall, f1 = 0, 0, 0
    if tp + fp > 0:
        precision = tp / (tp + fp) * 100
    if tp + fn > 0:
        recall = tp / (tp + fn) * 100
    if precision + recall > 0:
        f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1

def _aggregate_metrics(results, total_correct):
    total_true_entities = 0
    total_predicted_entities = 0
    total_precision = 0
    total_recall = 0
    total_f1 = 0
    for tag, tag_metrics in results.items():
        n_pred = tag_metrics['n_predicted_entities']
        n_true = tag_metrics['n_true_entities']
        total_true_entities += n_true
        total_predicted_entities += n_pred
        total_precision += tag_metrics['precision'] * n_pred
        total_recall += tag_metrics['recall'] * n_true
    
    accuracy = 0
    if total_true_entities > 0:
        accuracy = total_correct / total_true_entities * 100
    else:
        print('CAUTION! Accuracy equals zero because there are no '\
              'correct entities. Check the correctness of your data.')
    if total_predicted_entities > 0:
        total_precision = total_precision / total_predicted_entities
    total_recall = total_recall / total_true_entities
    if total_precision + total_recall > 0:
        total_f1 = 2 * total_precision * total_recall / (total_precision + total_recall)
    return total_true_entities, total_predicted_entities, \
           total_precision, total_recall, total_f1, accuracy

def _print_info(n_tokens, total_true_entities, total_predicted_entities, total_correct):
    print('processed {len} tokens ' \
          'with {tot_true} phrases; ' \
          'found: {tot_pred} phrases; ' \
          'correct: {tot_cor}.\n'.format(len=n_tokens,
                                         tot_true=total_true_entities,
                                         tot_pred=total_predicted_entities,
                                         tot_cor=total_correct))

def _print_metrics(accuracy, total_precision, total_recall, total_f1):
    print('precision:  {tot_prec:.2f}%; ' \
          'recall:  {tot_recall:.2f}%; ' \
          'F1:  {tot_f1:.2f}\n'.format(acc=accuracy,
                                           tot_prec=total_precision,
                                           tot_recall=total_recall,
                                           tot_f1=total_f1))

def _print_tag_metrics(tag, tag_results):
    print(('\t%12s' % tag) + ': precision:  {tot_prec:6.2f}%; ' \
                               'recall:  {tot_recall:6.2f}%; ' \
                               'F1:  {tot_f1:6.2f}; ' \
                               'predicted:  {tot_predicted:4d}\n'.format(tot_prec=tag_results['precision'],
                                                                         tot_recall=tag_results['recall'],
                                                                         tot_f1=tag_results['f1'],
                                                                         tot_predicted=tag_results['n_predicted_entities']))

def precision_recall_f1(y_true, y_pred, print_results=True, short_report=False):
    # Find all tags
    tags = sorted(set(tag[2:] for tag in y_true + y_pred if tag != 'O'))

    results = OrderedDict((tag, OrderedDict()) for tag in tags)
    n_tokens = len(y_true)
    total_correct = 0

    # For eval_conll_try we find all chunks in the ground truth and prediction
    # For each chunk we store starting and ending indices
    for tag in tags:
        true_chunk = list()
        predicted_chunk = list()
        for position in range(n_tokens):
            _update_chunk(y_true[position], y_true[position - 1], tag, true_chunk, position)
            _update_chunk(y_pred[position], y_pred[position - 1], tag, predicted_chunk, position, True)

        _update_last_chunk(true_chunk, position)
        _update_last_chunk(predicted_chunk, position)

        # Then we find all correctly classified intervals
        # True positive results
        tp = sum(chunk in predicted_chunk for chunk in true_chunk)
        total_correct += tp

        # And then just calculate errors of the first and second kind
        # False negative
        fn = len(true_chunk) - tp
        # False positive
        fp = len(predicted_chunk) - tp
        precision, recall, f1 = _tag_precision_recall_f1(tp, fp, fn)

        results[tag]['precision'] = precision
        results[tag]['recall'] = recall
        results[tag]['f1'] = f1
        results[tag]['n_predicted_entities'] = len(predicted_chunk)
        results[tag]['n_true_entities'] = len(true_chunk)

    total_true_entities, total_predicted_entities, \
           total_precision, total_recall, total_f1, accuracy = _aggregate_metrics(results, total_correct)

    if print_results:
      print("Entrei")
      _print_info(n_tokens, total_true_entities, total_predicted_entities, total_correct)
      _print_metrics(accuracy, total_precision, total_recall, total_f1)

      if not short_report:
          for tag, tag_results in results.items():
              _print_tag_metrics(tag, tag_results)
    return results

In [42]:
def eval_conll(model, session, tokens, tags, short_report=True):
    """Computes NER quality measures using CONLL shared task script."""
    
    y_true, y_pred = [], []
    for x_batch, y_batch, lengths in batches_generator(1, tokens, tags):
        tags_batch, tokens_batch = predict_tags(model, session, x_batch, lengths)
        if len(x_batch[0]) != len(tags_batch[0]):
            raise Exception("Incorrect length of prediction for the input, "
                            "expected length: %i, got: %i" % (len(x_batch[0]), len(tags_batch[0])))
        predicted_tags = []
        ground_truth_tags = []
        for gt_tag_idx, pred_tag, token in zip(y_batch[0], tags_batch[0], tokens_batch[0]): 
            if token != '<PAD>':
                ground_truth_tags.append(idx2tag[gt_tag_idx])
                predicted_tags.append(pred_tag)

        # We extend every prediction and ground truth sequence with 'O' tag
        # to indicate a possible end of entity.
        y_true.extend(ground_truth_tags + ['O'])
        y_pred.extend(predicted_tags + ['O'])
        
    results = precision_recall_f1(y_true, y_pred, print_results=True, short_report=short_report)
    return results

In [31]:


model = BiLSTMModel(vocabulary_size=len(token2idx), n_tags=len(tag2idx), embedding_dim=200, n_hidden_rnn=200,PAD_index=token2idx['<PAD>'])

batch_size = 32
n_epochs = 4
learning_rate = .005
learning_rate_decay = 2**(.5)
dropout_keep_probability = .5

Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor




Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.





In [43]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

print('Start training... \n')
for epoch in range(n_epochs):
    # For each epoch evaluate the model on train and validation data
    print('-' * 20 + ' Epoch {} '.format(epoch+1) + 'of {} '.format(n_epochs) + '-' * 20)
    print('Train data evaluation:')
    eval_conll(model, sess, train_tokens, train_tags, short_report=True)
    
    # Train the model
    for x_batch, y_batch, lengths in batches_generator(batch_size, train_tokens, train_tags):
        model.train_on_batch(sess, x_batch, y_batch, lengths, learning_rate, dropout_keep_probability)
        
    # Decaying the learning rate
    learning_rate = learning_rate / learning_rate_decay
    
print('...training finished.')

Start training... 

-------------------- Epoch 1 of 4 --------------------
Train data evaluation:
Entrei
processed 48855 tokens with 1496 phrases; found: 32288 phrases; correct: 52.

precision:  0.16%; recall:  3.48%; F1:  0.31

-------------------- Epoch 2 of 4 --------------------
Train data evaluation:
Entrei
processed 48855 tokens with 1496 phrases; found: 3 phrases; correct: 0.

precision:  0.00%; recall:  0.00%; F1:  0.00

-------------------- Epoch 3 of 4 --------------------
Train data evaluation:
Entrei
processed 48855 tokens with 1496 phrases; found: 0 phrases; correct: 0.

precision:  0.00%; recall:  0.00%; F1:  0.00

-------------------- Epoch 4 of 4 --------------------
Train data evaluation:
Entrei
processed 48855 tokens with 1496 phrases; found: 12 phrases; correct: 0.

precision:  0.00%; recall:  0.00%; F1:  0.00

...training finished.


In [44]:
print('-' * 20 + ' Train set quality: ' + '-' * 20)
train_results = eval_conll(model, sess, train_tokens, train_tags, short_report=False)


-------------------- Train set quality: --------------------
Entrei
processed 48855 tokens with 1496 phrases; found: 186 phrases; correct: 7.

precision:  3.76%; recall:  0.47%; F1:  0.83

	     company: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	    facility: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	     geo-loc: precision:   25.00%; recall:    0.36%; F1:    0.71; predicted:     4

	       movie: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	 musicartist: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	       other: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     2

	      person: precision:    3.33%; recall:    1.34%; F1:    1.91; predicted:   180

	     product: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	  sportsteam: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	      tvshow: precision:    0.00%; recall:   

In [45]:
print('-' * 20 + ' Test set quality: ' + '-' * 20)
test_results = eval_conll(model, sess, test_tokens, test_tags, short_report=False)

-------------------- Test set quality: --------------------
Entrei
processed 65745 tokens with 3473 phrases; found: 97 phrases; correct: 2.

precision:  2.06%; recall:  0.06%; F1:  0.11

	     company: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	    facility: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	     geo-loc: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     3

	       movie: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	 musicartist: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	       other: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	      person: precision:    2.13%; recall:    0.41%; F1:    0.69; predicted:    94

	     product: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	  sportsteam: precision:    0.00%; recall:    0.00%; F1:    0.00; predicted:     0

	      tvshow: precision:    0.00%; recall:    0