# Evaluate Model Performance on the Test Set 

In [1]:
import numpy as np
import tensorflow as tf
from dataProcessing import load_file, preProcessingIWSLT12, encode_data, insert_target
from transformers import BertTokenizer
from transformers import TFBertForMaskedLM
import sys

In [2]:
# WordPiece tokenizer for the pretrained 'bert-base-uncased' vocabulary.
# do_lower_case=True lower-cases the input text before it is tokenized.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

In [3]:
# Path to the saved weights that are loaded into the network for evaluation.
# NOTE(review): the directory name looks like a training-run timestamp and
# "cp-010" like the 10th saved checkpoint -- confirm against the training code.
checkpoint_path = "/".join(("Models", "20200508_085150", "cp-010.ckpt"))

In [4]:
# Maps each punctuation label to its integer class id (ids follow the order
# of the labels below, starting at 0).
# NOTE(review): 'O' presumably stands for "no punctuation" -- confirm.
punctuation_enc = {
    label: class_id
    for class_id, label in enumerate(('O', 'COMMA', 'PERIOD', 'QUESTION'))
}

### Hyper-parameters

In [5]:
# Optional cap on the number of samples; only needed when the (commented-out)
# subsetting of X and y is re-enabled.
# n = 25000

# Number of examples per batch of the tf.data pipeline.
batch_size = 2
# Length of each input window; also the length of the model's input layer.
segment_size = 16
# Size of the last axis of the BERT output that gets flattened before the
# dense layer (presumably the 'bert-base-uncased' vocabulary size -- confirm).
vocab_size = 30_522

### Get the dataset

In [6]:
# Build the batched test dataset: read the raw test file, encode it into
# fixed-length inputs plus one-hot punctuation labels, and wrap both in a
# tf.data pipeline.

# name of data with the sentences (also used as part of the directory name)
data_name = "IWSLT12"
# testSet_01 = 'Data' + data_name + '/extractTest_01.txt'
testSet_01 = 'Data' + data_name + '/IWSLT12.TALK.train.en.txt.Test_01'

# from sentences to word + punctuation
# (preProcessingIWSLT12 and load_file are project helpers from dataProcessing)
data = load_file(preProcessingIWSLT12(testSet_01))

# encode data and insert target
X_, y_ = encode_data(data, tokenizer, punctuation_enc)
X = insert_target(X_, segment_size)
y = np.asarray(y_)

# one hot encode the labels; the depth is taken from the label encoder rather
# than hard-coded, so it stays in sync if punctuation classes are added.
# Assumes the class ids in punctuation_enc are contiguous 0..n-1.
y = tf.one_hot(y, len(punctuation_enc), dtype='int64').numpy()

# # get only a fraction of data
# X = X[0:n]
# y = y[0:n]

# pair every input row with its label row, then group into batches
dataset = tf.data.Dataset.from_tensor_slices((X, y))
dataset = dataset.batch(batch_size)

In [7]:
# Sanity check: show the dimensions of the encoded input array.
X.shape

(93846, 16)

### Build the model

In [8]:
# Build and compile the punctuation classifier: pretrained BERT (masked-LM
# head) -> flatten -> softmax over the punctuation classes.

# Number of output classes, taken from the label encoder so the output layer
# and the per-class metrics cannot drift apart from it.
num_classes = len(punctuation_enc)

# `shape` must be a tuple: `(segment_size)` is just an int, `(segment_size,)`
# is the one-element shape tuple the Keras API documents.
bert_input = tf.keras.Input(shape=(segment_size,), dtype='int32', name='bert_input')
# [0] keeps the first output of the BERT model; the Reshape below requires it
# to hold segment_size * vocab_size values per example.
x = TFBertForMaskedLM.from_pretrained('bert-base-uncased')(bert_input)[0]
x = tf.keras.layers.Reshape((segment_size*vocab_size,))(x)
dense_out = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

net = tf.keras.Model(bert_input, dense_out, name='network')

# Recall and precision for every class, in the order
# Rec_0, Prec_0, Rec_1, Prec_1, ...
per_class_metrics = []
for class_id in range(num_classes):
    per_class_metrics.append(
        tf.keras.metrics.Recall(class_id=class_id, name=f'Rec_{class_id}'))
    per_class_metrics.append(
        tf.keras.metrics.Precision(class_id=class_id, name=f'Prec_{class_id}'))

# The dense layer already applies softmax, hence from_logits=False.
net.compile(optimizer='adam',
            loss=tf.losses.CategoricalCrossentropy(from_logits=False),
            metrics=per_class_metrics)

In [9]:
# Restore the saved weights from `checkpoint_path` into the freshly built
# network. The call returns a load-status object, which the notebook echoes
# as this cell's output.
# NOTE(review): the status is never inspected, so a partially matched
# checkpoint would go unnoticed -- consider asserting on it.
net.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f4772529070>

### Evaluate the model

In [10]:
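# Evaluation is currently disabled; uncomment the line below to run the
# compiled loss and per-class recall/precision metrics over the test dataset.
# net.evaluate(dataset)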

In [13]:
# Exploratory: list every attribute and method name available on the model.
dir(net)

['_TF_MODULE_IGNORED_PROPERTIES',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_activity_regularizer',
 '_add_inbound_node',
 '_add_trackable',
 '_add_variable_with_custom_getter',
 '_assert_compile_was_called',
 '_assert_weights_created',
 '_attribute_sentinel',
 '_auto_track_sub_layers',
 '_autocast',
 '_base_init',
 '_build_input_shape',
 '_call_accepts_kwargs',
 '_call_arg_was_passed',
 '_call_fn_args',
 '_call_full_argspec',
 '_callable_losses',
 '_check_call_args',
 '_checkpoint_dependencies',
 '_clear_losses',
 '_collect_input_masks',
 '_compile_was_called',
 '_compiled_trainable_state',
 '_compute_dtype',
 '_co

In [20]:
# Exploratory: check whether the model is flagged as trainable. Uses the
# public `trainable` property instead of the private `_trainable` attribute,
# which is an implementation detail of Keras.
net.trainable

True

In [18]:
import transformers

In [19]:
# Exploratory: display a BertConfig built with no arguments, i.e. the
# library's default values. This is not read from the model loaded above.
transformers.BertConfig()

BertConfig {
  "_num_labels": 2,
  "architectures": null,
  "attention_probs_dropout_prob": 0.1,
  "bad_words_ids": null,
  "bos_token_id": null,
  "decoder_start_token_id": null,
  "do_sample": false,
  "early_stopping": false,
  "eos_token_id": null,
  "finetuning_task": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "is_decoder": false,
  "is_encoder_decoder": false,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1
  },
  "layer_norm_eps": 1e-12,
  "length_penalty": 1.0,
  "max_length": 20,
  "max_position_embeddings": 512,
  "min_length": 0,
  "model_type": "bert",
  "no_repeat_ngram_size": 0,
  "num_attention_heads": 12,
  "num_beams": 1,
  "num_hidden_layers": 12,
  "num_return_sequences": 1,
  "output_attentions": false,
  "output_hidden_states": false,
  "output_past": true,
  "pad_token_id": 0,
  "prefix": null,
  "pruned_