# Evaluate Model Performance on the Test Set 

In [1]:
import numpy as np
import tensorflow as tf
from data import load_file, process_data, preProcessingScriber

from transformers import AutoTokenizer
from transformers import TFCamembertForMaskedLM

from datetime import datetime
import os
import json
import sys

In [2]:
checkpoint_path = "ModelsExp/20200428_180956/cp-004.ckpt"

### Hyper-parameters

In [3]:
n = 512

vocab_size = 32005
segment_size = 32
batch_size = 5
train_layer_ind = -2  # 0 for all model, -2 for only top layer
num_epochs = 2

hyperparameters = {
    'vocab_size': vocab_size,
    'segment_size': segment_size,
    'batch_size': batch_size
}

In [4]:
punctuation_enc = {
    'O': 0,
    'PERIOD': 1,
}

In [5]:
# # name of data with the sentences
# data_name = "IWSLT12"
# testSet_01 = 'Data' + data_name + '/extractTest_01.txt'

# preProcessingIWSLT12(testSet_01)

# data_test = load_file('./Data/testSet_02.txt')

# tokenizer = AutoTokenizer.from_pretrained("jplu/tf-camembert-base", do_lower_case=True)

# X_test, y_test = process_data(data_test, tokenizer, punctuation_enc, segment_size)
# y_test = np.asarray(y_test)

# X_test = X_test[0:n]
# y_test = y_test[0:n]

In [7]:
print('\nPRE-PROCESS AND PROCESS DATA')

punctuation_enc = {
    'O': 0,
    'PERIOD': 1,
}


# name of dataset with sentences
data_name = "Scriber"
trainSet_01 = 'Data' + data_name + '/' + 'extractTrain_01.txt'
validSet_01 = 'Data' + data_name + '/' + 'extractValid_01.txt'


# from sentences to list of words+punctuation
outTrain = preProcessingScriber(trainSet_01)
outValid = preProcessingScriber(validSet_01)

data_train = load_file(outTrain)
data_valid = load_file(outValid)


### instantiate the tokenizer
tokenizer = AutoTokenizer.from_pretrained("jplu/tf-camembert-base", do_lower_case=True)


X_train, y_train = process_data(data_train, tokenizer, punctuation_enc, segment_size)
y_train = np.asarray(y_train)
X_valid, y_valid = process_data(data_valid, tokenizer, punctuation_enc, segment_size)
y_valid = np.asarray(y_valid)


X_test = X_train[0:n]
y_test = y_train[0:n]


PRE-PROCESS AND PROCESS DATA


In [8]:
# one hot encode the labels
y_test = tf.one_hot(y_test, 4, dtype='int64').numpy()

### Build the dataset

In [9]:
dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
dataset = dataset.batch(batch_size)

### Build the model

In [10]:
# build and compile model

bert_input = tf.keras.Input(shape=(segment_size), dtype='int32', name='bert_input')
x = TFCamembertForMaskedLM.from_pretrained("jplu/tf-camembert-base")(bert_input)[0]
x = tf.keras.layers.Reshape((segment_size*vocab_size,))(x)
dense_out = tf.keras.layers.Dense(4, activation='softmax')(x)

net = tf.keras.Model(bert_input, dense_out, name='network')

net.compile(optimizer='adam',
              loss=tf.losses.CategoricalCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.Recall(class_id=0, name='Rec_0'),
                       tf.keras.metrics.Precision(class_id=0, name='Prec_0'),
                       tf.keras.metrics.Recall(class_id=1, name='Rec_1'),
                       tf.keras.metrics.Precision(class_id=1, name='Prec_1'),
                      ])

In [11]:
# load the weights
net.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f31cc09d040>

### Evaluate the model

In [12]:
net.evaluate(dataset)



[0.10224173218011856,
 0.9958158731460571,
 0.99790358543396,
 0.970588207244873,
 0.9428571462631226]