# Evaluate Model Performance on the Test Set 

In [1]:
import numpy as np
import tensorflow as tf
from dataProcessing import load_file, preProcessingScriber, encode_data, insert_target
from transformers import AutoTokenizer
from transformers import TFCamembertForMaskedLM
from datetime import datetime
import os
import json
import sys

In [2]:
### instantiate the tokenizer
tokenizer = AutoTokenizer.from_pretrained("jplu/tf-camembert-base", do_lower_case=True)

In [3]:
### path to weights
checkpointPath = "Models/20200510_101117/cp-010.ckpt"

In [4]:
### puntuation encoder
punctuation_enc = {
    'O': 0,
    'PERIOD': 1,
}

## Hyper-parameters

In [5]:
# n = 9600

vocab_size = 32005
segment_size = 32
batch_size = 32

## Get Dataset

In [6]:
# name of dataset with sentences
data_name = "Scriber"
# fileName = 'Data' + data_name + '/' + 'extractTrain_01.txt'
# fileName = 'Data' + data_name + '/' + 'extractValid_01.txt'
# fileName = 'Data' + data_name + '/' + 'raw.processed.Train_01.txt'
fileName = 'Data' + data_name + '/' + 'raw.processed.Test_01.txt'

# from sentences to list of words+punctuation
data = load_file(preProcessingScriber(fileName))

# encode and insert target
X_, y_ = encode_data(data, tokenizer, punctuation_enc)
X = insert_target(X_, segment_size)
y = np.asarray(y_)

# get only an n of the data.
print(X.shape)
# X = X[0:n]
# y = y[0:n]
# print(X.shape)

# one hot encode the labels
y = tf.one_hot(y, 4, dtype='int64').numpy()

dataset = tf.data.Dataset.from_tensor_slices((X, y)).batch(batch_size)

(21009, 32)


## Build the model

In [7]:
# build and compile model

bert_input = tf.keras.Input(shape=(segment_size), dtype='int32', name='bert_input')
x = TFCamembertForMaskedLM.from_pretrained("jplu/tf-camembert-base")(bert_input)[0]
x = tf.keras.layers.Reshape((segment_size*vocab_size,))(x)
dense_out = tf.keras.layers.Dense(4, activation='softmax')(x)

net = tf.keras.Model(bert_input, dense_out, name='network')

net.compile(optimizer='adam',
              loss=tf.losses.CategoricalCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.Recall(class_id=0, name='Rec_0'),
                       tf.keras.metrics.Precision(class_id=0, name='Prec_0'),
                       tf.keras.metrics.Recall(class_id=1, name='Rec_1'),
                       tf.keras.metrics.Precision(class_id=1, name='Prec_1'),
                      ])

In [8]:
# load the weights
net.load_weights(checkpointPath)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd3af658e20>

## Evaluate the model

In [9]:
evaluation = net.evaluate(dataset)



In [20]:
recall = 0.5888; precision = 0.5705
2* (precision*recall) / (precision+recall)

0.579505563702234

In [10]:
# ### model 005
# # 657/657 [==============================] - 1380s 2s/step - loss: 0.2349 - Rec_0: 0.9792 - Prec_0: 0.9597 - Rec_1: 0.4778 - Prec_1: 0.6432
# recall = 0.4778; precision = 0.6432
# 2* (precision*recall) / (precision+recall)

0.5482978768956288

In [11]:
y.shape

(21009, 4)

In [12]:
test = y[0:5]
print(test)

[[1 0 0 0]
 [1 0 0 0]
 [1 0 0 0]
 [1 0 0 0]
 [0 1 0 0]]


In [13]:
np.where(test==1)

(array([0, 1, 2, 3, 4]), array([0, 0, 0, 0, 1]))

In [14]:
indTup = np.where(y==1)
print(indTup)

(array([    0,     1,     2, ..., 21006, 21007, 21008]), array([0, 0, 0, ..., 0, 0, 0]))


In [15]:
ind = indTup[1]
print(ind)

[0 0 0 ... 0 0 0]


In [16]:
ind.shape

(21009,)

In [17]:
np.sum(ind)

1532

In [18]:
# percentage of words followed by punctuation
709/9600

0.07385416666666667

In [19]:
.5 * 709/9600

0.03692708333333333