In [1]:
from pathlib import Path
import json

import tensorflow as tf
from tensorflow_addons.metrics import F1Score
from transformers import TFGPT2Model, GPT2Tokenizer
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy

from eval_utils.utils import BinaryF1Score

2021-07-18 21:31:17.819926: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


### Read dataset

In [2]:
def get_dataset(path_dataset: str, path_tokenizer: str, block_size: int, task: str):
    """Build an unbatched ``tf.data.Dataset`` of (token ids, label) pairs from a JSON file.

    For every example the title and content are joined with a single space, encoded
    with the GPT-2 tokenizer, and then padded / truncated at the end so that every
    sequence has exactly ``block_size`` ids.

    Args:
        path_dataset: Path to a JSON file holding a list of objects with the keys
            ``title``, ``content`` and ``starRating``.
        path_tokenizer: Location handed to ``GPT2Tokenizer.from_pretrained``.
        block_size: Fixed length of every token sequence after padding / truncation.
        task: ``'multi'`` for one-hot labels over 5 classes (class = rating - 1), or
            ``'binary'`` for 0/1 labels where 1 means a rating of 4 or higher.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, label)`` tuples, one per example.

    Raises:
        ValueError: If ``task`` is neither ``'multi'`` nor ``'binary'``.
    """
    # Fail loudly on an unknown task: previously any value other than 'multi' silently
    # produced binary labels, which could mismatch the loss chosen by the caller.
    if task not in ('multi', 'binary'):
        raise ValueError(f"task must be 'multi' or 'binary', got {task!r}")

    tokenizer = GPT2Tokenizer.from_pretrained(path_tokenizer)

    # Read as UTF-8 explicitly so the result does not depend on the platform locale.
    with open(path_dataset, 'r', encoding='utf-8') as input_file:
        data = json.load(input_file)

    inputs = []
    labels = []

    for example in data:
        text = f'{example["title"]} {example["content"]}'
        tokens = tokenizer.encode(text)

        # Shift the star rating to a zero-based class index.
        # NOTE(review): assumes starRating is in 1..5 - confirm against the dataset.
        label = int(example['starRating']) - 1

        if task == 'multi':
            label = tf.keras.utils.to_categorical(label, num_classes=5, dtype='int32')
        else:
            # Zero-based classes 3 and 4 (ratings 4 and 5) form the positive class.
            label = int(label > 2)

        inputs.append(tokens)
        labels.append(label)

    # Pad with zeros / cut at the end so every row has exactly block_size ids.
    inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs, padding='post', truncating='post', maxlen=block_size)

    return tf.data.Dataset.from_tensor_slices((inputs, labels))

### Evaluate a trained model on LaRoSeDa

In [3]:
def eval_laroseda(path_model: str, path_log: str, block_size: int, task: str):
    """Evaluate a saved Keras model on the LaRoSeDa dev and test splits.

    The model is loaded from ``path_model``, compiled with the loss and metrics that
    match ``task``, and evaluated on ``dev.json`` and ``test.json``. The F1 score and
    accuracy of both splits are written to ``<path_log>/<model name>.txt`` and printed.

    Args:
        path_model: Path to the saved model; the file name without ``.h5`` is used as
            the model name in the log file name and in the printed report.
        path_log: Directory for the result file. It is created if missing; a trailing
            slash is optional.
        block_size: Token-sequence length forwarded to ``get_dataset``.
        task: ``'binary'`` (binary cross-entropy + ``BinaryF1Score``) or ``'multi'``
            (categorical cross-entropy + macro ``F1Score`` over 5 classes).

    Raises:
        ValueError: If ``task`` is neither ``'binary'`` nor ``'multi'``.
    """
    # Validate up front, before the expensive model load. An unknown task would
    # otherwise select the categorical loss here while get_dataset builds binary labels.
    if task not in ('binary', 'multi'):
        raise ValueError(f"task must be 'binary' or 'multi', got {task!r}")
    is_binary = task == 'binary'

    batch_size = 1
    path_dataset = '../../../dataset/laroseda/split'
    path_tokenizer = '../../../model/tokenizer'

    model = tf.keras.models.load_model(path_model)
    # Path(...).name copes with trailing slashes and OS-specific separators.
    name_model = Path(path_model).name.replace('.h5', '')

    model.compile(
        loss=BinaryCrossentropy() if is_binary else CategoricalCrossentropy(),
        metrics=[BinaryF1Score() if is_binary else F1Score(num_classes=5, average='macro'), 'accuracy']
    )

    # No shuffling: the reported metrics are aggregates over the whole split, so the
    # order of examples is irrelevant, and an unseeded shuffle only adds run-to-run noise.
    ds_dev = get_dataset(f'{path_dataset}/dev.json', path_tokenizer, block_size, task) \
        .batch(batch_size, drop_remainder=True)
    ds_test = get_dataset(f'{path_dataset}/test.json', path_tokenizer, block_size, task) \
        .batch(batch_size, drop_remainder=True)

    log_dir = Path(path_log)
    log_dir.mkdir(parents=True, exist_ok=True)

    # evaluate() returns [loss, <f1 metric>, accuracy] in the order given to compile().
    _, f1_score_dev, accuracy_dev = model.evaluate(ds_dev)
    _, f1_score_test, accuracy_test = model.evaluate(ds_test)

    report_lines = [
        f'F1 Score for Dev: {f1_score_dev}',
        f'Accuracy for Dev: {accuracy_dev}',
        f'F1 Score for Test: {f1_score_test}',
        f'Accuracy for Test: {accuracy_test}',
    ]
    report = '\n'.join(report_lines) + '\n'

    # Join with Path so the file always lands inside log_dir, with or without a
    # trailing slash on path_log.
    with open(log_dir / f'{name_model}.txt', 'w', encoding='utf-8') as output_file:
        output_file.write(report)

    print("\nFor model version task:", name_model)
    print(report)

    # Drop the references explicitly; presumably meant to let large models be freed
    # before the next call - TODO confirm this is sufficient on the GPU.
    del model
    del ds_dev, ds_test

## Run evaluation

In [6]:
# Settings shared by every eval_laroseda call in the cells below.
# Token-sequence length passed to eval_laroseda as its block_size argument.
block_size = 128
# Root directory for result files; a per-version sub-directory is appended at call time.
path_log = '../../../log/laroseda'
# Root directory of the saved models; laid out as <version>/<version>-<task>.h5 by the calls below.
path_model = '../../../model/evaluation/laroseda'

### Base

In [5]:
# Model variant for this section; used as sub-directory and file-name prefix in the calls below.
version = 'base'

In [6]:
# Evaluate the multi-class model of the current version; results go to <path_log>/<version>/.
eval_laroseda(f'{path_model}/{version}/{version}-multi-class.h5', f'{path_log}/{version}/', block_size, 'multi')

2021-07-18 21:04:10.295925: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2021-07-18 21:04:11.787538: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-07-18 21:04:11.788222: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-07-18 21:04:11.862367: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.2GHz coreCount: 30 deviceMemorySize: 5.79GiB deviceMemoryBandwidth: 312.97GiB/s
2021-07-18 21:04:11.862388: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-07-18 21:04:11.864406: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynami



2021-07-18 21:04:18.429195: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-07-18 21:04:18.443307: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2599990000 Hz
2021-07-18 21:04:18.821861: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11


   9/1202 [..............................] - ETA: 19s - loss: 1.4396 - f1_score: 0.1416 - accuracy: 0.2242          

2021-07-18 21:04:19.144380: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11



For model version task: base-multi-class
F1 Score for Dev: 0.6714493632316589
Accuracy for Dev: 0.8918468952178955
F1 Score for Test: 0.6789112687110901
Accuracy for Test: 0.8973333239555359



In [8]:
# Evaluate the binary model of the current version; results go to <path_log>/<version>/.
eval_laroseda(f'{path_model}/{version}/{version}-binary.h5', f'{path_log}/{version}/', block_size, 'binary')


For model version task: base-binary
F1 Score for Dev: 0.9850745797157288
Accuracy for Dev: 0.9850249290466309
F1 Score for Test: 0.9791044592857361
Accuracy for Test: 0.9789999723434448



### Medium

In [9]:
# Model variant for this section; used as sub-directory and file-name prefix in the calls below.
version = 'medium'

In [10]:
# Evaluate the multi-class model of the current version; results go to <path_log>/<version>/.
eval_laroseda(f'{path_model}/{version}/{version}-multi-class.h5', f'{path_log}/{version}/', block_size, 'multi')


For model version task: medium-multi-class
F1 Score for Dev: 0.6824102997779846
Accuracy for Dev: 0.9059900045394897
F1 Score for Test: 0.6829710006713867
Accuracy for Test: 0.902999997138977



In [12]:
# Evaluate the binary model of the current version; results go to <path_log>/<version>/.
eval_laroseda(f'{path_model}/{version}/{version}-binary.h5', f'{path_log}/{version}/', block_size, 'binary')


For model version task: medium-binary
F1 Score for Dev: 0.9867329597473145
Accuracy for Dev: 0.9866888523101807
F1 Score for Test: 0.9804571270942688
Accuracy for Test: 0.9803333282470703



### Large

In [4]:
# Model variant for this section; used as sub-directory and file-name prefix in the calls below.
version = 'large'

In [14]:
# Evaluate the multi-class model of the current version; results go to <path_log>/<version>/.
eval_laroseda(f'{path_model}/{version}/{version}-multi-class.h5', f'{path_log}/{version}/', block_size, 'multi')


For model version task: large-multi-class
F1 Score for Dev: 0.6835607290267944
Accuracy for Dev: 0.9043261408805847
F1 Score for Test: 0.6791609525680542
Accuracy for Test: 0.9026666879653931



In [7]:
# Evaluate the binary model of the current version; results go to <path_log>/<version>/.
eval_laroseda(f'{path_model}/{version}/{version}-binary.h5', f'{path_log}/{version}/', block_size, 'binary')

2021-07-18 21:31:42.205345: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2021-07-18 21:31:44.091573: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-07-18 21:31:44.096672: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-07-18 21:31:44.169503: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.2GHz coreCount: 30 deviceMemorySize: 5.79GiB deviceMemoryBandwidth: 312.97GiB/s
2021-07-18 21:31:44.169529: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-07-18 21:31:44.177289: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynami



2021-07-18 21:31:59.076908: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-07-18 21:31:59.149969: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2599990000 Hz
2021-07-18 21:32:00.472352: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11


   2/1202 [..............................] - ETA: 1:36 - loss: 3.0979e-06 - binary_f1_score: 0.0000e+00 - accuracy: 1.0000   

2021-07-18 21:32:00.756659: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11



For model version task: large-binary
F1 Score for Dev: 0.9867110252380371
Accuracy for Dev: 0.9866888523101807
F1 Score for Test: 0.9807437062263489
Accuracy for Test: 0.9806666374206543

