In [1]:
import os
import pandas as pd
import pprint

from pathlib import Path
from transformers import pipeline

from src import sentiment, helper, utility, context
from src.config import *
from src.metrics import evaluate_pipe

# Turn off parallelism in the tokenizers library via its environment switch
# before any tokenizer is created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
helper.list_config()  # presumably reports the active configuration; see src.helper

# Read the raw JSONL corpus and split it into train / validation / test sets.
jsonl_path = Path("miko.jsonl")
jsonl = helper.read_jsonl_as_string(jsonl_path)
split_bundle = utility.load_split_dataset(jsonl)  # load again, un-mapped
train_ds, val_ds, test_ds, label2id, id2label = split_bundle

# Plain Python lists of the held-out texts and labels, used to score the pipeline.
test_text_column = test_ds[SharedConfig.TEXT_COL]
texts = helper.to_list_str(test_text_column)
labels = list(test_ds[SharedConfig.LABEL_COL])

# Model configuration and translation switch for this run.
modelConfig = MBertConfig
require_translation = False

# Keep the setup result under its own name: rebinding `context` here would
# shadow the imported `src.context` module with a different kind of object.
pipeline_context = context.setup_pipeline(modelConfig, require_translation)

trainer = sentiment.train(pipeline_context)

# `Trainer.tokenizer` is deprecated in favour of `Trainer.processing_class`
# (the run emits that warning). Prefer the new attribute and fall back to the
# old one so the cell still works on versions that predate it.
tokenizer = getattr(trainer, "processing_class", None)
if tokenizer is None:
    tokenizer = trainer.tokenizer

# Wrap the fine-tuned model in a text-classification pipeline that returns a
# score for every label, then score it on the held-out test texts.
pipe = pipeline(
    "text-classification",
    model=trainer.model,
    tokenizer=tokenizer,
    # NOTE(review): `return_all_scores` is deprecated upstream in favour of
    # `top_k=None`, but that form may order the returned scores differently.
    # Confirm how `evaluate_pipe` reads the scores before switching.
    return_all_scores=True,
    device=AppConfig.DEVICE,
)
metrics = evaluate_pipe(pipe, texts, labels, id2label=trainer.model.config.id2label)
helper.print_header("pipeline metrics")
pprint.pprint(metrics)

# Optional smoke test: run the fine-tuned model on a few hand-written samples.
if AppConfig.INFER_SAMPLE:
    # Every literal needs its own trailing comma; adjacent string literals are
    # silently concatenated, which would merge two samples into one text.
    sample_texts = [
        "Maganda ang serbisyo at mabilis ang delivery!",
        "Sobrang pangit ng karanasan ko.",
        "It was okay, nothing special.",
    ]
    # Prefer `processing_class` over the deprecated `Trainer.tokenizer`,
    # falling back for versions that do not provide the new attribute.
    sample_tokenizer = getattr(trainer, "processing_class", None)
    if sample_tokenizer is None:
        sample_tokenizer = trainer.tokenizer
    sentiment.infer(sample_texts, sample_tokenizer, trainer.model)


  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|█████████████████████████████████████████████████████████████████████████████| 69/69 [00:00<00:00, 9256.88 examples/s]
Map: 100%|█████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 4279.03 examples/s]
Map: 100%|█████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 8185.15 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  super().__init__(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision Macro,Recall Macro
1,No log,1.079973,0.4,0.292929,0.291667,0.416667



### EVALUATION METRICS ###
{'epoch': 1.0,
 'eval_accuracy': 0.3333333333333333,
 'eval_f1_macro': 0.25555555555555554,
 'eval_loss': 1.0849825143814087,
 'eval_precision_macro': 0.23015873015873015,
 'eval_recall_macro': 0.35185185185185186,
 'eval_runtime': 0.1258,
 'eval_samples_per_second': 166.918,
 'eval_steps_per_second': 7.948}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Device set to use cuda:0



### PIPELINE METRICS ###
{'accuracy': 0.3333333333333333,
 'f1_macro': 0.25555555555555554,
 'report': '              precision    recall  f1-score   support\n'
           '\n'
           '           0      0.000     0.000     0.000         6\n'
           '           1      0.357     0.833     0.500         6\n'
           '           2      0.333     0.222     0.267         9\n'
           '\n'
           '    accuracy                          0.333        21\n'
           '   macro avg      0.230     0.352     0.256        21\n'
           'weighted avg      0.245     0.333     0.257        21\n'}


