#### Based on https://medium.com/swlh/painless-fine-tuning-of-bert-in-pytorch-b91c14912caa
https://github.com/aniruddhachoudhury/BERT-Tutorials/blob/master/Blog%202/BERT_Fine_Tuning_Sentence_Classification.ipynb

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import torch.nn as nn
from os.path import join
import torch
from nlpClassifiers.data.dataset  import NLPDataset
from nlpClassifiers.models.models import BertSentenceFeaturesModel
#from torch.optim import AdamW, SGD
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from torch.nn import LayerNorm as BertLayerNorm
import numpy as np
import time
import logging
import datetime
import random
import pandas as pd
import argparse
import pickle as pk
import itertools
import os
import shutil
from pathlib import Path
import copy
import wandb
import re
from nlpClassifiers import settings
from scipy.special import expit
from sklearn.metrics import classification_report



In [3]:
def predict(
    model_path: Path,
    dataset: str,
    batch_size: int,
    labels_dict,
    device: torch.device
):
    """Evaluate the saved best model on the test split and print a report.

    Args:
        model_path: Directory that contains ``best-model.pth``.
        dataset: Dataset identifier forwarded to ``NLPDataset``.
        batch_size: Number of examples per evaluation batch.
        labels_dict: Mapping whose keys are used as the report's row names
            and whose values are used as the label ids.
        device: Device the model and every batch are moved to.

    Returns:
        None. The classification report is printed to stdout.

    Note:
        Reads the module-level ``sentence_max_len`` and ``bert_path`` when
        building the test dataset.
    """
    print("====Loading dataset for testing")
    test_corpus = NLPDataset(dataset, "test", sentence_max_len, bert_path, labels_dict)
    test_dataloader = DataLoader(
        test_corpus,
        batch_size=batch_size,
        # Evaluation needs neither shuffling nor a fixed batch size: iterate
        # in order and keep the final partial batch so that every test
        # example is counted in the report.
        sampler=SequentialSampler(test_corpus),
        pin_memory=True,
        num_workers=0,
        drop_last=False,
    )

    print("====Loading model for testing")
    # NOTE(review): torch.load unpickles a full model object; only load
    # checkpoints from a trusted source.
    # map_location lets a checkpoint saved on another device be loaded here.
    model = torch.load(join(model_path, "best-model.pth"), map_location=device)
    model.to(device)
    model.eval()
    pred_labels = []
    test_labels = []

    print("====Testing model...")
    with torch.no_grad():
        for batch in test_dataloader:
            b_input_ids, b_segment_ids, b_input_mask, b_labels = (
                t.to(device) for t in batch
            )
            # NOTE(review): b_input_mask is unpacked but not passed to the
            # model, exactly as in the original call -- confirm against the
            # model's forward() signature.
            _, logits = model(b_input_ids, b_segment_ids, b_labels)
            # Highest-scoring class index per example.
            preds = logits.argmax(dim=1)
            pred_labels.extend(preds.cpu().tolist())
            test_labels.extend(b_labels.int().cpu().tolist())

    print(classification_report(test_labels, pred_labels, labels=list(labels_dict.values()), target_names=np.array(list(labels_dict.keys())), digits=3))

    # Release the model and any cached GPU memory before the next cell runs.
    del model
    torch.cuda.empty_cache()



In [4]:
def get_accuracy_from_logits(logits, labels):
    """Return the percentage (0-100) of entries whose argmax matches the label.

    Args:
        logits: Tensor of scores; the predicted class is the argmax over the
            last dimension.
        labels: Tensor of reference class indices compared element-wise with
            the predictions.

    Returns:
        The accuracy as a Python float in percent.
    """
    predicted = logits.cpu().argmax(-1)
    # 1.0 where the prediction equals the label, 0.0 elsewhere.
    hits = (labels.cpu() == predicted).float().detach().numpy()
    n_correct = hits.sum()
    return float(100 * n_correct / len(hits))

In [5]:
# Data directory and the model directories for the virtual-operator setup.
DATA_PATH = '../../data/virtual-operator'
MODELS_PATH = '../../models/virtual-operator/bert-base-portuguese-tapt-classifier/'
PATH_TO_BERT = '../../models/virtual-operator/bertimbau-adaptive-base-finetuned/'

# CSV file of each split, all located directly under DATA_PATH.
TRAIN_DATASET, VAL_DATASET, TEST_DATASET = (
    os.path.join(DATA_PATH, csv_name)
    for csv_name in ('train.csv', 'val.csv', 'test.csv')
)

# Root directory holding the saved models of each dataset.
PATH_TO_VIRTUAL_OPERATOR_MODELS =  "../../models/virtual-operator"
PATH_TO_AGENT_BENCHMARK_MODELS = "../../models/agent-benchmark"
PATH_TO_ML_PT_MODELS = "../../models/mercado-livre-pt-only"

In [6]:
# Which dataset to evaluate and the name the run was saved under.
dataset = 'agent-benchmark'
save_name = 'bert-features-classifier-cls-agent-benchmark'

# Settings passed to NLPDataset when the corpora are built.
bert_path = 'bert-base-cased'
sentence_max_len = 30

# Runtime settings: evaluation batch size and the CUDA device index.
batch_size = 16
gpu = 0

In [7]:
# Root models directory for each supported dataset name.
BASE_PATH_TO_MODELS = {
    "virtual-operator": PATH_TO_VIRTUAL_OPERATOR_MODELS,
    "agent-benchmark": PATH_TO_AGENT_BENCHMARK_MODELS,
    "mercado-livre-pt": PATH_TO_ML_PT_MODELS,
}
# Classifier sub-directory under the root of the currently selected dataset.
FULL_PATH_TO_MODELS = os.path.join(
    BASE_PATH_TO_MODELS[dataset],
    "bert-base-portuguese-tapt-classifier",
)

In [8]:
# Use the configured GPU when CUDA is available; otherwise fall back to the
# CPU so the notebook can still run on a machine without a usable GPU.
device = torch.device(f"cuda:{gpu}" if torch.cuda.is_available() else "cpu")


In [9]:
# Directory of the saved run: <FULL_PATH_TO_MODELS>/base-dataset-<dataset>-<save_name>
model_path = Path(FULL_PATH_TO_MODELS) / f"base-dataset-{dataset}-{save_name}"
# Alias under which the directory is handed to predict().
last_saved_model = model_path

In [10]:
# Build the training split; in the cells visible here it is only used to
# obtain the label mapping below.
train_corpus = NLPDataset(dataset, "train", sentence_max_len, bert_path)
# Label mapping taken from the training corpus; predict() passes it to the
# test dataset and uses its keys/values for the report's names and label ids.
labels_dict = train_corpus.labels_dict

In [11]:
# Evaluate the saved model on the test split; the classification report is
# printed to the cell output.
predict(last_saved_model, dataset, batch_size, labels_dict, device)

====Loading dataset for testing
====Loading model for testing
====Testing model...
                           precision    recall  f1-score   support

    calendar_notification      0.386     0.378     0.382        45
     transport_directions      0.538     0.512     0.525        41
           cooking_recipe      0.825     0.733     0.776        45
               radio_play      0.782     0.816     0.799       136
             lists_remove      0.870     0.827     0.848        81
               news_query      0.725     0.740     0.732       146
         cooking_question      0.580     0.644     0.611        45
           contacts_query      0.712     0.712     0.712        52
             general_joke      0.909     0.889     0.899        45
               audio_mute      0.737     0.737     0.737        38
            QA_open_query      0.421     0.429     0.425       119
          transport_train      0.824     0.884     0.853        95
         weather_question      0.544     0.78