#### Based on https://medium.com/swlh/painless-fine-tuning-of-bert-in-pytorch-b91c14912caa and
https://github.com/aniruddhachoudhury/BERT-Tutorials/blob/master/Blog%202/BERT_Fine_Tuning_Sentence_Classification.ipynb

In [2]:
# IPython autoreload in mode 2: re-import modules before each cell executes,
# so edits to local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib.pyplot as plt
import torch.nn as nn
from os.path import join
import torch
from nlpClassifiers.data.dataset  import NLPDataset
#from torch.optim import AdamW, SGD
from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup
from torch.nn import LayerNorm as BertLayerNorm
import numpy as np
import time
import logging
import datetime
import random
import pandas as pd
import argparse
import pickle as pk
import itertools
import os
import shutil
from pathlib import Path
import copy
import wandb
import re
from nlpClassifiers import settings
from scipy.special import expit
from sklearn.metrics import classification_report

In [4]:
def predict(
    model_path: Path,
    dataset: str,
    batch_size: int,
    labels_dict,
    device: torch.device
):
    """Evaluate a fine-tuned BERT classifier on the test split and print a report.

    Builds the ``"test"`` split of ``dataset`` through ``NLPDataset``, restores
    a ``BertForSequenceClassification`` checkpoint from ``model_path``, runs it
    over the whole split in evaluation mode and prints scikit-learn's
    ``classification_report``.

    NOTE: besides its arguments, this function reads the module-level names
    ``sentence_max_len``, ``bert_path`` and ``train_corpus``; they must be
    defined before the function is called.

    Args:
        model_path: Checkpoint location handed to ``from_pretrained``.
        dataset: Dataset name forwarded to ``NLPDataset``.
        batch_size: Number of examples per evaluation batch.
        labels_dict: Mapping whose keys are used as the class names in the
            report and whose values are used as the corresponding label ids.
        device: Device the model and every batch are moved to.

    Returns:
        None. The classification report is written to stdout.
    """
    print("====Loading dataset for testing")
    test_corpus = NLPDataset(dataset, "test", sentence_max_len, bert_path, labels_dict)
    test_dataloader = DataLoader(
        test_corpus,
        batch_size=batch_size,
        pin_memory=True,
        num_workers=0,
        drop_last=False,
    )

    print("====Loading model for testing")
    model = BertForSequenceClassification.from_pretrained(
        model_path,
        num_labels=train_corpus.num_labels,
        output_attentions=False,
        # Hidden states are never read during evaluation, so do not ask the
        # model to materialise them.
        output_hidden_states=False,
    )
    model.to(device)
    model.eval()

    pred_labels = []
    test_labels = []

    print("====Testing model...")
    # Inference only: no autograd bookkeeping is needed for any batch.
    with torch.no_grad():
        for batch in test_dataloader:
            # The segment ids are part of each batch but are not fed to the
            # model (token_type_ids is left as None below).
            b_input_ids, _b_segment_ids, b_input_mask, b_labels = (
                t.to(device) for t in batch
            )
            outputs = model(
                b_input_ids,
                b_input_mask,
                token_type_ids=None,
                labels=b_labels,
            )
            # With labels supplied the outputs are ordered (loss, logits, ...).
            # Index instead of tuple-unpacking: unpacking a dict-like
            # ModelOutput would yield its keys rather than its tensors.
            logits = outputs[1]

            # Predicted class = index of the largest logit in each row.
            pred_labels.extend(logits.argmax(dim=1).tolist())
            test_labels.extend(b_labels.int().reshape(-1).tolist())

    print(
        classification_report(
            test_labels,
            pred_labels,
            labels=list(labels_dict.values()),
            target_names=list(labels_dict.keys()),
            digits=3,
        )
    )

    # Drop the only reference to the model first so its GPU memory becomes
    # eligible for release by empty_cache().
    del model
    torch.cuda.empty_cache()



In [5]:
# Dataset location and its three CSV splits (paths are relative to the
# notebook's working directory).
DATA_PATH = '../../data/virtual-operator'
TRAIN_DATASET, VAL_DATASET, TEST_DATASET = (
    os.path.join(DATA_PATH, csv_name)
    for csv_name in ('train.csv', 'val.csv', 'test.csv')
)

# Root model directory for each dataset family.
PATH_TO_VIRTUAL_OPERATOR_MODELS = "../../models/virtual-operator"
PATH_TO_AGENT_BENCHMARK_MODELS = "../../models/agent-benchmark"
PATH_TO_ML_PT_MODELS = "../../models/mercado-livre-pt-only"

# Specific virtual-operator model directories.
MODELS_PATH = '../../models/virtual-operator/bert-base-portuguese-tapt-classifier/'
PATH_TO_BERT = '../../models/virtual-operator/bertimbau-adaptive-base-finetuned/'

In [6]:
# --- Run configuration ---
# Which dataset to evaluate, and the name the checkpoint was saved under.
dataset = 'agent-benchmark'
save_name = 'agent-benchmark-100-epochs-early-stop-reset-3-bert-base-cased'

# Values forwarded to NLPDataset when the corpora are built.
bert_path = 'bert-base-cased'
sentence_max_len = 30

# Evaluation batch size and the index of the CUDA device to run on.
batch_size = 16
gpu = 0

In [7]:
# Dataset name -> root directory holding that dataset's models.
BASE_PATH_TO_MODELS = {
    "virtual-operator": PATH_TO_VIRTUAL_OPERATOR_MODELS,
    "agent-benchmark": PATH_TO_AGENT_BENCHMARK_MODELS,
    "mercado-livre-pt": PATH_TO_ML_PT_MODELS,
}

# Classifier directory under the root of the currently selected dataset.
FULL_PATH_TO_MODELS = join(
    BASE_PATH_TO_MODELS[dataset],
    "bert-base-portuguese-tapt-classifier",
)

In [8]:
# Run on the configured GPU when CUDA is usable; otherwise fall back to the CPU
# so the notebook still works on machines without a GPU.
device = torch.device(f"cuda:{gpu}" if torch.cuda.is_available() else "cpu")


In [9]:
# Checkpoint directory: <models dir>/base-dataset-<dataset>-<save_name>.
model_path = Path(FULL_PATH_TO_MODELS) / f"base-dataset-{dataset}-{save_name}"
# Alias under which the checkpoint is handed to predict().
last_saved_model = model_path

In [10]:
# Build the training split; predict() reads train_corpus.num_labels from this
# module-level object, and its labels_dict is reused for the test split.
train_corpus = NLPDataset(dataset, "train", sentence_max_len, bert_path)
labels_dict = train_corpus.labels_dict

In [12]:
# Evaluate the saved checkpoint on the test split; the classification report
# is printed to the cell output.
predict(last_saved_model, dataset, batch_size, labels_dict, device)

====Loading dataset for testing
====Loading model for testing
====Testing model...
                           precision    recall  f1-score   support

    calendar_notification      0.361     0.489     0.415        45
     transport_directions      0.760     0.463     0.576        41
           cooking_recipe      0.879     0.644     0.744        45
               radio_play      0.812     0.842     0.827       139
             lists_remove      0.943     0.815     0.874        81
               news_query      0.759     0.712     0.735       146
         cooking_question      0.556     0.667     0.606        45
           contacts_query      0.851     0.755     0.800        53
             general_joke      0.913     0.933     0.923        45
               audio_mute      0.750     0.789     0.769        38
            QA_open_query      0.530     0.442     0.482       120
          transport_train      0.786     0.968     0.868        95
         weather_question      0.673     0.77

In [13]:
# Release unoccupied GPU memory cached by PyTorch's allocator.
torch.cuda.empty_cache()