# Evaluation

Contents:
1. <a href="#imports">Imports & installs</a>
2. <a href="#configs">Configs</a>
3. <a href="#preproc">Preproc</a>
4. <a href="#results">Results</a>
5. <a href="#cm">Confusion Matrix</a>
5. <a href="#pvo">Pytorch vs ONNX</a>

<div id="imports"></div>

##### Imports

In [2]:
!pip install datasets

Installing collected packages: dill, multiprocess, datasets
Successfully installed datasets-2.18.0 dill-0.3.8 multiprocess-0.70.16


In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
import os, sys
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertConfig
import ast

from keras.preprocessing.sequence import pad_sequences
# from sklearn.model_selection import train_test_split
import random
# Load model directly
import transformers, datasets
from transformers import AutoTokenizer,AutoModelForTokenClassification
# import onnxruntime
torch.__version__




'2.2.1'

<div id="configs"></div>

#### Configs

In [None]:
selected_tag_names = ['B-DebtInstrumentBasisSpreadOnVariableRate1',
 'B-DebtInstrumentFaceAmount',
 'B-DebtInstrumentInterestRateStatedPercentage',
 'B-DebtInstrumentMaturityDate',
 'B-LineOfCreditFacilityMaximumBorrowingCapacity',
 'I-DebtInstrumentBasisSpreadOnVariableRate1',
 'I-DebtInstrumentFaceAmount',
 'I-DebtInstrumentInterestRateStatedPercentage',
 'I-DebtInstrumentMaturityDate',
 'I-LineOfCreditFacilityMaximumBorrowingCapacity',
 'O',
 'PAD']

In [2]:
# model_path = "models/bert_subsampled_model_v1"

In [3]:
dataset = datasets.load_dataset("nlpaueb/finer-139")

ner_tags = dataset["train"].features["ner_tags"].feature.names

selected_tag_ids = [34, 37, 38, 41, 42, 43, 87, 178, 199, 204, 0]


test_shuffled = dataset["test"].shuffle(seed=42)

test_sample = dataset["test"]

test_df = pd.DataFrame(test_sample)

test_df

Unnamed: 0,id,tokens,ner_tags
0,1012878,"[The, changes, in, the, fair, value, of, the, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1012879,"[Fair, Values, Financial, Assets, and, Financi...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,1012880,"[23, Table, of, Contents, AMERICAN, EXPRESS, C...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1012881,"[The, fair, values, of, these, financial, inst...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1012882,"[(, b, ), Level, 1, amounts, reflect, interest...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...
108373,1121251,"[Card, Member, receivables, reserve, for, cred...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
108374,1121252,"[As, of, March, 31, ,, 2020, ,, these, derivat...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
108375,1121253,"[Based, on, Credco, ’, s, assessment, of, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
108376,1121254,"[Credco, has, no, derivative, amounts, subject...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [4]:
test_df_org = pd.DataFrame(test_sample)

test_df_org

Unnamed: 0,id,tokens,ner_tags
0,1012878,"[The, changes, in, the, fair, value, of, the, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1012879,"[Fair, Values, Financial, Assets, and, Financi...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,1012880,"[23, Table, of, Contents, AMERICAN, EXPRESS, C...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1012881,"[The, fair, values, of, these, financial, inst...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1012882,"[(, b, ), Level, 1, amounts, reflect, interest...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...
108373,1121251,"[Card, Member, receivables, reserve, for, cred...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
108374,1121252,"[As, of, March, 31, ,, 2020, ,, these, derivat...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
108375,1121253,"[Based, on, Credco, ’, s, assessment, of, the,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
108376,1121254,"[Credco, has, no, derivative, amounts, subject...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [6]:
# selected_tag_names = [ner_tags[x] for x in selected_tag_ids]

# selected_tag_names += ["PAD"]

selected_tag_names

['B-DebtInstrumentBasisSpreadOnVariableRate1',
 'B-DebtInstrumentFaceAmount',
 'B-DebtInstrumentInterestRateStatedPercentage',
 'B-DebtInstrumentMaturityDate',
 'B-LineOfCreditFacilityMaximumBorrowingCapacity',
 'I-DebtInstrumentBasisSpreadOnVariableRate1',
 'I-DebtInstrumentFaceAmount',
 'I-DebtInstrumentInterestRateStatedPercentage',
 'I-DebtInstrumentMaturityDate',
 'I-LineOfCreditFacilityMaximumBorrowingCapacity',
 'O',
 'PAD']

<div id="preproc"></div>

### Preproc

In [7]:
import re
def expand_tags(tokenizer, token_list, tag_list):

    final_tags = []

    counter = 0

    for t, l in zip(token_list, tag_list):

        t = re.sub(r'[^\x00-\x7F]+', '', t)
        if t == "":
            continue
        counter +=1


#         print(counter, l, t, counter)

        temp_counts = 0

        for toks in tokenizer.tokenize(t):
#             print(t, toks)


#             print(toks, temp_counts)
            if toks.startswith("##"):
                continue
            else:
                temp_counts +=1
                if temp_counts > 1:
                    if l != "O":
                        l = l.replace("B-", "I-")
#                 print(counter, "append", l, counter)

                final_tags.append(l)
    return final_tags

def expand_toks(tokenizer, token_list):

    final_toks = []
    for i, t in enumerate(token_list):
        t = re.sub(r'[^\x00-\x7F]+', '', t)
        if t == "":
            continue

        final_toks += combine_subtoks(tokenizer.tokenize(t))

    return final_toks


def combine_subtoks(toks):

    comb =[]

    curr = []
    for i,t in enumerate(toks):
#         print(i, curr)

        if t.startswith("##"):
            t = t[2:]
            curr.append(t)
        else:
            if len(curr) > 0:
                comb.append("".join(curr))
            curr= [t]
#             if i == len(toks)-1:
#                 print(i, comb)
    comb.append("".join(curr))



    return comb

from transformers import AutoTokenizer
import time


In [8]:
test_df["ner_tags"] = test_df["ner_tags"].apply(lambda x: [y if y in selected_tag_ids else 0 for y in x])
test_df["ner_tags"] = test_df["ner_tags"].apply(lambda x: [ner_tags[y] for y in x])

test_df

Unnamed: 0,id,tokens,ner_tags
0,1012878,"[The, changes, in, the, fair, value, of, the, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
1,1012879,"[Fair, Values, Financial, Assets, and, Financi...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
2,1012880,"[23, Table, of, Contents, AMERICAN, EXPRESS, C...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
3,1012881,"[The, fair, values, of, these, financial, inst...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
4,1012882,"[(, b, ), Level, 1, amounts, reflect, interest...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
...,...,...,...
108373,1121251,"[Card, Member, receivables, reserve, for, cred...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
108374,1121252,"[As, of, March, 31, ,, 2020, ,, these, derivat...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
108375,1121253,"[Based, on, Credco, ’, s, assessment, of, the,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
108376,1121254,"[Credco, has, no, derivative, amounts, subject...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."


In [9]:
tokenizer = AutoTokenizer.from_pretrained("HariLuru/finer_distillbert_v2")

In [10]:

st = time.time()

test_df["toks_exp"] = test_df["tokens"].apply(lambda x: expand_toks(tokenizer, x))

et = time.time()

print("s", et-st)

test_df["ner_expanded"] = test_df.apply(lambda x: expand_tags(
    tokenizer, x["tokens"], x["ner_tags"]), axis=1)

et2 = time.time()

print("s2", et2-et)

s 211.25368547439575
s2 222.3210048675537


In [11]:
test_df_org["ner_tags_org"] = test_df_org["ner_tags"].apply(lambda x: [ner_tags[y] for y in x])

test_df_org["toks_exp"] = test_df_org["tokens"].apply(lambda x: expand_toks(tokenizer, x))

test_df_org["ner_expanded_org"] = test_df_org.apply(lambda x: expand_tags(
    tokenizer, x["tokens"], x["ner_tags_org"]), axis=1)

In [12]:
test_df.loc[test_df["ner_expanded"].apply(lambda x: len) != test_df["toks_exp"].apply(lambda x: len)]

Unnamed: 0,id,tokens,ner_tags,toks_exp,ner_expanded


In [13]:
test_df_org.loc[test_df_org["ner_expanded_org"].apply(lambda x: len) != test_df_org["toks_exp"].apply(lambda x: len)]

Unnamed: 0,id,tokens,ner_tags,ner_tags_org,toks_exp,ner_expanded_org


In [14]:
def load_model(path, mtype="pytorch"):

    if mtype=="pytorch":

        model = AutoModelForTokenClassification.from_pretrained(
            path
        )

        return model


    elif mtype=="onnx":

        session = onnxruntime.InferenceSession(path)

        return session

In [15]:
# !pip install seqeval

In [16]:
from transformers import AutoModelForTokenClassification, AutoTokenizer
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
import torch
from collections import defaultdict
import copy

# # Example setup (replace these with your model and tokenizer)
# model_name = "your_model_name_here"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForTokenClassification.from_pretrained(model_name)

# Function to align tokens and labels

def predict(model,input_ids,attention_mask, mtype="pytorch"):

    if mtype=="pytorch":


        with torch.no_grad():

            outputs = model(input_ids, attention_mask=attention_mask)

            preds = torch.argmax(outputs.logits, axis=2)

            preds = [output[mask == 1] for output, mask in zip(preds, attention_mask)]

#             print(outputs.logits, outputs.logits.shape)

    elif mtype=="onnx":
        # Prepare the input as a dictionary

        inputs_onnx = {model.get_inputs()[0].name: input_ids.numpy()}

        # Run inference
        outputs = model.run(None, inputs_onnx)

#         print(len(outputs[0]), "lennn")



        preds = np.argmax(outputs[0], axis=2)

        preds = [output[mask == 1] for output, mask in zip(preds, attention_mask)]

    return preds



def pred_pipeline(text, model,mtype="pytorch", bs=100, device="cpu"):

#     print(tokenizer.encode_plus(text))

    all_outs = []
    
    latency = []

    with torch.no_grad():
        for bn in range(0,len(text), bs):

            max_len = min(200,max([len(x.split()) for x in text[bn:bn+bs]]))
            
            st = time.time()

            tok_enc = tokenizer.batch_encode_plus(text[bn:bn+bs], padding='longest', return_tensors="pt", truncation=True,max_length=500)

            #     print(tok_enc)

            bids = tok_enc["input_ids"].to(device)

            bams = tok_enc["attention_mask"].to(device)

            print(bn, max_len, bids.shape)

            # bids = input_ids

            # bams = attention_mask[bn:bn+bs]

#             print(bams.shape, bids.shape, bn, bn+bs)

            outputs = predict(model, bids,bams, mtype=mtype)
    
            et = time.time()
        
            latency.append(et-st)

        # After getting all_outs


#             print(outputs.logits.shape)

            all_outs.extend(outputs)
    
    print("latencies")
    print(pd.Series(latency).quantile([0.5, 0.75, 0.9, 0.99]))

    print("preds done")

#     logits = torch.cat(all_outs, dim=0)

#     print(logits.shape)

#     preds = torch.argmax(logits, axis=2)


    return all_outs


def align_predictions_with_original_words(tokenizer, text, sub_token_predictions):
    word_level_predictions = []

    for sentence, predictions in zip(text, sub_token_predictions):

        tokenized_sentence = tokenizer.tokenize(sentence)
        aligned_labels = []
        current_word = None

        for token, label_idx in zip(tokenized_sentence, predictions[1:]): # to handle [CLS]
            if token.startswith("##"):
                continue  # Skip sub-tokens
            else:
                # if current_word is not None:
                  aligned_labels.append(label_idx)
                  current_word = token
        word_level_predictions.append(aligned_labels)
    return word_level_predictions

def expand_tags(tokenizer, token_list, tag_list):

    final_tags = []

    for t, l in zip(token_list, tag_list):

        for toks in tokenizer.tokenize(t):

            temp_counts = 0
#             print(toks, temp_counts)
            if toks.startswith("##"):
                continue
            else:
                temp_counts +=1
                final_tags.append(l)
    return final_tags

def pad_preds_if_necessary(pred, actual):

    if len(pred) < len(actual):
        pred += ["O"]*(len(actual)-len(pred))

    return pred

def proc_predictions(outputs, tokenizer, text_list, test_df, colname):

    preds_aligned = align_predictions_with_original_words(tokenizer, text_list, outputs)

#     print([len(x) for x in preds_aligned])

#     print(preds_aligned)

    test_df[colname] = preds_aligned

    test_df[colname + "2"] = test_df[colname].apply(lambda x: [selected_tag_names[y] for y in x if y != 11])

    test_df[colname + "_exp"] = test_df.apply(lambda x: pad_preds_if_necessary(x[colname + "2"], x["ner_expanded"]), axis=1)

    return test_df



def extract_tagged(toks, tags):

    ext = defaultdict(lambda :[])

    for tok, tag in zip(toks, tags):
        if tag!="O":
            ext[tag].append(tok)

    return ext

# aa


In [17]:
mtype = "pytorch"
import time


# print(classification_report(val_df["ner_expanded"].tolist(), val_df["pyt_preds_exp"].tolist()))

val = test_df

# text = "The form and terms of the 3.650 % Senior Notes were established pursuant to an Officer ’ s Certificate , dated as of January 12 , 2017 , supplementing the Indenture"


text = val["toks_exp"].apply(lambda x: " ".join(x)).tolist()

model_path = "HariLuru/finer_distillbert_v2"
mtype = "pytorch"
# model_path = "models/fine_distillbert.onnx"
# mtype="onnx"
model = load_model(model_path, mtype=mtype)

# model_path = "models/fine_distillbert.onnx"
device="cuda"
model.to(device)
# model = load_model(model_path, mtype=mtype)
import time

model.eval()

st = time.time()
pyt_output = pred_pipeline(text, model, mtype=mtype, bs=50, device=device)
e1 = time.time()
val_df = proc_predictions(pyt_output, tokenizer, text, val, "pyt_preds")
e2=time.time()

print(e1-st, e2-e1)

0 195 torch.Size([50, 212])
50 96 torch.Size([50, 118])
100 113 torch.Size([50, 127])
150 98 torch.Size([50, 103])
200 75 torch.Size([50, 86])
250 98 torch.Size([50, 113])
300 149 torch.Size([50, 172])
350 96 torch.Size([50, 105])
400 200 torch.Size([50, 272])
450 200 torch.Size([50, 296])
500 200 torch.Size([50, 262])
550 137 torch.Size([50, 155])
600 118 torch.Size([50, 127])
650 200 torch.Size([50, 500])
700 200 torch.Size([50, 415])
750 128 torch.Size([50, 144])
800 200 torch.Size([50, 244])
850 171 torch.Size([50, 188])
900 200 torch.Size([50, 245])
950 200 torch.Size([50, 268])
1000 117 torch.Size([50, 122])
1050 182 torch.Size([50, 198])
1100 200 torch.Size([50, 221])
1150 186 torch.Size([50, 211])
1200 150 torch.Size([50, 170])
1250 200 torch.Size([50, 319])
1300 103 torch.Size([50, 119])
1350 178 torch.Size([50, 188])
1400 200 torch.Size([50, 267])
1450 169 torch.Size([50, 189])
1500 131 torch.Size([50, 142])
1550 119 torch.Size([50, 125])
1600 140 torch.Size([50, 149])
1650 1

13300 178 torch.Size([50, 197])
13350 84 torch.Size([50, 87])
13400 165 torch.Size([50, 180])
13450 200 torch.Size([50, 250])
13500 182 torch.Size([50, 199])
13550 200 torch.Size([50, 248])
13600 94 torch.Size([50, 102])
13650 200 torch.Size([50, 500])
13700 200 torch.Size([50, 451])
13750 200 torch.Size([50, 249])
13800 173 torch.Size([50, 196])
13850 127 torch.Size([50, 149])
13900 125 torch.Size([50, 137])
13950 168 torch.Size([50, 188])
14000 151 torch.Size([50, 159])
14050 91 torch.Size([50, 96])
14100 99 torch.Size([50, 108])
14150 89 torch.Size([50, 100])
14200 87 torch.Size([50, 96])
14250 152 torch.Size([50, 174])
14300 200 torch.Size([50, 223])
14350 84 torch.Size([50, 104])
14400 178 torch.Size([50, 194])
14450 175 torch.Size([50, 182])
14500 200 torch.Size([50, 231])
14550 114 torch.Size([50, 131])
14600 156 torch.Size([50, 175])
14650 145 torch.Size([50, 160])
14700 78 torch.Size([50, 85])
14750 72 torch.Size([50, 80])
14800 145 torch.Size([50, 167])
14850 146 torch.Size([

26250 115 torch.Size([50, 129])
26300 148 torch.Size([50, 155])
26350 91 torch.Size([50, 103])
26400 200 torch.Size([50, 293])
26450 124 torch.Size([50, 147])
26500 108 torch.Size([50, 112])
26550 200 torch.Size([50, 227])
26600 96 torch.Size([50, 107])
26650 171 torch.Size([50, 179])
26700 190 torch.Size([50, 210])
26750 143 torch.Size([50, 155])
26800 200 torch.Size([50, 500])
26850 200 torch.Size([50, 500])
26900 200 torch.Size([50, 500])
26950 110 torch.Size([50, 128])
27000 136 torch.Size([50, 146])
27050 110 torch.Size([50, 125])
27100 200 torch.Size([50, 292])
27150 95 torch.Size([50, 103])
27200 180 torch.Size([50, 206])
27250 121 torch.Size([50, 127])
27300 122 torch.Size([50, 124])
27350 122 torch.Size([50, 134])
27400 144 torch.Size([50, 161])
27450 126 torch.Size([50, 135])
27500 85 torch.Size([50, 98])
27550 200 torch.Size([50, 232])
27600 200 torch.Size([50, 440])
27650 193 torch.Size([50, 212])
27700 200 torch.Size([50, 328])
27750 111 torch.Size([50, 122])
27800 141 tor

39150 200 torch.Size([50, 259])
39200 164 torch.Size([50, 176])
39250 200 torch.Size([50, 253])
39300 89 torch.Size([50, 97])
39350 121 torch.Size([50, 126])
39400 200 torch.Size([50, 212])
39450 200 torch.Size([50, 304])
39500 200 torch.Size([50, 260])
39550 166 torch.Size([50, 183])
39600 200 torch.Size([50, 500])
39650 184 torch.Size([50, 202])
39700 144 torch.Size([50, 155])
39750 180 torch.Size([50, 194])
39800 200 torch.Size([50, 245])
39850 200 torch.Size([50, 280])
39900 160 torch.Size([50, 178])
39950 133 torch.Size([50, 150])
40000 118 torch.Size([50, 123])
40050 158 torch.Size([50, 169])
40100 118 torch.Size([50, 129])
40150 156 torch.Size([50, 166])
40200 200 torch.Size([50, 500])
40250 200 torch.Size([50, 488])
40300 200 torch.Size([50, 256])
40350 153 torch.Size([50, 175])
40400 200 torch.Size([50, 315])
40450 200 torch.Size([50, 367])
40500 102 torch.Size([50, 118])
40550 177 torch.Size([50, 212])
40600 106 torch.Size([50, 117])
40650 156 torch.Size([50, 172])
40700 85 t

52000 199 torch.Size([50, 209])
52050 127 torch.Size([50, 143])
52100 200 torch.Size([50, 282])
52150 108 torch.Size([50, 116])
52200 129 torch.Size([50, 144])
52250 132 torch.Size([50, 136])
52300 184 torch.Size([50, 195])
52350 154 torch.Size([50, 163])
52400 200 torch.Size([50, 255])
52450 200 torch.Size([50, 250])
52500 200 torch.Size([50, 324])
52550 170 torch.Size([50, 182])
52600 191 torch.Size([50, 223])
52650 112 torch.Size([50, 116])
52700 132 torch.Size([50, 140])
52750 188 torch.Size([50, 203])
52800 144 torch.Size([50, 163])
52850 132 torch.Size([50, 137])
52900 200 torch.Size([50, 217])
52950 200 torch.Size([50, 441])
53000 146 torch.Size([50, 158])
53050 175 torch.Size([50, 199])
53100 144 torch.Size([50, 164])
53150 200 torch.Size([50, 500])
53200 200 torch.Size([50, 391])
53250 140 torch.Size([50, 172])
53300 106 torch.Size([50, 115])
53350 200 torch.Size([50, 244])
53400 89 torch.Size([50, 94])
53450 181 torch.Size([50, 187])
53500 96 torch.Size([50, 100])
53550 200 t

64900 123 torch.Size([50, 138])
64950 200 torch.Size([50, 500])
65000 129 torch.Size([50, 137])
65050 200 torch.Size([50, 271])
65100 176 torch.Size([50, 192])
65150 146 torch.Size([50, 179])
65200 164 torch.Size([50, 182])
65250 200 torch.Size([50, 292])
65300 200 torch.Size([50, 427])
65350 132 torch.Size([50, 145])
65400 200 torch.Size([50, 242])
65450 158 torch.Size([50, 196])
65500 151 torch.Size([50, 164])
65550 200 torch.Size([50, 500])
65600 149 torch.Size([50, 165])
65650 174 torch.Size([50, 192])
65700 147 torch.Size([50, 165])
65750 140 torch.Size([50, 206])
65800 142 torch.Size([50, 166])
65850 200 torch.Size([50, 214])
65900 200 torch.Size([50, 244])
65950 118 torch.Size([50, 125])
66000 139 torch.Size([50, 159])
66050 129 torch.Size([50, 145])
66100 137 torch.Size([50, 146])
66150 196 torch.Size([50, 214])
66200 200 torch.Size([50, 364])
66250 102 torch.Size([50, 107])
66300 90 torch.Size([50, 93])
66350 59 torch.Size([50, 63])
66400 91 torch.Size([50, 104])
66450 109 tor

77850 130 torch.Size([50, 143])
77900 171 torch.Size([50, 191])
77950 156 torch.Size([50, 177])
78000 200 torch.Size([50, 500])
78050 161 torch.Size([50, 185])
78100 92 torch.Size([50, 98])
78150 172 torch.Size([50, 188])
78200 82 torch.Size([50, 87])
78250 62 torch.Size([50, 69])
78300 155 torch.Size([50, 178])
78350 146 torch.Size([50, 166])
78400 83 torch.Size([50, 90])
78450 200 torch.Size([50, 500])
78500 161 torch.Size([50, 185])
78550 79 torch.Size([50, 85])
78600 172 torch.Size([50, 188])
78650 82 torch.Size([50, 87])
78700 62 torch.Size([50, 69])
78750 155 torch.Size([50, 178])
78800 146 torch.Size([50, 166])
78850 83 torch.Size([50, 90])
78900 137 torch.Size([50, 150])
78950 188 torch.Size([50, 214])
79000 189 torch.Size([50, 209])
79050 200 torch.Size([50, 229])
79100 200 torch.Size([50, 225])
79150 162 torch.Size([50, 186])
79200 200 torch.Size([50, 321])
79250 150 torch.Size([50, 169])
79300 153 torch.Size([50, 172])
79350 125 torch.Size([50, 159])
79400 102 torch.Size([50

90750 200 torch.Size([50, 333])
90800 165 torch.Size([50, 178])
90850 200 torch.Size([50, 283])
90900 190 torch.Size([50, 212])
90950 200 torch.Size([50, 238])
91000 106 torch.Size([50, 110])
91050 144 torch.Size([50, 147])
91100 129 torch.Size([50, 137])
91150 136 torch.Size([50, 140])
91200 187 torch.Size([50, 193])
91250 67 torch.Size([50, 74])
91300 132 torch.Size([50, 146])
91350 200 torch.Size([50, 229])
91400 200 torch.Size([50, 421])
91450 200 torch.Size([50, 262])
91500 168 torch.Size([50, 182])
91550 200 torch.Size([50, 277])
91600 117 torch.Size([50, 135])
91650 96 torch.Size([50, 105])
91700 88 torch.Size([50, 99])
91750 169 torch.Size([50, 179])
91800 200 torch.Size([50, 333])
91850 140 torch.Size([50, 149])
91900 181 torch.Size([50, 195])
91950 200 torch.Size([50, 238])
92000 200 torch.Size([50, 225])
92050 90 torch.Size([50, 106])
92100 102 torch.Size([50, 107])
92150 200 torch.Size([50, 316])
92200 200 torch.Size([50, 213])
92250 200 torch.Size([50, 267])
92300 86 torch

103550 200 torch.Size([50, 342])
103600 181 torch.Size([50, 192])
103650 200 torch.Size([50, 253])
103700 114 torch.Size([50, 133])
103750 134 torch.Size([50, 146])
103800 147 torch.Size([50, 159])
103850 117 torch.Size([50, 128])
103900 124 torch.Size([50, 139])
103950 200 torch.Size([50, 292])
104000 137 torch.Size([50, 144])
104050 125 torch.Size([50, 133])
104100 93 torch.Size([50, 99])
104150 200 torch.Size([50, 219])
104200 143 torch.Size([50, 151])
104250 102 torch.Size([50, 111])
104300 147 torch.Size([50, 181])
104350 99 torch.Size([50, 103])
104400 200 torch.Size([50, 226])
104450 110 torch.Size([50, 113])
104500 126 torch.Size([50, 141])
104550 176 torch.Size([50, 199])
104600 184 torch.Size([50, 191])
104650 147 torch.Size([50, 158])
104700 192 torch.Size([50, 223])
104750 141 torch.Size([50, 152])
104800 200 torch.Size([50, 268])
104850 149 torch.Size([50, 163])
104900 91 torch.Size([50, 100])
104950 200 torch.Size([50, 500])
105000 200 torch.Size([50, 300])
105050 200 tor

In [154]:
model_path = "HariLuru/finer_distillbert_v2"
mtype = "pytorch"
# model_path = "models/fine_distillbert.onnx"
# mtype="onnx"
model = load_model(model_path, mtype=mtype)

# model_path = "models/fine_distillbert.onnx"
device="cpu"
model.to(device)
# model = load_model(model_path, mtype=mtype)
import time

model.eval()

st = time.time()
pyt_output = pred_pipeline(text, model, mtype=mtype, bs=50, device=device)
e1 = time.time()
val_df = proc_predictions(pyt_output, tokenizer, text, val, "pyt_preds")
e2=time.time()

print(e1-st, e2-e1)

0 195 torch.Size([50, 212])
50 96 torch.Size([50, 118])
100 113 torch.Size([50, 127])
150 98 torch.Size([50, 103])
200 75 torch.Size([50, 86])
250 98 torch.Size([50, 113])
300 149 torch.Size([50, 172])
350 96 torch.Size([50, 105])
400 200 torch.Size([50, 272])
450 200 torch.Size([50, 296])
500 200 torch.Size([50, 262])
550 137 torch.Size([50, 155])
600 118 torch.Size([50, 127])
650 200 torch.Size([50, 500])
700 200 torch.Size([50, 415])
750 128 torch.Size([50, 144])
800 200 torch.Size([50, 244])
850 171 torch.Size([50, 188])
900 200 torch.Size([50, 245])
950 200 torch.Size([50, 268])
latencies
0.50    10.935973
0.75    17.536577
0.90    21.377936
0.99    40.282069
dtype: float64
preds done
277.63116574287415 3.421466827392578


<div id="results"></div>

### Results

In [18]:
print(classification_report(val_df["ner_expanded"].tolist(), val_df["pyt_preds_exp"].tolist()))

  _warn_prf(average, modifier, msg_start, len(result))


                                              precision    recall  f1-score   support

    DebtInstrumentBasisSpreadOnVariableRate1       0.41      0.89      0.56      1340
                    DebtInstrumentFaceAmount       0.22      0.75      0.34      1063
  DebtInstrumentInterestRateStatedPercentage       0.30      0.93      0.45      1399
                  DebtInstrumentMaturityDate       0.00      0.00      0.00       164
LineOfCreditFacilityMaximumBorrowingCapacity       0.37      0.77      0.50      1345

                                   micro avg       0.31      0.82      0.45      5311
                                   macro avg       0.26      0.67      0.37      5311
                                weighted avg       0.32      0.82      0.45      5311



In [18]:
print(classification_report(val_df["ner_expanded"].tolist(), val_df["pyt_preds_exp"].tolist()))

  _warn_prf(average, modifier, msg_start, len(result))


                                              precision    recall  f1-score   support

    DebtInstrumentBasisSpreadOnVariableRate1       0.43      0.89      0.58       238
                    DebtInstrumentFaceAmount       0.25      0.74      0.38       225
  DebtInstrumentInterestRateStatedPercentage       0.34      0.93      0.49       315
                  DebtInstrumentMaturityDate       0.00      0.00      0.00        43
LineOfCreditFacilityMaximumBorrowingCapacity       0.39      0.83      0.53       260

                                   micro avg       0.35      0.82      0.49      1081
                                   macro avg       0.28      0.68      0.40      1081
                                weighted avg       0.34      0.82      0.48      1081



##### Converting to ONNX

In [None]:
import torch
from transformers import AutoConfig

# Prepare model input
text = "The form and terms of the 3.650 % Senior Notes were established pursuant to an Officer ’ s."
inputs = tokenizer(text, return_tensors="pt")
model.to("cpu")
# Exporting
output_path = "models/fine_distillbert_v6.onnx"
torch.onnx.export(model,               # model being run
                 args=(inputs['input_ids'],),  # model input (or a tuple for multiple inputs)
                 f=output_path,        # where to save the model
                 opset_version=11,     # the ONNX version to export the model to
                 do_constant_folding=True,  # whether to execute constant folding for optimization
                 input_names=['input_ids'],   # the model's input names
                 output_names=['output'],    # the model's output names
                 dynamic_axes={'input_ids': {0: 'batch_size', 1: 'seq_len'},  # Dynamic axes for inputs
                                'output': {0: 'batch_size', 1: 'seq_len'}})


In [152]:
# model_path = "HariLuru/finer_distillbert_v2"
# mtype = "pytorch"

text = val["toks_exp"].apply(lambda x: " ".join(x)).tolist()
import onnxruntime
model_path = "models/fine_distillbert_v6.onnx"
mtype="onnx"
model = load_model(model_path, mtype=mtype)

# model_path = "models/fine_distillbert.onnx"
device="cpu"
# model.to(device)
# model = load_model(model_path, mtype=mtype)
import time

# model.eval()

st = time.time()
onnx_output = pred_pipeline(text, model, mtype=mtype, bs=50, device=device)
e1 = time.time()
val_df = proc_predictions(pyt_output, tokenizer, text, val, "onnx_preds")
e2=time.time()

print(e1-st, e2-e1)


0 195 torch.Size([50, 212])
50 96 torch.Size([50, 118])
100 113 torch.Size([50, 127])
150 98 torch.Size([50, 103])
200 75 torch.Size([50, 86])
250 98 torch.Size([50, 113])
300 149 torch.Size([50, 172])
350 96 torch.Size([50, 105])
400 200 torch.Size([50, 272])
450 200 torch.Size([50, 296])
500 200 torch.Size([50, 262])
550 137 torch.Size([50, 155])
600 118 torch.Size([50, 127])
650 200 torch.Size([50, 500])
700 200 torch.Size([50, 415])
750 128 torch.Size([50, 144])
800 200 torch.Size([50, 244])
850 171 torch.Size([50, 188])
900 200 torch.Size([50, 245])
950 200 torch.Size([50, 268])
latencies
0.50     5.307915
0.75     7.489113
0.90     8.765480
0.99    19.331582
dtype: float64
preds done
124.61810040473938 11.113801956176758


<div id="pvo"></div>

##### Pytorch vs ONNX

In [32]:
print(classification_report(val_df["ner_expanded"].tolist(), val_df["pyt_preds_exp"].tolist()))

  _warn_prf(average, modifier, msg_start, len(result))


                                              precision    recall  f1-score   support

    DebtInstrumentBasisSpreadOnVariableRate1       0.48      0.65      0.55        17
                    DebtInstrumentFaceAmount       0.17      0.59      0.27        17
  DebtInstrumentInterestRateStatedPercentage       0.40      0.98      0.56        43
                  DebtInstrumentMaturityDate       0.00      0.00      0.00         3
LineOfCreditFacilityMaximumBorrowingCapacity       0.41      0.88      0.56        16

                                   micro avg       0.35      0.80      0.49        96
                                   macro avg       0.29      0.62      0.39        96
                                weighted avg       0.36      0.80      0.49        96



In [32]:
print(classification_report(val_df["ner_expanded"].tolist(), val_df["onnx_preds_exp"].tolist()))

  _warn_prf(average, modifier, msg_start, len(result))


                                              precision    recall  f1-score   support

    DebtInstrumentBasisSpreadOnVariableRate1       0.48      0.65      0.55        17
                    DebtInstrumentFaceAmount       0.17      0.59      0.27        17
  DebtInstrumentInterestRateStatedPercentage       0.40      0.98      0.56        43
                  DebtInstrumentMaturityDate       0.00      0.00      0.00         3
LineOfCreditFacilityMaximumBorrowingCapacity       0.41      0.88      0.56        16

                                   micro avg       0.35      0.80      0.49        96
                                   macro avg       0.29      0.62      0.39        96
                                weighted avg       0.36      0.80      0.49        96



In [138]:
# del val_df["ner_tags_org"]

val_df.shape

(1000, 18)

In [136]:
val_df = val_df.merge(test_df_org[["id","ner_expanded_org"]], on="id")

In [137]:
val_df

Unnamed: 0,id,tokens,ner_tags,toks_exp,ner_expanded,pyt_preds,pyt_preds2,pyt_preds_exp,onnx_preds,onnx_preds2,onnx_preds_exp,tag_exists,ind_f1,ext_true,ext_true_exp,ext_pred,ext_pred_onnx,ner_expanded_org
0,1012878,"[The, changes, in, the, fair, value, of, the, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[the, changes, in, the, fair, value, of, the, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
1,1012879,"[Fair, Values, Financial, Assets, and, Financi...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[fair, values, financial, assets, and, financi...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
2,1012880,"[23, Table, of, Contents, AMERICAN, EXPRESS, C...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[23, table, of, contents, american, express, c...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
3,1012881,"[The, fair, values, of, these, financial, inst...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[the, fair, values, of, these, financial, inst...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
4,1012882,"[(, b, ), Level, 1, amounts, reflect, interest...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[(, b, ), level, 1, amounts, reflect, interest...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1013873,"[We, combine, our, five, current, operating, s...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[we, combine, our, five, current, operating, s...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
996,1013874,"[These, four, reporting, segments, are, also, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[these, four, reporting, segments, are, also, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, B-NumberOfReportableSegments, O, O, O, O, ..."
997,1013875,"[We, have, strategically, transitioned, from, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[we, have, strategically, transitioned, from, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
998,1013876,"[Our, agreements, with, customers, are, typica...","[O, O, O, O, O, O, O, O, O, O]","[our, agreements, with, customers, are, typica...","[O, O, O, O, O, O, O, O, O, O]","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O]","[O, O, O, O, O, O, O, O, O, O]","[tensor(10, device='cuda:0'), tensor(10, devic...","[O, O, O, O, O, O, O, O, O, O]","[O, O, O, O, O, O, O, O, O, O]",0,0.0,{},{},{},{},"[O, O, O, O, O, O, O, O, O, O]"


In [143]:
import copy
true_exploded = pd.Series(val_df["ner_expanded"].tolist()).explode().tolist()

pred_exploded = pd.Series(val_df["pyt_preds_exp"].tolist()).explode().tolist()

val_df["onnx_preds_exp_org"] = val_df.apply(lambda x: pad_preds_if_necessary(copy.copy(x["pyt_preds_exp"]), x["ner_expanded_org"]), axis=1)

pred_exploded_org = pd.Series(val_df["onnx_preds_exp_org"].tolist()).explode().tolist()

true_exploded_orig = pd.Series(val_df["ner_expanded_org"].tolist()).explode().tolist()

# pred_exploded = pd.Series(val_df["onnx_preds_exp"].tolist()).explode().tolist()

In [141]:
val_df.loc[val_df.ner_expanded_org.apply(len) != val_df.onnx_preds_exp_org.apply(len)]

Unnamed: 0,id,tokens,ner_tags,toks_exp,ner_expanded,pyt_preds,pyt_preds2,pyt_preds_exp,onnx_preds,onnx_preds2,onnx_preds_exp,tag_exists,ind_f1,ext_true,ext_true_exp,ext_pred,ext_pred_onnx,ner_expanded_org,onnx_preds_exp_org


In [102]:
# pd.DataFrame(classification_report(val_df["ner_expanded_org"].tolist(), val_df["onnx_preds_exp_org"].tolist(), output_dict=True)).T

In [44]:
[x.replace("B-", "").replace("I-", "") for x in selected_tag_names]

['DebtInstrumentBasisSpreadOnVariableRate1',
 'DebtInstrumentFaceAmount',
 'DebtInstrumentInterestRateStatedPercentage',
 'DebtInstrumentMaturityDate',
 'LineOfCreditFacilityMaximumBorrowingCapacity',
 'DebtInstrumentBasisSpreadOnVariableRate1',
 'DebtInstrumentFaceAmount',
 'DebtInstrumentInterestRateStatedPercentage',
 'DebtInstrumentMaturityDate',
 'LineOfCreditFacilityMaximumBorrowingCapacity',
 'O',
 'PAD']

In [None]:
val

<div id="cm"></div>

##### Confusion Matrix

In [144]:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder


label_encoder = LabelEncoder()
y_true_encoded = label_encoder.fit_transform(true_exploded)
y_pred_encoded = label_encoder.transform(pred_exploded)

label_encoder2 = LabelEncoder()

_ = label_encoder2.fit_transform(true_exploded_orig + true_exploded)
y_true_encoded2 = label_encoder2.transform(true_exploded_orig)
y_pred_encoded2 = label_encoder2.transform(pred_exploded_org)
 

# Step 2: Compute the confusion matrix
cm = confusion_matrix(y_true_encoded, y_pred_encoded)

# Step 3: Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, 
                     index=label_encoder.classes_,    # Row names
                     columns=label_encoder.classes_)  # Column names

# cm_df.to_excel("data/confusion_matrix_v6.xlsx")

cm_df


# print(confusion_matrix(true_exploded, pred_exploded))

Unnamed: 0,B-DebtInstrumentBasisSpreadOnVariableRate1,B-DebtInstrumentFaceAmount,B-DebtInstrumentInterestRateStatedPercentage,B-DebtInstrumentMaturityDate,B-LineOfCreditFacilityMaximumBorrowingCapacity,I-DebtInstrumentBasisSpreadOnVariableRate1,I-DebtInstrumentFaceAmount,I-DebtInstrumentInterestRateStatedPercentage,I-DebtInstrumentMaturityDate,I-LineOfCreditFacilityMaximumBorrowingCapacity,O
B-DebtInstrumentBasisSpreadOnVariableRate1,11,0,5,0,0,0,0,0,0,0,1
B-DebtInstrumentFaceAmount,0,10,0,0,1,0,0,0,0,0,6
B-DebtInstrumentInterestRateStatedPercentage,0,0,42,0,0,0,0,1,0,0,0
B-DebtInstrumentMaturityDate,0,0,0,0,0,0,0,0,0,0,3
B-LineOfCreditFacilityMaximumBorrowingCapacity,0,2,0,0,14,0,0,0,0,0,0
I-DebtInstrumentBasisSpreadOnVariableRate1,0,0,0,0,0,22,0,8,0,0,2
I-DebtInstrumentFaceAmount,0,0,0,0,0,0,20,0,0,2,12
I-DebtInstrumentInterestRateStatedPercentage,0,0,0,0,0,0,0,85,0,0,1
I-DebtInstrumentMaturityDate,0,0,0,0,0,0,0,0,0,0,9
I-LineOfCreditFacilityMaximumBorrowingCapacity,0,0,0,0,0,0,2,0,0,30,0


In [113]:
cm.shape

(92, 92)

In [145]:
lc = label_encoder2.classes_

lc = [x for x in lc if x in label_encoder.classes_] + [x for x in lc if x not in label_encoder.classes_]

In [148]:
# Step 2: Compute the confusion matrix
cm = confusion_matrix(y_true_encoded2, y_pred_encoded2)

# Step 3: Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, 
                     index=label_encoder2.classes_,    # Row names
                     columns=label_encoder2.classes_)  # Column names

# cm_df.loc[lc, lc].to_excel("data/confusion_matrix_v7.xlsx")

cm_df.loc[lc, lc]

Unnamed: 0,B-DebtInstrumentBasisSpreadOnVariableRate1,B-DebtInstrumentFaceAmount,B-DebtInstrumentInterestRateStatedPercentage,B-DebtInstrumentMaturityDate,B-LineOfCreditFacilityMaximumBorrowingCapacity,I-DebtInstrumentBasisSpreadOnVariableRate1,I-DebtInstrumentFaceAmount,I-DebtInstrumentInterestRateStatedPercentage,I-DebtInstrumentMaturityDate,I-LineOfCreditFacilityMaximumBorrowingCapacity,...,B-RevenueFromRelatedParties,B-RevenueRemainingPerformanceObligation,B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriod,B-ShareBasedCompensationArrangementByShareBasedPaymentAwardEquityInstrumentsOtherThanOptionsGrantsInPeriodWeightedAverageGrantDateFairValue,B-StockIssuedDuringPeriodSharesNewIssues,B-StockRepurchaseProgramAuthorizedAmount1,B-StockRepurchasedAndRetiredDuringPeriodShares,B-StockRepurchasedDuringPeriodShares,B-TreasuryStockAcquiredAverageCostPerShare,B-TreasuryStockValueAcquiredCostMethod
B-DebtInstrumentBasisSpreadOnVariableRate1,11,0,5,0,0,22,0,8,0,0,...,0,0,0,0,0,0,0,0,0,0
B-DebtInstrumentFaceAmount,0,10,0,0,1,0,20,0,0,2,...,0,0,0,0,0,0,0,0,0,0
B-DebtInstrumentInterestRateStatedPercentage,0,0,42,0,0,0,0,86,0,0,...,0,0,0,0,0,0,0,0,0,0
B-DebtInstrumentMaturityDate,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B-LineOfCreditFacilityMaximumBorrowingCapacity,0,2,0,0,11,0,2,0,0,28,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
B-StockRepurchaseProgramAuthorizedAmount1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B-StockRepurchasedAndRetiredDuringPeriodShares,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B-StockRepurchasedDuringPeriodShares,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
B-TreasuryStockAcquiredAverageCostPerShare,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder


label_encoder = LabelEncoder()
y_true_encoded = label_encoder.fit_transform(true_exploded)
y_pred_encoded = label_encoder.transform(pred_exploded)

# Step 2: Compute the confusion matrix
cm = confusion_matrix(y_true_encoded, y_pred_encoded)

# Step 3: Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, 
                     index=label_encoder.classes_,    # Row names
                     columns=label_encoder.classes_)  # Column names

cm_df.to_excel("data/confusion_matrix_v6.xlsx")

cm_df


# print(confusion_matrix(true_exploded, pred_exploded))

Unnamed: 0,B-DebtInstrumentBasisSpreadOnVariableRate1,B-DebtInstrumentFaceAmount,B-DebtInstrumentInterestRateStatedPercentage,B-DebtInstrumentMaturityDate,B-LineOfCreditFacilityMaximumBorrowingCapacity,I-DebtInstrumentBasisSpreadOnVariableRate1,I-DebtInstrumentFaceAmount,I-DebtInstrumentInterestRateStatedPercentage,I-DebtInstrumentMaturityDate,I-LineOfCreditFacilityMaximumBorrowingCapacity,O
B-DebtInstrumentBasisSpreadOnVariableRate1,11,0,5,0,0,0,0,0,0,0,1
B-DebtInstrumentFaceAmount,0,10,0,0,1,0,0,0,0,0,6
B-DebtInstrumentInterestRateStatedPercentage,0,0,42,0,0,0,0,1,0,0,0
B-DebtInstrumentMaturityDate,0,0,0,0,0,0,0,0,0,0,3
B-LineOfCreditFacilityMaximumBorrowingCapacity,0,2,0,0,14,0,0,0,0,0,0
I-DebtInstrumentBasisSpreadOnVariableRate1,0,0,0,0,0,22,0,8,0,0,2
I-DebtInstrumentFaceAmount,0,0,0,0,0,0,20,0,0,2,12
I-DebtInstrumentInterestRateStatedPercentage,0,0,0,0,0,0,0,85,0,0,1
I-DebtInstrumentMaturityDate,0,0,0,0,0,0,0,0,0,0,9
I-LineOfCreditFacilityMaximumBorrowingCapacity,0,0,0,0,0,0,2,0,0,30,0


In [None]:
from google.colab import drive

drive.mount('/content/drive')


In [None]:
os.chdir( "/content/drive/MyDrive/datasnipper")

In [32]:
val_df["tag_exists"] = val_df["pyt_preds_exp"].apply(lambda x: len([y for y in x if y!= "O"])) + val_df["ner_expanded"].apply(lambda x: len([y for y in x if y!= "O"]))

In [22]:
val_df["tag_exists"] = val_df["pyt_preds_exp"].apply(lambda x: len([y for y in x if y!= "O"])) + val_df["ner_expanded"].apply(lambda x: len([y for y in x if y!= "O"]))

val_df["ind_f1"] = val_df.apply(lambda x: f1_score([x["ner_expanded"]], [x["pyt_preds_exp"]]),axis=1).sort_values()

val_df["ext_true"] = val_df.apply(lambda x: dict(extract_tagged(x["tokens"],x["ner_tags"])),axis=1)

val_df["ext_true_exp"] = val_df.apply(lambda x: dict(extract_tagged(x["toks_exp"],x["ner_expanded"])),axis=1)

val_df["ext_pred"] = val_df.apply(lambda x: dict(extract_tagged(x["toks_exp"],x["pyt_preds_exp"])),axis=1)

val_df["ext_pred_onnx"] = val_df.apply(lambda x: dict(extract_tagged(x["toks_exp"],x["onnx_preds_exp"])),axis=1)

val_df.loc[val_df.tag_exists != 0,["id","tokens", "ner_tags","ner_expanded","ext_true", "ext_true_exp","ext_pred", "ext_pred_onnx","ind_f1"]].sort_values("ind_f1").to_excel("data/error_analysis_v4.xlsx")

  _warn_prf(
