In [1]:
from xml.etree import ElementTree as ET
from torch.utils.data import Dataset, DataLoader
from transformers import XLNetTokenizer, XLNetModel, XLNetForSequenceClassification, AutoTokenizer, AutoModel, BertTokenizer, BertModel
import torch
from torch import nn, optim
import pandas as pd
import numpy as np
import os
import re
from tqdm import tqdm
from sklearn.metrics import f1_score,accuracy_score,precision_score,recall_score,confusion_matrix,classification_report
from sklearn.preprocessing import StandardScaler,MinMaxScaler,OneHotEncoder,LabelEncoder
import nltk
from nltk import word_tokenize

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")
import json
import csv


# Data Preprocessing #

In [None]:
# Configuration

# Transformer backbone used for inference: 'bert', 'scibert' or 'xlnet'.
# The same key selects the tokenizer and the encoder in the cells below.
trans_model = 'bert'

# Dataset to run inference on: 'hedgepeer', 'bioscope' or 'unsc'.
# It should not be the dataset the model was fine-tuned on.
data = 'unsc'

# Path to the fine-tuned checkpoint; change it to match the model selected above.
model_path = '/kaggle/input/bert-bioscope-span-infer/bert_bioscope_span_only_model_ep3.pt'

In [None]:
# Dataset load and visualization

root = '../input'
os.chdir(root)

# Change paths (relative to `root`)
hedgepeer_path = 'hedgepeer/HedgePeer.jsonl'
bioscope_path = 'merged-bioscope/merged_bioscope.jsonl'
unsc_path = 'unsc-fixed/output.json'

# Look the JSON-lines file up by the dataset name chosen in the configuration cell.
_dataset_paths = {
    'hedgepeer': hedgepeer_path,
    'bioscope': bioscope_path,
    'unsc': unsc_path,
}
if data not in _dataset_paths:
    raise ValueError("Invalid dataset name. Define 'data' as 'bioscope', 'hedgepeer' or 'unsc'.")
dataObj = pd.read_json(path_or_buf=_dataset_paths[data], lines=True)


# Flatten the nested review -> sentences -> hedges structure into one record per
# (sentence, hedge) pair; a sentence without hedges yields a single 'NO HEDGE' record.
data_list = []
for _, review in dataObj.iterrows():
    review_id = review['Review_id']
    for sentence in review['Sentences']:
        annotations = sentence['Hedges']
        common = {
            'Review_id': review_id,
            'Sentence_id': sentence['Sentence_id'],
            'Raw Sentence': sentence['Sentence'],
        }
        if len(annotations) == 0:
            data_list.append({
                **common,
                'Hedged Sentence': sentence['Sentence'],
                'Hedge': 'NO HEDGE',
                'Span': None,
            })
            continue
        data_list.extend(
            {
                **common,
                'Hedged Sentence': annotation['Hedged Sentence'],
                'Hedge': annotation['Hedge'],
                'Span': annotation['Span'],
            }
            for annotation in annotations
        )

In [4]:
# Flat table: one row per record collected in `data_list` by the loading cell.
df = pd.DataFrame(data_list)
df

Unnamed: 0,Review_id,Sentence_id,Raw Sentence,Hedged Sentence,Hedge,Span
0,UNSC_1995_SPV.3486_spch001.txt,1,The President (interpretation from Spanish): A...,The President (interpretation from Spanish): A...,NO HEDGE,
1,UNSC_1995_SPV.3486_spch001.txt,2,We all look forward with confidence to their p...,We all look forward with confidence to their p...,NO HEDGE,
2,UNSC_1995_SPV.3486_spch001.txt,3,I should also like to express the Council's gr...,I should also like to express the Council's gr...,NO HEDGE,
3,UNSC_1995_SPV.3486_spch001.txt,4,"The representatives of Brazil, Djibouti, New Z...","The representatives of Brazil, Djibouti, New Z...",NO HEDGE,
4,UNSC_1995_SPV.3486_spch001.txt,5,Expression of thanks to the retiring President...,Expression of thanks to the retiring President...,NO HEDGE,
...,...,...,...,...,...,...
594,UNSC_2009_SPV.6154_spch002.txt,29,"Two weeks ago, I visited a so-called call cent...","Two weeks ago, I visited a so-called call cent...",NO HEDGE,
595,UNSC_2004_SPV.5080_spch012.txt,51,So I insisted that the retired President of Ta...,So I insisted that the retired President of Ta...,NO HEDGE,
596,UNSC_2020_SPV.2020_911_spch004.txt,31,"That is why, at the recent Security Council me...","That is why, at the recent Security Council me...",NO HEDGE,
597,UNSC_2001_SPV.4288Resumption1_spch001.txt,16,The Security Council should respond with the s...,The Security Council should respond with the s...,NO HEDGE,


In [5]:
# Build a per-sentence key "<Review_id>_<Sentence_id>" and store it as a new column.
rev_id, sen_id = df['Review_id'], df['Sentence_id']
unq_id = [f'{review}_{sentence}' for review, sentence in zip(rev_id, sen_id)]
df['Unique_id'] = unq_id
df

Unnamed: 0,Review_id,Sentence_id,Raw Sentence,Hedged Sentence,Hedge,Span,Unique_id
0,UNSC_1995_SPV.3486_spch001.txt,1,The President (interpretation from Spanish): A...,The President (interpretation from Spanish): A...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_1
1,UNSC_1995_SPV.3486_spch001.txt,2,We all look forward with confidence to their p...,We all look forward with confidence to their p...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_2
2,UNSC_1995_SPV.3486_spch001.txt,3,I should also like to express the Council's gr...,I should also like to express the Council's gr...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_3
3,UNSC_1995_SPV.3486_spch001.txt,4,"The representatives of Brazil, Djibouti, New Z...","The representatives of Brazil, Djibouti, New Z...",NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_4
4,UNSC_1995_SPV.3486_spch001.txt,5,Expression of thanks to the retiring President...,Expression of thanks to the retiring President...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_5
...,...,...,...,...,...,...,...
594,UNSC_2009_SPV.6154_spch002.txt,29,"Two weeks ago, I visited a so-called call cent...","Two weeks ago, I visited a so-called call cent...",NO HEDGE,,UNSC_2009_SPV.6154_spch002.txt_29
595,UNSC_2004_SPV.5080_spch012.txt,51,So I insisted that the retired President of Ta...,So I insisted that the retired President of Ta...,NO HEDGE,,UNSC_2004_SPV.5080_spch012.txt_51
596,UNSC_2020_SPV.2020_911_spch004.txt,31,"That is why, at the recent Security Council me...","That is why, at the recent Security Council me...",NO HEDGE,,UNSC_2020_SPV.2020_911_spch004.txt_31
597,UNSC_2001_SPV.4288Resumption1_spch001.txt,16,The Security Council should respond with the s...,The Security Council should respond with the s...,NO HEDGE,,UNSC_2001_SPV.4288Resumption1_spch001.txt_16


In [6]:
# Count how many distinct Unique_id values there are. A count lower than
# len(df) means some keys occur on more than one row.
unique_id_count = df['Unique_id'].nunique()
unique_id_count

568

In [7]:
# Inspect the raw Span column (None for the rows created as 'NO HEDGE').
df['Span']

0      None
1      None
2      None
3      None
4      None
       ... 
594    None
595    None
596    None
597    None
598    None
Name: Span, Length: 599, dtype: object

In [8]:
# Export the flat table (one row per sentence/hedge record). It gets its own
# file name because the grouped export further down writes
# f'{data}_data_dic.csv'; sharing that path silently overwrote this file.
df.to_csv(f'/kaggle/working/{data}_data_flat.csv', index=False)

In [9]:
# Run - Group by unique_id
#
# Collapse the flat table to one entry per Unique_id, collecting the hedged
# sentences, cues and spans of every real hedge attached to that sentence.

unq_list = []
sent_list = []
hedged_sent_list = []
hed_list = []
span_list = []


# Group on the column name itself rather than a one-element list: with a list
# grouper pandas >= 2.0 yields 1-tuples as group keys, which would end up as
# tuples in `unq_list` (and therefore in the exported 'sentence_id' column).
gp = df.groupby(by='Unique_id')
for name, grp in tqdm(gp):
    # `grp` already is this key's sub-frame; no need to fetch it again.
    sent_df = grp
    raw_sent = list(set(sent_df['Raw Sentence']))
    hed_sent = list(sent_df['Hedged Sentence'])

    sent_hedges = list(sent_df['Hedge'])
    sent_spans = list(sent_df['Span'])
    # Keep only rows that carry a real hedge label.
    sent_hed_span = [
        (i, j, k)
        for i, j, k in zip(hed_sent, sent_hedges, sent_spans)
        if j not in ['NO HEDGE', 'IDENT_PRECED']
    ]

    hedged_sents = [i[0] for i in sent_hed_span]
    hedges = [i[1] for i in sent_hed_span]
    spans = [i[2] for i in sent_hed_span]

    unq_list.append(name)
    sent_list.append(raw_sent)
    hedged_sent_list.append(hedged_sents)
    hed_list.append(hedges)
    span_list.append(spans)
    

100%|██████████| 568/568 [00:00<00:00, 2939.45it/s]


In [10]:
# Data dictionary
# One row per Unique_id; every column except 'sentence_id' holds a list
# (the values collected per group in the previous cell).

data_dict = {'sentence_id':unq_list, 'sentence':sent_list, 'hedged_sentence':hedged_sent_list, 'speculative_cues':hed_list, 'scope_string':span_list}
df_dict = pd.DataFrame(data_dict)

# Persist the grouped table to the Kaggle working directory.
df_dict.to_csv(f'/kaggle/working/{data}_data_dic.csv', index=False)

# Create dataloaders #

In [11]:
# Creates dictionary with input_tokens, att_mask, targets tensors

class Dataset_gen(Dataset):
    """Dataset over three parallel sequences: token ids, targets and attention masks.

    Indexing returns a dict of tensors with the keys 'input',
    'attention_mask' and 'targets' built from the item at that position.
    """

    def __init__(self,sentences,targets,att_masks):
        self.sent, self.tar, self.att = sentences, targets, att_masks

    def __len__(self):
        # The dataset length is the number of encoded sentences.
        return len(self.sent)

    def __getitem__(self, item):
        return {
            'input': torch.tensor(self.sent[item]),
            'attention_mask': torch.tensor(self.att[item]),
            'targets': torch.tensor(self.tar[item]),
        }

In [12]:
def dataloader_gen(sent,data2mark,trans_model,tokenizer,max_len,batch_size):
    """Encode sentences and their span strings and wrap the result in a DataLoader.

    `sent` and `data2mark` are handed to `Biot2_dataset` together with the model
    key, tokenizer and maximum length; the ids, attention masks and targets it
    produces are served in batches of `batch_size` (default DataLoader order).
    """
    encoder = Biot2_dataset(sent, data2mark, trans_model, tokenizer, max_len)
    input_ids, attention_masks, labels = encoder.tokenids_gen()

    return DataLoader(
        Dataset_gen(input_ids, labels, attention_masks),
        batch_size=batch_size,
    )


In [13]:
# leng_more = list of indices with sent tokens length > max_len

def remove_big_instances(data,sen_list,sent,data2mark,tokenizer,max_len):
    """Drop every instance whose tokenized sentence is longer than `max_len`.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide `data['sentence']`, an iterable of sentences. Lengths are
        measured on these sentences.
    sen_list : list or None
        Optional parallel list; filtered when given, passed through when None.
    sent, data2mark : list
        Parallel lists, filtered by position.
    tokenizer :
        Object with an `encode_plus` method whose result has an 'input_ids' entry.
    max_len : int
        Largest number of input ids an instance may have and still be kept.

    Returns
    -------
    tuple
        `(leng_more, sen_list, sent, data2mark)`: the list of removed positions
        followed by the filtered lists.
    """
    leng_more = []
    for idx, sentence in enumerate(data['sentence']):
        encoded = tokenizer.encode_plus(
            sentence,
            truncation=False,
            return_token_type_ids=True,
            return_attention_mask=True,
        )
        if len(encoded['input_ids']) > max_len:
            leng_more.append(idx)

    # Set membership keeps the filtering linear instead of quadratic.
    too_long = set(leng_more)

    def _keep(items):
        # Keep the entries whose position was not flagged as too long.
        return [item for idx, item in enumerate(items) if idx not in too_long]

    if sen_list is not None:
        sen_list = _keep(sen_list)
    return (leng_more, sen_list, _keep(sent), _keep(data2mark))

In [None]:
# Convert hedged sentences to hashed sentences. Sent and spans used in Biot2_dataset

sent_span_d = df['Hedged Sentence'].to_list()
span_span_d = df['Span'].to_list()
sen_t2list = None

# Ordered tag rewrites: cue tags become marker strings, span tags become '#'.
# NOTE(review): the cue markers are the literal strings 'token[1]' / 'token[0]' --
# confirm this matches the text the checkpoint was fine-tuned on.
_TAG_REWRITES = (
    ('<mh>', 'token[1]'), ('</mh>', ''),
    ('<h>', 'token[0]'), ('</h>', ''),
    ('<span>', '#'), ('</span>', '#'),
)


def _rewrite_tags(text):
    # Apply every rewrite in order to a single string.
    for tag, replacement in _TAG_REWRITES:
        text = text.replace(tag, replacement)
    return text


# Preprocess "Hedged Sentence" and "Span" columns
sent = []
spans = []

for hedged_sentence, span in zip(df['Hedged Sentence'], df['Span']):
    if not isinstance(span, str):
        # No span string (e.g. None): keep the sentence untouched, use an empty span.
        sent.append(hedged_sentence)
        spans.append('')
        continue
    sent.append(_rewrite_tags(hedged_sentence))
    spans.append(_rewrite_tags(span))



In [15]:
# 0=out of scope, 1=in scope 

class Biot2_dataset(Dataset):
    """Turns (sentence, span) pairs into padded token ids, attention masks and 0/1 targets.

    Sentences are expected to carry the scope between two '#' marker tokens;
    `tokenids_gen` labels the tokens between the markers with 1, everything
    else with 0, and strips the markers before padding.
    """
    def __init__(self,sentences,spans,trans_model,tokenizer,max_len):
        # sentences / spans: parallel sequences, iterated together in tokenids_gen.
        self.sent = sentences
        # trans_model: key into the pad-id table used in tokenids_gen.
        self.trans_model = trans_model
        self.token = tokenizer
        # max_len: length every sequence is padded up to.
        self.max = max_len
        self.spans = spans
        
    def __len__(self):
        return len(self.sent)
    def tokenids_gen(self):
        """Encode every sentence and build its per-token scope targets.

        Returns
        -------
        tuple of lists
            `(senids, attention_masks, targets)`, one inner list per kept sentence.

        Notes
        -----
        * A pair that raises ValueError while locating the '#' markers (for
          example when only one marker token is found) is printed and skipped,
          so the outputs can be shorter than the inputs.
        * Nothing is truncated: when a sequence is already longer than
          `self.max`, `pad_len` is negative, the padding lists are empty and
          the sequence keeps its original length.
        """
        targets = []
        senids = []
        attention_masks = []
        # Hard-coded padding token id per model key.
        pad_token_ids = {'xlnet': 5, 'bert': 0, 'scibert': 0}
    
        for s, sc in zip(self.sent, self.spans):
            encodings = self.token.encode_plus(
                s,
                return_tensors='pt',
                truncation=False,
                return_token_type_ids=True,
                return_attention_mask=True,
            )
    
            # Convert the single encoded row to plain Python ints.
            att = list(encodings['attention_mask'][0])
            senid = list(encodings['input_ids'][0])
            att = [i.item() for i in att]
            senid = [i.item() for i in senid]
            # Decode each id on its own so markers can be located by their text.
            k = [self.token.decode(i) for i in senid]
    
            tar = [0 for _ in range(len(k))]
    
            try:
                if sc != '' and '#' in k:
                    # First and second '#' delimit the scope; the second lookup
                    # raises ValueError when there is no closing marker.
                    idxstart = k.index('#')
                    idxend = k.index('#', idxstart + 1)
                    # Markers get -1 (they are deleted below); tokens strictly
                    # between them are labelled in-scope.
                    tar[idxstart] = -1
                    tar[idxend] = -1
                    tar[idxstart + 1:idxend] = [1] * (idxend - idxstart - 1)
    
                    # Remove # markers
                    # All four parallel lists are edited together to stay aligned.
                    for _ in range(2):
                        idx = k.index('#')
                        if k[idx - 1] == '':
                            # The token before the marker decodes to an empty
                            # string: drop it together with the marker.
                            del k[idx - 1:idx + 1]
                            del senid[idx - 1:idx + 1]
                            del tar[idx - 1:idx + 1]
                            del att[idx - 1:idx + 1]
                        else:
                            del k[idx]
                            del senid[idx]
                            del tar[idx]
                            del att[idx]
    
            except ValueError:
                # Log and skip faulty sentence-span pairs
                print(f'Skipping invalid span in sentence:\n{s}\nSpan:\n{sc}\n')
                continue
    
            # Remove punctuation-only tokens
            # A token is kept only if its decoded text contains an ASCII letter or digit.
            valid = [re.search(r'[A-Za-z0-9]+', tok) for tok in k]
            senid = [i for i, v in zip(senid, valid) if v]
            tar = [i for i, v in zip(tar, valid) if v]
            att = [i for i, v in zip(att, valid) if v]
            k = [i for i, v in zip(k, valid) if v]
    
            # Padding
            # Targets and attention are padded with 0, ids with the model's pad id.
            pad_len = self.max - len(k)
            tar += [0] * pad_len
            senid += [pad_token_ids[self.trans_model]] * pad_len
            att += [0] * pad_len
    
            targets.append(tar)
            senids.append(senid)
            attention_masks.append(att)
    
        return senids, attention_masks, targets

        

# Copy of the flat table with a fresh 0..n-1 index and the raw text exposed
# under the column name 'sentence' (the name remove_big_instances reads).
data_span_infer = df.reset_index(drop=True).rename(columns={'Raw Sentence': 'sentence'})

data_span_infer

Unnamed: 0,Review_id,Sentence_id,sentence,Hedged Sentence,Hedge,Span,Unique_id
0,UNSC_1995_SPV.3486_spch001.txt,1,The President (interpretation from Spanish): A...,The President (interpretation from Spanish): A...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_1
1,UNSC_1995_SPV.3486_spch001.txt,2,We all look forward with confidence to their p...,We all look forward with confidence to their p...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_2
2,UNSC_1995_SPV.3486_spch001.txt,3,I should also like to express the Council's gr...,I should also like to express the Council's gr...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_3
3,UNSC_1995_SPV.3486_spch001.txt,4,"The representatives of Brazil, Djibouti, New Z...","The representatives of Brazil, Djibouti, New Z...",NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_4
4,UNSC_1995_SPV.3486_spch001.txt,5,Expression of thanks to the retiring President...,Expression of thanks to the retiring President...,NO HEDGE,,UNSC_1995_SPV.3486_spch001.txt_5
...,...,...,...,...,...,...,...
594,UNSC_2009_SPV.6154_spch002.txt,29,"Two weeks ago, I visited a so-called call cent...","Two weeks ago, I visited a so-called call cent...",NO HEDGE,,UNSC_2009_SPV.6154_spch002.txt_29
595,UNSC_2004_SPV.5080_spch012.txt,51,So I insisted that the retired President of Ta...,So I insisted that the retired President of Ta...,NO HEDGE,,UNSC_2004_SPV.5080_spch012.txt_51
596,UNSC_2020_SPV.2020_911_spch004.txt,31,"That is why, at the recent Security Council me...","That is why, at the recent Security Council me...",NO HEDGE,,UNSC_2020_SPV.2020_911_spch004.txt_31
597,UNSC_2001_SPV.4288Resumption1_spch001.txt,16,The Security Council should respond with the s...,The Security Council should respond with the s...,NO HEDGE,,UNSC_2001_SPV.4288Resumption1_spch001.txt_16


In [16]:
# Choose tokenizer type

tokenizer1 = XLNetTokenizer.from_pretrained('xlnet-base-cased')

tokenizer2 = BertTokenizer.from_pretrained('bert-base-cased')

tokenizer3 = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_cased')

# Model key -> tokenizer (defined once; the duplicate assignment was redundant).
tokenizer_dict = {'xlnet':tokenizer1,'bert':tokenizer2,'scibert':tokenizer3}

tokenizer = tokenizer_dict[trans_model]

# Maximum sequence length. The same value must be used for filtering here and
# for padding when the DataLoader is built, so it is named instead of repeated.
max_len = 100

# Remove instances whose tokenized raw sentence is longer than max_len.
# NOTE: this rebinds `sent` and `spans` to the filtered lists while the removed
# positions are computed from the unfiltered `data_span_infer`; re-run the
# preprocessing cell before re-running this one, otherwise the already
# filtered lists are filtered again with stale positions.
len_more,sen_t2list,sent,spans = remove_big_instances(data_span_infer,sen_t2list,sent,spans,tokenizer,max_len)

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/222k [00:00<?, ?B/s]

In [None]:
# Adapt to get input data
max_len, batch_size = 100, 4

# Create DataLoaders
infer_data_loader = dataloader_gen(
    sent=sent,
    data2mark=spans,
    trans_model=trans_model,
    tokenizer=tokenizer,
    max_len=max_len,
    batch_size=batch_size,
)


# Models # 


In [18]:
# Model
# Select model based on 'trans_model' variable
class scoperes_model(nn.Module):
    """Pretrained transformer encoder followed by a linear 2-class token classifier.

    The encoder is chosen from the module-level `trans_model` key
    ('xlnet', 'bert' or 'scibert'); any other value raises ValueError.
    """

    def __init__(self):
        super().__init__()
        if trans_model == 'xlnet':
            self.model = XLNetModel.from_pretrained('xlnet-base-cased')
        elif trans_model == 'bert':
            self.model = BertModel.from_pretrained('bert-base-cased')
        elif trans_model == 'scibert':
            self.model = AutoModel.from_pretrained('allenai/scibert_scivocab_cased')
        else:
            raise ValueError("Invalid model name. Choose 'xlnet', 'bert', or 'scibert'.")

        # Size the classifier from the encoder's own configuration instead of a
        # hard-coded 768, so the head always matches the loaded checkpoint.
        self.lin = nn.Linear(self.model.config.hidden_size, 2)

    def forward(self, x, att):
        """Return one row of 2 logits per token position.

        `x` holds the input ids and `att` the attention mask; the first element
        of the encoder output is flattened over its leading dimensions so the
        result has shape (batch * sequence, 2).
        """
        xl = self.model(x, attention_mask=att)[0]
        # reshape (unlike view) also accepts a non-contiguous hidden state.
        xl = xl.reshape(-1, xl.shape[-1])
        return self.lin(xl)

In [19]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [20]:
# Build the network with the pretrained encoder selected by `trans_model`
# and move it to the chosen device.
# NOTE(review): the results cell later rebinds `model` to the object returned
# by torch.load(model_path), so this instance is not the one that is evaluated.
model = scoperes_model()
model.to(device)

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

scoperes_model(
  (model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwi

In [None]:
# Evaluation function with predicted spans

def evaluate(model, val_data):
    """Run `model` over `val_data` without gradients and collect token-level results.

    Relies on the module-level names `device`, `cse_loss`, `tokenizer`, `sent`
    and `spans`.

    Returns
    -------
    tuple
        `(main_loss, true, pred, df_compare)`:
        * main_loss  - sum of the per-batch loss values,
        * true, pred - flattened per-token targets and predicted labels over
          every position of every batch,
        * df_compare - DataFrame pairing the first len(predictions) entries of
          `sent` and `spans` with the reconstructed predicted span text.
    """
    model.eval()
    model.to(device)

    main_loss = 0
    true, pred = [], []
    predicted_spans = []

    with torch.no_grad():
        for batch in val_data:
            inp = batch['input'].to(device)
            att = batch['attention_mask'].to(device)
            targets = batch['targets'].view(-1).to(device)

            logits = model(inp, att)
            main_loss += cse_loss(logits, targets).item()

            # Index of the larger logit is the predicted label of each token.
            predictions = torch.max(logits, dim=1)[1].cpu().detach().numpy()
            targets = targets.cpu().detach().numpy()

            true.extend(targets)
            pred.extend(predictions)

            # Reconstruct predicted span text per example
            n_rows, seq_len = inp.shape[0], inp.shape[1]
            for row in range(n_rows):
                # Number of attended positions; ids and labels are cut to it.
                true_len = sum(att[row].cpu().tolist())
                row_ids = inp[row].cpu().tolist()[:true_len]
                row_labels = predictions[row * seq_len:(row + 1) * seq_len][:true_len]

                pieces = tokenizer.convert_ids_to_tokens(row_ids)

                # Keep tokens labelled 1; a '##' continuation piece is glued to
                # the previously kept token.
                words = []
                for piece, label in zip(pieces, row_labels):
                    if label != 1:
                        continue
                    if piece.startswith("##") and words:
                        words[-1] += piece[2:]
                    else:
                        words.append(piece)

                predicted_spans.append(" ".join(words))

    # Build comparison df
    n_predicted = len(predicted_spans)
    df_compare = pd.DataFrame({
        "Sentence": sent[:n_predicted],
        "Manual_Annotated_Span": spans[:n_predicted],
        "Predicted_Span_Text": predicted_spans
    })

    return main_loss, true, pred, df_compare

In [None]:
# Results on Inference Data

# Define model name
model_name = trans_model

# Ensure metrics are saved 
output_dir = '/kaggle/working/output_metrics/'
os.makedirs(output_dir, exist_ok=True)

# Store results in a list
infer_metrics = []

# Define cse_loss (read as a global by evaluate)
cse_loss = torch.nn.CrossEntropyLoss()

# Evaluate on inference data
print(f'{model_name} MODEL INFERENCE RESULTS ON {data} DATA')

# The checkpoint is a fully pickled model object, so it needs weights_only=False
# (stated explicitly because newer PyTorch releases change the default) and the
# `scoperes_model` class must already be defined in this notebook.
# map_location lets a checkpoint saved on GPU load on a CPU-only machine.
# SECURITY: unpickling executes arbitrary code -- only load checkpoints you trust.
model = torch.load(model_path, map_location=device, weights_only=False)
model.to(device)
span_infer_loss, span_infer_true, span_infer_pred, df_span_comparison = evaluate(model, infer_data_loader)

# Compute evaluation metrics
span_infer_f1 = f1_score(span_infer_true, span_infer_pred, average='macro')
span_infer_acc = accuracy_score(span_infer_true, span_infer_pred)
span_infer_precision = precision_score(span_infer_true, span_infer_pred, average='macro', zero_division=0)
span_infer_recall = recall_score(span_infer_true, span_infer_pred, average='macro', zero_division=0)
# Fix the label order so the matrix is always 2x2 and matches the tick labels
# below, even if one class never occurs.
span_infer_cm = confusion_matrix(span_infer_true, span_infer_pred, labels=[0, 1])

# Print results
print(f'Model: {model_name}')
print(f'Infer Loss: {span_infer_loss:.4f}| Infer precision: {span_infer_precision:.4f} | Infer recall: {span_infer_recall:.4f} | Infer F1: {span_infer_f1:.4f} | Infer Accuracy: {span_infer_acc:.4f}')
print('Test Confusion Matrix:')
print(span_infer_cm)
print(classification_report(span_infer_true, span_infer_pred))
print('\n')

# Confusion Matrix plot
plt.figure(figsize=(8, 6))
sns.heatmap(span_infer_cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=['out-scope', 'in-scope'],
            yticklabels=['out-scope', 'in-scope'])
plt.title(f'{model_name} - Span Infer Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f'{model_name}_{data}_span_infer_confusion_matrix.png'))
plt.close()

# Store metrics for CSV
infer_metrics.append({
    'model_name': model_name,
    'infer_data': data,
    'infer_loss': span_infer_loss,
    'infer_f1': span_infer_f1,
    'infer_accuracy': span_infer_acc,
    'infer_precision': span_infer_precision,
    'infer_recall': span_infer_recall,
})

# Save test metrics to CSV
df_infer_span = pd.DataFrame(infer_metrics)
metrics_path = os.path.join(output_dir, f'span_{data}_infer_metrics.csv')
df_infer_span.to_csv(metrics_path, index=False)
print(f"Span infer metrics saved to {metrics_path}")

# Save comparison df to CSV (report the path that was actually written)
comparison_path = '/kaggle/working/span_prediction_comparison.csv'
df_span_comparison.to_csv(comparison_path, index=False)
print(f'Saved span prediction comparison CSV to {comparison_path}')

bert MODEL INFERENCE RESULTS ON unsc DATA


  model = torch.load(model_path)


Model: bert
Infer Loss: 11.8485| Infer precision F1: 0.9127 | Infer recall: 0.8993 | Infer F1: 0.9059 | Infer Accuracy: 0.9845
Test Confusion Matrix:
[[56287   416]
 [  504  2093]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     56703
           1       0.83      0.81      0.82      2597

    accuracy                           0.98     59300
   macro avg       0.91      0.90      0.91     59300
weighted avg       0.98      0.98      0.98     59300



Span infer metrics saved to /kaggle/working/output_metrics/span_unsc_infer_metrics.csv
Saved span prediction comparison CSV to /kaggle/working/output_metrics/span_unsc_infer_metrics.csv
