In [9]:
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import pandas as pd
from torch.utils.tensorboard import SummaryWriter
import os
import shutil

df_single_train=pd.read_csv("single_type_train.csv")
df_single_test=pd.read_csv("single_type_test.csv")
df_multi_train=pd.read_csv("multi_type_train.csv")
df_multi_test=pd.read_csv("multi_type_test.csv")

train_method="all_data"
load_model=True
load_model_name="model\model_state_dict_all_data_1.pth"

if os.path.exists('runs/{:}/loss'.format(train_method)):
    shutil.rmtree('runs/{:}/loss'.format(train_method))

logdir='runs/{:}/loss'.format(train_method)
writer = SummaryWriter(log_dir=logdir)

In [2]:
if train_method=="all_multi":
    #all the train data is multi question type
    df=df_multi_train
    train_texts=df_multi_train.loc[:,"Question"].values.tolist()
    train_labels=df_multi_train.loc[:, "Confirmation":"Complex"].values.tolist()
    val_texts=pd.concat([df_multi_test.loc[:,"Question"], df_single_test.loc[:,"Question"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
    val_labels=pd.concat([df_multi_test.loc[:,"Confirmation":"Complex"], df_single_test.loc[:,"Confirmation":"Complex"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
if train_method=="half_multi_half_single":
    df=pd.concat([df_multi_train, df_single_train.sample(n=len(df_multi_train),replace=False, random_state=42)], ignore_index=True)
    train_texts=df.loc[:,"Question"].values.tolist()
    train_labels=df.loc[:,"Confirmation":"Complex"].values.tolist()
    val_texts=pd.concat([df_multi_test.loc[:,"Question"], df_single_test.loc[:,"Question"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
    val_labels=pd.concat([df_multi_test.loc[:,"Confirmation":"Complex"], df_single_test.loc[:,"Confirmation":"Complex"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
if train_method=="all_data":
    df=pd.concat([df_multi_train, df_single_train], ignore_index=True)
    train_texts=df.loc[:,"Question"].values.tolist()
    train_labels=df.loc[:,"Confirmation":"Complex"].values.tolist()
    val_texts=pd.concat([df_multi_test.loc[:,"Question"], df_single_test.loc[:,"Question"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
    val_labels=pd.concat([df_multi_test.loc[:,"Confirmation":"Complex"], df_single_test.loc[:,"Confirmation":"Complex"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
if train_method=="all_single":
    df=df_single_train
    train_texts=df_single_train.loc[:,"Question"].values.tolist()
    train_labels=df_single_train.loc[:, "Confirmation":"Complex"].values.tolist()
    val_texts=pd.concat([df_multi_test.loc[:,"Question"], df_single_test.loc[:,"Question"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()
    val_labels=pd.concat([df_multi_test.loc[:,"Confirmation":"Complex"], df_single_test.loc[:,"Confirmation":"Complex"].sample(n=len(df_multi_test),replace=False, random_state=42)], ignore_index=True).values.tolist()


In [3]:
import numpy as np
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)

train_seq = torch.tensor(train_encodings['input_ids'])
train_mask = torch.tensor(train_encodings['attention_mask'])
train_y = torch.tensor(train_labels, dtype=torch.float)

val_seq = torch.tensor(val_encodings['input_ids'])
val_mask = torch.tensor(val_encodings['attention_mask'])
val_y = torch.tensor(val_labels, dtype=torch.float)

batch_size = 32

# DataLoader
train_data = TensorDataset(train_seq, train_mask, train_y)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_seq, val_mask, val_y)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

In [4]:
epochs=500
early_stopping_patience = 30 
best_loss = np.Inf
early_stopping_counter = 0  
lr=2e-6
start_epoch=0

In [5]:
from transformers import AdamW,get_linear_schedule_with_warmup
from torch.nn import BCEWithLogitsLoss
from model import BERTMultiLabelBinaryClassification,BERTMultiLabelBinaryClassification_FactorLoss
from tqdm import tqdm
import copy

positive_proportions = df[['Confirmation', 'Factoid', 'List', 'Causal', 'Hypothetical', 'Complex']].sum() / len(df[['Confirmation', 'Factoid', 'List', 'Causal', 'Hypothetical', 'Complex']])
negative_proportions = 1 - positive_proportions
label_weight=negative_proportions/positive_proportions
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model = BERTMultiLabelBinaryClassification(num_labels=6,label_weight=label_weight)
model = BERTMultiLabelBinaryClassification_FactorLoss(num_labels=6,label_weight=label_weight,gamma=2.5,mix_ratio=0.3)
if load_model:
    model.load_state_dict(torch.load(load_model_name))
optimizer = AdamW(model.parameters(), lr=lr,weight_decay=0.001)
loss_fn = BCEWithLogitsLoss()
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=5, 
                                            num_training_steps=len(train_dataloader)*epochs)
progress_bar = tqdm(range(start_epoch,epochs), desc='Training Progress')
model=model.to(device)
for epoch in progress_bar: 
    model.train()
    total_loss = 0
    for batch in train_dataloader:
        batch = tuple(b.to(device) for b in batch)
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2]}
        loss = model(**inputs)

        #loss = loss_fn(outputs, inputs['labels'])
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    
    total_loss=total_loss / len(train_dataloader)

    val_loss = 0
    model.eval()
    with torch.no_grad():
        for batch in val_dataloader:
            batch = tuple(b.to(device) for b in batch)
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'labels': batch[2]}
            loss = model(**inputs)
            #batch_loss = loss_fn(outputs, inputs['labels'])
            val_loss += loss.item()
    val_loss = val_loss / len(val_dataloader)

    if val_loss < best_loss:
        best_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        early_stopping_counter = 0 
    else:
        early_stopping_counter += 1
        if early_stopping_counter >= early_stopping_patience:
            print(f"Early stopping triggered after {epoch} epochs.")
            break
    writer.add_scalars("loss",{"train loss":total_loss,"val loss":val_loss},epoch)
    progress_bar.set_description("Epoch {:} train loss:{:.6f},val loss:{:.6f}".format(epoch,total_loss,val_loss))
model.load_state_dict(best_model_state)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  self.label_weight = torch.tensor(label_weight)
Epoch 44 train loss:0.066992,val loss:0.375491:   9%|▉         | 45/500 [26:45<4:30:37, 35.69s/it]

Early stopping triggered after 45 epochs.





<All keys matched successfully>

In [6]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
import numpy as np

model.eval()

predictions, true_labels = [], []

with torch.no_grad():
    model.to(device)
    for batch in val_dataloader:
        batch = tuple(b.to(device) for b in batch)
        inputs = {
            'input_ids': batch[0],
            'attention_mask': batch[1]
        }
        labels = batch[2]

        outputs = model(**inputs)

        logits =  torch.sigmoid(outputs).detach().cpu().numpy()
        label_ids = labels.to('cpu').numpy()

        predictions.append(logits)
        true_labels.append(label_ids)

predictions = np.vstack(predictions) 
true_labels = np.vstack(true_labels) 

pred_labels=np.zeros_like(predictions)
split=[0.5,0.5,0.5,0.5,0.5,0.5]
for i in range(len(split)):
    predictions[:,i]=predictions[:,i]/predictions[:,i].max()
    pred_labels[:,i]=(predictions[:,i] > split[i]).astype(int)

precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='micro')
accuracy = accuracy_score(true_labels, pred_labels)
roc_auc = roc_auc_score(true_labels, predictions, average='micro')

writer.add_text("Evaluation","Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))
print("Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))

list_name=['Confirmation','Factoid','List','Causal','Hypothetical','Complex']
for i in range(6):
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels[:,i], pred_labels[:,i], average='micro')
    accuracy = accuracy_score(true_labels[:,i], pred_labels[:,i])
    roc_auc = roc_auc_score(true_labels[:,i], predictions[:,i], average='micro')
    writer.add_text("Evaluation","{:} Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(list_name[i],accuracy,precision,recall,f1,roc_auc))
    print("{:} Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(list_name[i],accuracy,precision,recall,f1,roc_auc))

index = (true_labels.sum(axis=1) == 1)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels[index], pred_labels[index], average='micro')
accuracy = accuracy_score(true_labels[index], pred_labels[index])
roc_auc = roc_auc_score(true_labels[index], predictions[index], average='micro')

writer.add_text("Evaluation","Single-type qutions Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))
print("Single-type qutions Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))

index = (true_labels.sum(axis=1) > 1)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels[index], pred_labels[index], average='micro')
accuracy = accuracy_score(true_labels[index], pred_labels[index])
roc_auc = roc_auc_score(true_labels[index], predictions[index], average='micro')

print("Multi-type qutions Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))
writer.add_text("Evaluation","Multi-type qutions Accuracy:{:.4f} Precision:{:.4f} Recall:{:.4f} F1 Score:{:.4f} ROC AUC:{:.4f}".format(accuracy,precision,recall,f1,roc_auc))

Accuracy:0.5953 Precision:0.8590 Recall:0.7846 F1 Score:0.8201 ROC AUC:0.9643
Confirmation Accuracy:0.9415 Precision:0.9415 Recall:0.9415 F1 Score:0.9415 ROC AUC:0.9766
Factoid Accuracy:0.7458 Precision:0.7458 Recall:0.7458 F1 Score:0.7458 ROC AUC:0.8349
List Accuracy:0.9967 Precision:0.9967 Recall:0.9967 F1 Score:0.9967 ROC AUC:0.9993
Causal Accuracy:0.9080 Precision:0.9080 Recall:0.9080 F1 Score:0.9080 ROC AUC:0.9661
Hypothetical Accuracy:0.9967 Precision:0.9967 Recall:0.9967 F1 Score:0.9967 ROC AUC:0.9998
Complex Accuracy:0.8796 Precision:0.8796 Recall:0.8796 F1 Score:0.8796 ROC AUC:0.9018
Single-type qutions Accuracy:0.6522 Precision:0.7123 Recall:0.8361 F1 Score:0.7692 ROC AUC:0.9543
Multi-type qutions Accuracy:0.5385 Precision:0.9635 Recall:0.7600 F1 Score:0.8497 ROC AUC:0.9822


In [7]:
torch.save(model.state_dict(), 'model_state_dict_{:}.pth'.format(train_method))

In [21]:
sentence="Is Paris the capital of France?"

sentence_encodings = tokenizer(sentence, truncation=True, padding=True, max_length=128)
sentence_seq = torch.tensor(sentence_encodings['input_ids'])
sentence_mask = torch.tensor(sentence_encodings['attention_mask'])
model.eval()
with torch.no_grad():
    model.cpu()
    inputs = {
            'input_ids':sentence_seq.unsqueeze(0),
            'attention_mask':sentence_mask.unsqueeze(0)
        }
    outputs = model(**inputs)
    logits =  torch.sigmoid(outputs).detach().cpu().numpy()
    #logits = outputs.detach().cpu().numpy()
pred_labels=(logits > 0.5).astype(int)
print(pred_labels)

[[1 0 0 0 0 0]]
