In [None]:
!pip install transformers
!pip install datasets

In [1]:
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset
from datasets import load_dataset
from transformers import BertTokenizer, BertModel, BertForSequenceClassification, BertConfig
from transformers import Trainer, TrainingArguments
from transformers import AdamW, get_cosine_schedule_with_warmup
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
dataset = load_dataset('kiddothe2b/contract-nli', 'contractnli_a') # https://huggingface.co/datasets/kiddothe2b/contract-nli

train_data = dataset['train']
val_data = dataset['validation']
test_data = dataset['test']



  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
train_texts_premise = train_data['premise']
train_texts_hypothesis = train_data['hypothesis']
train_labels = train_data['label']

val_texts_premise = val_data['premise']
val_texts_hypothesis = val_data['hypothesis']
val_labels = val_data['label']

test_texts_premise = test_data['premise']
test_texts_hypothesis = test_data['hypothesis']
test_labels = test_data['label']

In [4]:
len(train_labels), len(val_labels), len(test_labels)

(6819, 978, 1991)

In [5]:
train_texts_premise = train_texts_premise[:1000]
train_texts_hypothesis = train_texts_hypothesis[:1000]
train_labels = train_labels[:1000]

val_texts_premise = val_texts_premise[:200]
val_texts_hypothesis = val_texts_hypothesis[:200]
val_labels = val_labels[:200]

test_texts_premise = test_texts_premise[:500]
test_texts_hypothesis = test_texts_hypothesis[:500]
test_labels = test_labels[:500]

In [6]:
len(train_labels), len(val_labels), len(test_labels)

(1000, 200, 500)

# BERT

In [7]:
model_config = BertConfig.from_pretrained('bert-base-multilingual-uncased', num_labels=3)
model = BertForSequenceClassification.from_pretrained('bert-base-multilingual-uncased', config=model_config)
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [8]:
train_inputs = tokenizer([x for x in train_texts_premise], [x for x in train_texts_hypothesis], \
                   padding=True, truncation='only_first', return_overflowing_tokens=True, return_tensors='pt')
train_labels = torch.tensor([x for x in train_labels])

train_dataset = torch.utils.data.TensorDataset(train_inputs['input_ids'], train_inputs['attention_mask'], train_inputs['token_type_ids'], train_labels)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

In [9]:
val_inputs = tokenizer([x for x in val_texts_premise], [x for x in val_texts_hypothesis], \
                   padding=True, truncation='only_first', return_overflowing_tokens=True, return_tensors='pt')
val_labels = torch.tensor([x for x in val_labels])

val_dataset = torch.utils.data.TensorDataset(val_inputs['input_ids'], val_inputs['attention_mask'], val_inputs['token_type_ids'], val_labels)
val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=True)

In [10]:
test_inputs = tokenizer([x for x in test_texts_premise], [x for x in test_texts_hypothesis], \
                   padding=True, truncation='only_first', return_overflowing_tokens=True, return_tensors='pt')
test_labels = torch.tensor([x for x in test_labels])

test_dataset = torch.utils.data.TensorDataset(test_inputs['input_ids'], test_inputs['attention_mask'], test_inputs['token_type_ids'], test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=True)

In [11]:
total_epochs = 5

optimizer = AdamW(model.parameters(), lr=1e-5)

num_train_steps = len(train_dataloader) * total_epochs
num_warmup_steps = int(num_train_steps * 0.1) 

lr_scheduler = get_cosine_schedule_with_warmup(optimizer,
                                               num_warmup_steps=num_warmup_steps,
                                               num_training_steps = num_train_steps)



In [12]:
def train(dataloader, optimizer, scheduler, device):
    global model
    model.train()
    model.to(device)
    
    prediction_labels = []
    true_labels = []
    
    total_loss = []
    
    for batch in dataloader:
        batch_dict = {
            'input_ids': batch[0],
            'attention_mask': batch[1],
            'token_type_ids': batch[2],
            'labels': batch[3]
        }
        batch = batch_dict
        true_labels += batch['labels'].numpy().flatten().tolist()
        batch = {k:v.type(torch.long).to(device) for k, v in batch.items()}
        
        outputs = model(**batch)
        loss, logits = outputs[:2]
        logits = logits.detach().cpu().numpy()
        total_loss.append(loss.item())
        
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) # prevent exploding gradient

        optimizer.step()
        scheduler.step()
        
        prediction_labels += logits.argmax(axis=-1).flatten().tolist()
    
    return true_labels, prediction_labels, total_loss

def validation(dataloader, device):
    global model
    model.eval()
    model.to(device)
    
    prediction_labels = []
    true_labels = []
    
    total_loss = []
    
    for batch in dataloader:
        batch_dict = {
              'input_ids': batch[0],
              'attention_mask': batch[1],
              'token_type_ids': batch[2],
              'labels': batch[3]
          }
        batch = batch_dict
        true_labels += batch['labels'].numpy().flatten().tolist()
        batch = {k:v.type(torch.long).to(device) for k, v in batch.items()}
        
        with torch.no_grad():
            outputs = model(**batch)
            loss, logits = outputs[:2]
            logits = logits.detach().cpu().numpy()
            total_loss.append(loss.item())

            prediction_labels += logits.argmax(axis=-1).flatten().tolist()
        
    return true_labels, prediction_labels, total_loss

In [13]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

all_loss = {'train_loss': [], 'val_loss': []}
all_acc = {'train_acc': [], 'val_acc': []}

for epoch in range(total_epochs):
    y, y_pred, train_loss = train(train_dataloader, optimizer, lr_scheduler, device)
    train_acc = accuracy_score(y, y_pred)
    
    y, y_pred, val_loss = validation(val_dataloader, device)
    val_acc = accuracy_score(y, y_pred)
    
    all_loss['train_loss'] += train_loss
    all_loss['val_loss'] += val_loss
    
    all_acc['train_acc'].append(train_acc)
    all_acc['val_acc'].append(val_acc)
    
    print(f'Epoch: {epoch}, train_loss: {torch.tensor(train_loss).mean():.3f}, train_acc: {train_acc:.3f}, val_loss: {torch.tensor(val_loss).mean():.3f}, val_acc: {val_acc:.3f}') 

Epoch: 0, train_loss: 0.952, train_acc: 0.558, val_loss: 0.743, val_acc: 0.690
Epoch: 1, train_loss: 0.749, train_acc: 0.716, val_loss: 0.677, val_acc: 0.750
Epoch: 2, train_loss: 0.650, train_acc: 0.754, val_loss: 0.641, val_acc: 0.750
Epoch: 3, train_loss: 0.558, train_acc: 0.785, val_loss: 0.660, val_acc: 0.740
Epoch: 4, train_loss: 0.516, train_acc: 0.815, val_loss: 0.657, val_acc: 0.750


In [14]:
_, y_pred, _ = validation(test_dataloader, device)
print("Accuracy: {}".format(accuracy_score(test_labels, y_pred)))

Accuracy: 0.41


In [18]:
model.eval()
input_text1 = "Everyone should not share confidential information with others without consent"
input_text2 = "No one should share confidential information"
inputs = tokenizer(input_text1, input_text2, padding=True, truncation='only_first', return_tensors='pt')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()
print(f"Predicted class: {predicted_class}")

Predicted class: 1
