In [1]:
import torch
import torch.nn.functional as F
from transformers import BertTokenizer

import sys
import os
sys.path.append(os.path.abspath('../..'))

from app.classes.all_classes import *

In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# The bare expression on the last line makes the notebook display the choice.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
# SECURITY: torch.load unpickles these files, and unpickling can execute
# arbitrary code. Only load .pkl files that come from a trusted source.
#
# weights_only=False is passed explicitly because each file is loaded as a
# complete Python object rather than a plain state_dict (presumably relying on
# the classes star-imported from app.classes.all_classes -- confirm). Newer
# PyTorch releases default to weights_only=True, which refuses such files.
#
# map_location=device remaps stored tensors onto the device selected earlier in
# the notebook, so a checkpoint saved on a GPU still loads on a CPU-only machine.
with open("../../app/helper/classifier_head.pkl", 'rb') as f:
    classifier_head = torch.load(f, map_location=device, weights_only=False)

with open("../../app/helper/tokenizer.pkl", 'rb') as f:
    tokenizer = torch.load(f, map_location=device, weights_only=False)

with open("../../app/models/BERT/model.pkl", 'rb') as f:
    bert = torch.load(f, map_location=device, weights_only=False)

  classifier_head = torch.load(f)
  tokenizer = torch.load(f)
  bert = torch.load(f)


In [4]:
# define mean pooling function
def mean_pool(token_embeds, attention_mask):
    """Average token embeddings over dimension 1, ignoring masked-out positions.

    Args:
        token_embeds: Tensor of per-token embeddings.
            Assumed shape (batch, seq_len, hidden) -- TODO confirm with caller.
        attention_mask: Tensor with one entry per token; positions holding 0
            contribute nothing to the average.
            Assumed shape (batch, seq_len) -- TODO confirm with caller.

    Returns:
        Tensor holding, for each row, the mask-weighted sum of the embeddings
        divided by the mask total for that row.
    """
    # Stretch the mask across the embedding dimension so it lines up
    # element-for-element with token_embeds.
    mask = attention_mask.unsqueeze(-1).expand(token_embeds.size()).float()

    # Masked positions are multiplied by zero and drop out of the sum.
    summed = torch.sum(token_embeds * mask, 1)

    # The clamp keeps the divisor non-zero when a row's mask is entirely zero.
    counts = torch.clamp(mask.sum(1), min=1e-9)

    return summed / counts

In [5]:
from sklearn.metrics.pairwise import cosine_similarity

def predict_nli_class_with_similarity(model, classifier_head, premise, hypothesis, tokenizer, device):
    """Classify a premise/hypothesis pair and report the similarity of their embeddings.

    Each text is tokenized on its own, passed through
    ``model.get_last_hidden_state`` and mean-pooled into a single vector
    (``u`` for the premise, ``v`` for the hypothesis). The classifier head is
    fed the concatenation ``[u, v, |u - v|]`` and its logits are converted to
    probabilities with a softmax. All of this runs under ``torch.no_grad()``.

    This function does not call ``.eval()`` on ``model`` or
    ``classifier_head``; put them in the desired mode before calling.

    Args:
        model: Object exposing ``get_last_hidden_state(input_ids, segment_ids)``.
        classifier_head: Callable mapping the concatenated feature tensor to logits.
        premise: Premise text handed to ``tokenizer``.
        hypothesis: Hypothesis text handed to ``tokenizer``.
        tokenizer: Callable whose result supports ``.to(device)`` and provides
            ``'input_ids'`` and ``'attention_mask'``.
        device: Device the tokenized inputs and hidden states are moved to.

    Returns:
        dict with three keys:
            ``'predicted_class'``: label whose probability is highest.
            ``'class_probabilities'``: numpy array of the softmax output.
            ``'cosine_similarity'``: cosine similarity between ``u`` and ``v``.
    """
    # Tokenize and convert to input IDs and attention masks
    inputs_a = tokenizer(premise, return_tensors='pt', truncation=True, padding=True).to(device)
    inputs_b = tokenizer(hypothesis, return_tensors='pt', truncation=True, padding=True).to(device)

    inputs_ids_a = inputs_a['input_ids']
    attention_a = inputs_a['attention_mask']
    inputs_ids_b = inputs_b['input_ids']
    attention_b = inputs_b['attention_mask']

    # zeros_like allocates on the same device as its argument, so no extra
    # transfer is needed for the segment ids.
    segment_ids_a = torch.zeros_like(inputs_ids_a)
    segment_ids_b = torch.zeros_like(inputs_ids_b)

    # Inference only: keep the whole forward path (encoder, pooling, head)
    # inside no_grad so no autograd graph is recorded.
    with torch.no_grad():
        u_last_hidden_state = model.get_last_hidden_state(inputs_ids_a, segment_ids_a).to(device)
        v_last_hidden_state = model.get_last_hidden_state(inputs_ids_b, segment_ids_b).to(device)

        # Mean-pooling; the results stay as tensors on `device` so the
        # classifier features can be built without a numpy round trip.
        u_mean_pool = mean_pool(u_last_hidden_state, attention_a)
        v_mean_pool = mean_pool(v_last_hidden_state, attention_b)

        # Feature vector for classification: [u, v, |u - v|]
        uv_abs = torch.abs(u_mean_pool - v_mean_pool)
        x = torch.cat([u_mean_pool, v_mean_pool, uv_abs], dim=-1)

        logits = classifier_head(x)
        probs = F.softmax(logits, dim=-1)

    # NOTE(review): this index -> label order must match the label encoding
    # used when classifier_head was trained, which is not visible here. Some
    # public NLI datasets use 0 = entailment, 1 = neutral, 2 = contradiction
    # (the reverse of this list) -- confirm against the training code.
    class_labels = ['contradiction', 'neutral', 'entailment']
    predicted_class = class_labels[torch.argmax(probs).item()]

    # sklearn's cosine_similarity works on 2-D CPU arrays, so convert once here.
    u_vec = u_mean_pool.detach().cpu().numpy().reshape(1, -1)
    v_vec = v_mean_pool.detach().cpu().numpy().reshape(1, -1)
    cosine_sim = cosine_similarity(u_vec, v_vec)[0, 0]

    return {
        'predicted_class': predicted_class,
        'class_probabilities': probs.detach().cpu().numpy(),
        'cosine_similarity': cosine_sim
    }

# Example usage: score one premise/hypothesis pair with the loaded model,
# classifier head and tokenizer, then print a three-line summary.
premise = "A man is playing a guitar on stage."
hypothesis = "The man is performing music."

result = predict_nli_class_with_similarity(
    bert,
    classifier_head,
    premise,
    hypothesis,
    tokenizer,
    device,
)

# Pull the individual fields out of the result dict for reporting.
predicted_class = result['predicted_class']
probabilities = result['class_probabilities']
cosine_sim = result['cosine_similarity']

print(
    f"Predicted Class: {predicted_class}",
    f"Class Probabilities: {probabilities}",
    f"Cosine Similarity: {cosine_sim:.4f}",
    sep='\n',
)

Predicted Class: contradiction
Class Probabilities: [[9.9814522e-01 2.6594464e-06 1.8521816e-03]]
Cosine Similarity: 0.9887
