In [None]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity



# Load the dataset from train.csv in the working directory.
df = pd.read_csv("train.csv")

# Row label inspected by the single-example cells that follow.
index = 1

# Preview the frame. This must be the last expression of the cell,
# otherwise Jupyter silently discards the result and nothing is shown.
df.head()

In [None]:
# Pull the three text fields for the selected row by label.
prompt = df.loc[index, "prompt"]
response_a = df.loc[index, "response_a"]
response_b = df.loc[index, "response_b"]

# One value per line: prompt, then response A, then response B.
print(prompt, response_a, response_b, sep="\n")


In [None]:
# Choose the compute device up front: GPU when CUDA is available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint used for the single-row similarity check in the cells below.
model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Move the weights to the selected device; as the cell's last expression,
# the returned module is also what Jupyter displays.
model.to(device)


In [None]:
def get_embedding(text, tokenizer, model, device):
    """Embed ``text`` by attention-masked mean pooling of the model's last hidden state.

    Args:
        text: Input passed straight to ``tokenizer``.
        tokenizer: Callable invoked with ``padding=True``, ``truncation=True``,
            ``max_length=512`` and ``return_tensors='pt'``; its result must
            expose ``items()`` and contain an ``'attention_mask'`` entry.
        model: Callable invoked with the tokenizer output as keyword
            arguments; its result must expose ``last_hidden_state``.
        device: Device the tokenized tensors are moved to before the forward
            pass.

    Returns:
        A NumPy array holding the pooled embedding of the first sequence in
        the batch. If ``text`` tokenizes to several sequences, only the first
        one's embedding is returned.
    """
    batch = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors='pt')
    batch = {name: tensor.to(device) for name, tensor in batch.items()}

    # Inference only, so skip gradient tracking for the forward pass.
    with torch.no_grad():
        outputs = model(**batch)
    hidden_states = outputs.last_hidden_state

    # Broadcast the attention mask across the hidden dimension so that
    # padding positions contribute nothing to the average.
    mask = batch['attention_mask'].unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    # Clamp the token count to avoid dividing by zero on an all-padding row.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    pooled = masked_total / token_counts

    return pooled.cpu().numpy()[0]

In [None]:
# Embed the prompt and both candidate responses with the same encoder,
# in the order: prompt, response A, response B.
prompt_embedding, responsea_embedding, responseb_embedding = (
    get_embedding(text, tokenizer, model, device)
    for text in (prompt, response_a, response_b)
)

In [None]:
# Compare the prompt with each response in turn (A first, then B) and
# report the cosine similarity of their embeddings.
for label, response_embedding in (("A", responsea_embedding), ("B", responseb_embedding)):
    similarity = cosine_similarity([prompt_embedding], [response_embedding])[0][0]
    print(f"Similarity between prompt and response {label}: {similarity}")


In [None]:
'''
sentence-transformers/multi-qa-mpnet-base-dot-v1: Specifically optimized for question-answering relevance with strong performance on QA benchmarks.
intfloat/e5-large-v2: Designed with a "query-document" paradigm that works particularly well for determining if answers are semantically relevant to questions.
BAAI/bge-large-en-v1.5: Consistently top performer on retrieval tasks, including question-answering, with strong semantic alignment capabilities.
OpenAI text-embedding-3-small: While a general-purpose embedding, it demonstrates excellent performance on question relevance tasks with a good balance of efficiency and quality.
sentence-transformers/all-MiniLM-L12-v2: A good compromise between the minimal L6 version and the full MPNet model, with strong QA performance.
'''

In [63]:
def semantic_overlap(index, model_name):
    """Print prompt/response cosine similarities for one row of ``df`` under one encoder.

    Loads the tokenizer and model identified by ``model_name``, embeds the
    row's prompt and both responses with ``get_embedding``, and prints the
    texts together with the cosine similarity between the prompt embedding
    and each response embedding.

    Args:
        index: Label of the row in the module-level ``df`` to inspect.
        model_name: Identifier handed to ``AutoTokenizer.from_pretrained``
            and ``AutoModel.from_pretrained``.

    Returns:
        None. All results are written to stdout.
    """
    # The tokenizer and model are loaded on every call, so each invocation
    # pays the full load cost for ``model_name``.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Label-based lookup of the three text fields for the requested row.
    prompt = df.loc[index, "prompt"]
    response_a = df.loc[index, "response_a"]
    response_b = df.loc[index, "response_b"]

    prompt_embedding = get_embedding(prompt, tokenizer, model, device)
    responsea_embedding = get_embedding(response_a, tokenizer, model, device)
    responseb_embedding = get_embedding(response_b, tokenizer, model, device)

    similarity_a = cosine_similarity([prompt_embedding], [responsea_embedding])[0][0]
    similarity_b = cosine_similarity([prompt_embedding], [responseb_embedding])[0][0]

    print("Model: ", model_name)
    # Report the row that was actually requested rather than a fixed label.
    print(f"Index {index}: ")
    print("Prompt: ", prompt)
    print("Response A: ", response_a)
    print("Response B: ", response_b)
    print(f"Similarity between prompt and response A: {similarity_a}, response B: {similarity_b}")

    print("\n\n")
    
    


# Compare two encoders on the same row (index 5), in this order.
# Other candidates tried or considered earlier, currently disabled:
#   row 0: "sentence-transformers/multi-qa-mpnet-base-dot-v1"
#   row 0: "intfloat/e5-large-v2"
#   row 1: "OpenAI/text-embedding-3-small"
for candidate_model in (
    "BAAI/bge-large-en-v1.5",
    "sentence-transformers/all-MiniLM-L12-v2",
):
    semantic_overlap(5, candidate_model)
        

Model:  BAAI/bge-large-en-v1.5
Index 1: 
Prompt:  ["Construct a rap battle, in the style of Epic Rap Battles of History, with rhyme and meter and personal attacks relevant to each character, between Pikachu and Zeus. Each participant gets 2 verses. There is no announcer and Zeus goes first."]
Response A:  ["[Zeus]\nYo, it's the king of the gods on the mic\nI control the skies, the thunder, and the lightning strike\nYou're just a cute little rodent, Pikachu, with a spark\nBut don't get it twisted, I'll leave you in a blur\n\nYou've got electric powers, but I'm the one with the might\nI can summon lightning bolts and make them ignite\nYou can't even compare, you're just a mere mouse\nI'm the ruler of the gods, I'm the one who chooses\n\n[Pikachu]\nYou may be the king of the gods, but I'm the one with the flow\nI've got rhymes that'll make you bow down, Zeus, I'm the boss\nYou may have lightning bolts, but I've got electric charm\nI can shock you with my power, make you feel like you're n

In [None]:
# Run the report for the module-level `index` and `model_name`.
semantic_overlap(index=index, model_name=model_name)