In [2]:
import pandas as pd

# Read the training table from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="train.csv")

# Preview the first five rows as a quick sanity check of the loaded columns.
df.head(n=5)

Unnamed: 0,id,model_a,model_b,prompt,response_a,response_b,winner_model_a,winner_model_b,winner_tie
0,30192,gpt-4-1106-preview,gpt-4-0613,"[""Is it morally right to try to have a certain...","[""The question of whether it is morally right ...","[""As an AI, I don't have personal beliefs or o...",1,0,0
1,53567,koala-13b,gpt-4-0613,"[""What is the difference between marriage lice...","[""A marriage license is a legal document that ...","[""A marriage license and a marriage certificat...",0,1,0
2,65089,gpt-3.5-turbo-0613,mistral-medium,"[""explain function calling. how would you call...","[""Function calling is the process of invoking ...","[""Function calling is the process of invoking ...",0,0,1
3,96401,llama-2-13b-chat,mistral-7b-instruct,"[""How can I create a test set for a very rare ...","[""Creating a test set for a very rare category...","[""When building a classifier for a very rare c...",1,0,0
4,198779,koala-13b,gpt-3.5-turbo-0314,"[""What is the best way to travel from Tel-Aviv...","[""The best way to travel from Tel Aviv to Jeru...","[""The best way to travel from Tel-Aviv to Jeru...",0,1,0


In [4]:
import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModel, AutoTokenizer, pipeline

# Load model and tokenizer once
model_name = "sentence-transformers/all-mpnet-base-v2"  # Good model for semantic similarity
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The embedding helper sends tokenized inputs to `device`, so the weights must
# live there too; otherwise a CUDA machine fails with a device-mismatch error.
# eval() keeps dropout disabled so repeated runs give the same embeddings.
model = AutoModel.from_pretrained(model_name).to(device).eval()

def get_batch_embeddings(texts, tokenizer, model, device, batch_size=32):
    """Embed ``texts`` with ``model`` using attention-masked mean pooling.

    Args:
        texts: Sequence of strings to embed.
        tokenizer: Callable invoked as ``tokenizer(batch, padding=True,
            truncation=True, max_length=512, return_tensors='pt')``. It must
            return a mapping of tensors that includes ``'attention_mask'``.
        model: ``torch.nn.Module`` whose output exposes ``last_hidden_state``.
            It is moved to ``device`` and switched to eval mode here.
        device: Torch device that both the model and the inputs are placed on.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        A 2-D NumPy array with one row per input text. Empty input yields an
        array with zero rows instead of raising.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    texts = list(texts)

    # Inputs are sent to `device` below; the weights have to be on the same
    # device or the forward pass fails. Module.to() is a no-op when they
    # already are, so this is safe to repeat on every call.
    model.to(device)
    model.eval()

    if not texts:
        # np.vstack([]) raises, so return an explicit empty matrix instead.
        hidden_size = getattr(getattr(model, "config", None), "hidden_size", 0)
        return np.empty((0, hidden_size), dtype=np.float32)

    embeddings = []

    for start in range(0, len(texts), batch_size):
        batch_texts = texts[start:start + batch_size]
        encoded_input = tokenizer(batch_texts, padding=True, truncation=True, max_length=512, return_tensors='pt')
        encoded_input = {k: v.to(device) for k, v in encoded_input.items()}

        with torch.no_grad():
            model_output = model(**encoded_input)

        # Mean pooling: average token vectors, counting only real (unmasked)
        # tokens so padding does not dilute the sentence embedding.
        token_embeddings = model_output.last_hidden_state
        attention_mask = encoded_input['attention_mask']
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
        # Clamp guards against division by zero for an all-padding row.
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        batch_embeddings = (sum_embeddings / sum_mask).cpu().numpy()

        embeddings.append(batch_embeddings)

    return np.vstack(embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:

# Process all texts in batches
prompts = df['prompt'].tolist()
responses_a = df['response_a'].tolist()
responses_b = df['response_b'].tolist()

prompt_embeddings = get_batch_embeddings(prompts, tokenizer, model, device)
response_a_embeddings = get_batch_embeddings(responses_a, tokenizer, model, device)
response_b_embeddings = get_batch_embeddings(responses_b, tokenizer, model, device)


def _rowwise_cosine(left, right, eps=1e-12):
    """Cosine similarity between matching rows of two equally shaped 2-D arrays.

    Computed in one vectorised pass instead of one library call per row.
    A zero-length vector yields a similarity of 0.
    """
    dots = np.einsum('ij,ij->i', left, right)
    norms = np.linalg.norm(left, axis=1) * np.linalg.norm(right, axis=1)
    # eps keeps the division defined when either vector has zero norm.
    return dots / np.maximum(norms, eps)


# Calculate similarities (prompt vs. each response, row by row)
similarities_a = _rowwise_cosine(prompt_embeddings, response_a_embeddings)
similarities_b = _rowwise_cosine(prompt_embeddings, response_b_embeddings)

df['a_semantic_overlap'] = similarities_a
df['b_semantic_overlap'] = similarities_b

In [None]:

# 1. Create a sentiment analysis pipeline
# `pipeline` is the transformers factory function. Placing the classifier on
# `device` keeps it on the same hardware as the embedding model instead of
# silently falling back to CPU.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    device=device,
)

# 2. Function to get sentiment scores
def get_sentiment_score(text, analyzer=None):
    """Return the positive-sentiment probability of ``text`` in ``[0, 1]``.

    Args:
        text: String to classify. Non-string values (for example NaN from a
            missing cell) are treated as neutral.
        analyzer: Optional callable used in place of the module-level
            ``sentiment_analyzer``. It is called as
            ``analyzer(text, truncation=True)`` and must return a dict, or a
            list of dicts, each with ``"label"`` (``"POSITIVE"`` or
            ``"NEGATIVE"``) and ``"score"`` keys.

    Returns:
        The mean positive probability over all returned results: ``score`` for
        a ``"POSITIVE"`` label and ``1 - score`` otherwise. Returns ``0.5``
        when there is nothing to score.
    """
    if analyzer is None:
        analyzer = sentiment_analyzer

    if not isinstance(text, str):
        return 0.5  # Neutral

    # Cheap pre-trim by *characters* to bound tokenizer work. This is not a
    # token limit, so truncation=True is what actually enforces the model's
    # maximum input length.
    text = text[:512]

    result = analyzer(text, truncation=True)

    # The pipeline returns a list even for a single string; normalise so one
    # code path handles both shapes.
    if isinstance(result, dict):
        result = [result]
    if not result:
        return 0.5  # Neutral

    # Map every prediction onto the positive-class probability. Summing raw
    # scores per label and taking their ratio would collapse a single
    # prediction to exactly 0.0 or 1.0 and discard the model's confidence.
    positive_probs = [
        r["score"] if r["label"] == "POSITIVE" else 1.0 - r["score"]
        for r in result
    ]
    return sum(positive_probs) / len(positive_probs)

# 3. Function to calculate sentiment similarity
def sentiment_similarity(sentiment1, sentiment2):
    """Score how close two sentiment values are; larger means more alike.

    The result is one minus the absolute gap between the two inputs, so
    identical inputs give 1.0.
    """
    gap = abs(sentiment1 - sentiment2)
    closeness = 1.0 - gap
    return closeness

# 4. Apply functions to dataframe
# Extract sentiment scores (one classifier call per cell)
df['prompt_sentiment'] = df['prompt'].apply(get_sentiment_score)
df['response_a_sentiment'] = df['response_a'].apply(get_sentiment_score)
df['response_b_sentiment'] = df['response_b'].apply(get_sentiment_score)

# Calculate sentiment similarity
# sentiment_similarity only uses subtraction and abs(), which pandas applies
# element-wise, so whole columns can be passed in directly. This yields the
# same values as a row-by-row df.apply(axis=1) without the per-row overhead.
df['a_sentiment_match'] = sentiment_similarity(
    df['prompt_sentiment'], df['response_a_sentiment'])
df['b_sentiment_match'] = sentiment_similarity(
    df['prompt_sentiment'], df['response_b_sentiment'])

# 5. Get sentiment difference between models (which model has more similar sentiment)
# Positive values mean response A's sentiment is closer to the prompt's than B's.
df['sentiment_match_advantage_a'] = df['a_sentiment_match'] - df['b_sentiment_match']