In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from rouge import Rouge

# Load the dataset

In [None]:
# Results CSV hosted in the DataTalksClub llm-zoomcamp repository.
# The "?raw=1" suffix is appended to the GitHub blob URL so that the request
# is for the file contents rather than the HTML page.
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
url = github_url + "?raw=1"

# Only the first 300 rows are used in the rest of the notebook.
df = pd.read_csv(url).head(300)

# Question 1

In [None]:
# Identifier of the model handed to SentenceTransformer.
model_name = "multi-qa-mpnet-base-dot-v1"

# Embedding model; later cells call `embedding_model.encode` on answer texts.
embedding_model = SentenceTransformer(model_name)

In [None]:
# Embed the LLM answer of the first row and show the first component
# of the resulting vector.
first_llm_answer = df["answer_llm"].iloc[0]
embedding_model.encode(first_llm_answer)[0]

# Question 2

In [None]:
# Encode each answer column row by row and store the vectors in a sibling
# "<column>_embedding" column.
for source_col in ("answer_llm", "answer_orig"):
    df[f"{source_col}_embedding"] = df[source_col].apply(embedding_model.encode)

In [None]:
# Row-wise dot product between the LLM-answer embedding and the
# original-answer embedding.
dot_scores = df.apply(
    lambda row: row["answer_llm_embedding"] @ row["answer_orig_embedding"],
    axis=1,
)
df["dot_similarity"] = dot_scores

# 75th percentile of the dot-product similarity.
df["dot_similarity"].quantile(0.75)

# Question 3

In [None]:
def calculate_cosine_similarity(vector_1, vector_2):
    """Return the cosine similarity between two 1-D vectors.

    Each vector is scaled to unit L2 norm and the dot product of the two
    unit vectors is returned.

    Parameters
    ----------
    vector_1, vector_2 : array-like
        Vectors of equal length. Anything ``np.asarray`` accepts is allowed
        (NumPy arrays, lists, tuples).

    Returns
    -------
    float
        The cosine similarity. ``0.0`` is returned when either vector has
        zero norm, since such a vector has no direction to compare.
    """
    vector_1 = np.asarray(vector_1)
    vector_2 = np.asarray(vector_2)

    norm_1 = np.linalg.norm(vector_1)
    norm_2 = np.linalg.norm(vector_2)

    # Dividing by a zero norm would produce NaN (and a RuntimeWarning);
    # report "no similarity" for that degenerate case instead.
    if norm_1 == 0 or norm_2 == 0:
        return 0.0

    return (vector_1 / norm_1) @ (vector_2 / norm_2)

In [None]:
# Cosine similarity for each (LLM answer, original answer) embedding pair.
cosine_scores = df.apply(
    lambda row: calculate_cosine_similarity(
        row["answer_llm_embedding"],
        row["answer_orig_embedding"],
    ),
    axis=1,
)
df["cosine_similarity"] = cosine_scores

# 75th percentile of the cosine similarity.
df["cosine_similarity"].quantile(0.75)

# Question 4

In [None]:
# Shared Rouge scorer; the cells below call `rouge_score.get_scores` on it.
rouge_score = Rouge()

In [None]:
# Score the LLM answer of the row at position 10 against its original answer.
row = df.iloc[10]
hypothesis, reference = row["answer_llm"], row["answer_orig"]
scores = rouge_score.get_scores(hypothesis, reference)[0]

# F-score of the "rouge-1" metric for this pair.
scores["rouge-1"]["f"]

# Question 5

In [None]:
# Mean of the three ROUGE F-scores computed for the row scored above.
sum(scores[metric]["f"] for metric in ("rouge-1", "rouge-2", "rouge-l")) / 3

# Question 6

In [None]:
def calculate_rouge2(row):
    """Return the "rouge-2" F-score for a single row.

    ``row["answer_llm"]`` is scored against ``row["answer_orig"]`` with the
    notebook-level ``rouge_score`` scorer, and the "f" entry of the first
    result's "rouge-2" metric is returned.
    """
    hypothesis = row["answer_llm"]
    reference = row["answer_orig"]
    first_result = rouge_score.get_scores(hypothesis, reference)[0]
    return first_result["rouge-2"]["f"]

In [None]:
# ROUGE-2 F-score per row, stored as a new column.
rouge2_per_row = df.apply(calculate_rouge2, axis=1)
df["rouge2"] = rouge2_per_row

# Average ROUGE-2 F-score over all rows.
df["rouge2"].mean()