In [21]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
from rouge import Rouge

In [3]:
# Results CSV from the llm-zoomcamp repository (04-monitoring/data); the
# "?raw=1" suffix requests the raw file rather than the GitHub HTML page.
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
url = f"{github_url}?raw=1"
# Read the file and keep only the first 300 rows for the rest of the notebook.
df = pd.read_csv(url).iloc[:300]

# Question 1

In [4]:
# Identifier passed to SentenceTransformer; the resulting model is reused by
# every embedding cell below.
model_name = "multi-qa-mpnet-base-dot-v1"

embedding_model = SentenceTransformer(model_name)

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [14]:
# Question 1: first component of the embedding of the first row's LLM answer.
embedding_model.encode(df.iloc[0].answer_llm)[0]

np.float32(-0.42244658)

# Question 2

In [9]:
# Encode each answer column row by row and store the result in a matching
# "<column>_embedding" column (answer_llm_embedding, answer_orig_embedding).
for text_column in ("answer_llm", "answer_orig"):
    df[f"{text_column}_embedding"] = df[text_column].apply(embedding_model.encode)

In [10]:
# Sanity check: each row should now hold one embedding vector.
df["answer_llm_embedding"]

0      [-0.42244658, -0.22485569, -0.32405874, -0.284...
1      [-0.38068154, 0.047848597, -0.31510973, -0.210...
2      [-0.058813937, -0.33736968, -0.36157575, 0.021...
3      [-0.2275367, -0.00813403, -0.21719897, -0.1104...
4      [-0.069693744, -0.500509, -0.1659841, 0.306661...
                             ...                        
295    [-0.2193304, 0.036429286, -0.22642444, 0.19385...
296    [-0.4683964, 0.10411292, -0.19394597, 0.040338...
297    [-0.35566193, 0.112268955, -0.28439155, 0.0621...
298    [-0.23460822, -0.11785728, -0.20596509, 0.0863...
299    [-0.12844159, -0.093136236, -0.20886736, 0.120...
Name: answer_llm_embedding, Length: 300, dtype: object

In [12]:
# Row-wise dot product between the LLM-answer embedding and the
# original-answer embedding.
df["dot_similarity"] = df.apply(
    lambda row: row["answer_llm_embedding"] @ row["answer_orig_embedding"],
    axis=1,
)

In [8]:
# Question 2: 75th percentile of the dot-product similarity.
df["dot_similarity"].quantile(0.75)

np.float64(31.674306392669678)

# Question 3

In [13]:
def calculate_cosine_similarity(vector_1, vector_2):
    """Return the cosine similarity of two vectors.

    Each input is divided by its norm (as computed by ``np.linalg.norm``) and
    the dot product of the two scaled vectors is returned.

    Args:
        vector_1: First vector (array-like of numbers).
        vector_2: Second vector, same length as ``vector_1``.

    Returns:
        The dot product of the two normalized vectors. A zero-norm input is
        not special-cased, so the division yields NaN in that case.
    """
    unit_vectors = [vec / np.linalg.norm(vec) for vec in (vector_1, vector_2)]
    return unit_vectors[0] @ unit_vectors[1]

In [17]:
# Row-wise cosine similarity between the two embedding columns.
df["cosine_similarity"] = df.apply(
    lambda row: calculate_cosine_similarity(
        row["answer_llm_embedding"],
        row["answer_orig_embedding"],
    ),
    axis=1,
)

In [18]:
# Question 3: 75th percentile of the cosine similarity.
df["cosine_similarity"].quantile(0.75)

np.float64(0.8362347781658173)

# Question 4

In [22]:
# Single ROUGE scorer instance shared by the cells and helper below.
rouge_score = Rouge()

In [30]:
# Question 4: ROUGE scores for the row at position 10 (LLM answer passed
# first, original answer second). get_scores returns a list; take its
# first element.
row = df.iloc[10]
scores = rouge_score.get_scores(row["answer_llm"], row["answer_orig"])[0]

# F-score of the rouge-1 metric for that row.
scores["rouge-1"]["f"]

0.45454544954545456

# Question 5

In [36]:
# Question 5: mean of the three ROUGE F-scores for the row scored above.
sum(scores[metric]["f"] for metric in ("rouge-1", "rouge-2", "rouge-l")) / 3

0.35490034990035496

# Question 6

In [39]:
def calculate_rouge2(row):
    """Return the rouge-2 F-score for one row.

    Args:
        row: Mapping-like record with "answer_llm" and "answer_orig" entries
            (e.g. a DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns:
        The "f" value of the "rouge-2" entry from the module-level
        ``rouge_score`` scorer, called with the LLM answer first and the
        original answer second.
    """
    llm_answer = row["answer_llm"]
    original_answer = row["answer_orig"]
    pair_scores = rouge_score.get_scores(llm_answer, original_answer)
    return pair_scores[0]["rouge-2"]["f"]

In [42]:
# Question 6: rouge-2 F-score for every row, then the average over all rows.
df["rouge2"] = df.apply(calculate_rouge2, axis=1)
df["rouge2"].mean()

np.float64(0.20696501983423318)