In [30]:
import pandas as pd
import requests
from io import StringIO


In [31]:
# Download the evaluation-results CSV and parse it into a DataFrame.
url = 'https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/04-monitoring/data/results-gpt4o-mini.csv'
# TLS certificate verification is left at the requests default (enabled);
# verify=False would accept a response from an unauthenticated server.
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as CSV.
response.raise_for_status()
df = pd.read_csv(StringIO(response.text))



In [32]:
# Keep only the leading rows of the dataset for the rest of the notebook.
n_rows = 300
df = df.iloc[:n_rows]

In [33]:
from sentence_transformers import SentenceTransformer

# Model identifier handed to SentenceTransformer.
model_name = "multi-qa-mpnet-base-dot-v1"
embedding_model = SentenceTransformer(model_name)

You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





# Q1. Getting the embeddings model

In [34]:
# LLM-generated answer from the first row of the dataset.
answer_llm = df['answer_llm'].iloc[0]

In [35]:
# Embed the first LLM answer with the sentence-transformer model.
answer_embedding = embedding_model.encode(answer_llm)

In [36]:
# Q1 answer: show the first component of the embedding.
answer_embedding[0]

-0.4224467

# Q2. Computing the dot product

In [37]:
import numpy as np

In [38]:
# Display the DataFrame (already truncated by the earlier `df.iloc[:300]` cell).
df

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp
...,...,...,...,...,...
295,An alternative way to load the data using the ...,Above users showed how to load the dataset dir...,8d209d6d,What is an alternative way to load the data us...,machine-learning-zoomcamp
296,You can directly download the dataset from Git...,Above users showed how to load the dataset dir...,8d209d6d,How can I directly download the dataset from G...,machine-learning-zoomcamp
297,You can fetch data for homework using the `req...,Above users showed how to load the dataset dir...,8d209d6d,Could you share a method to fetch data for hom...,machine-learning-zoomcamp
298,If the status code is 200 when downloading dat...,Above users showed how to load the dataset dir...,8d209d6d,What should I do if the status code is 200 whe...,machine-learning-zoomcamp


In [39]:
# Dot-product score for every row: embed the LLM answer and the original
# answer separately, then take the dot product of the two vectors.
scores = []
for llm_text, orig_text in zip(df['answer_llm'], df['answer_orig']):
    llm_vec = embedding_model.encode(llm_text)
    orig_vec = embedding_model.encode(orig_text)
    scores.append(np.dot(llm_vec, orig_vec))

In [40]:
# Q2 answer: 75th percentile of the dot-product scores.
percentile_75 = np.percentile(scores, q=75)

print(
    f"The 75th percentile of the scores is: {percentile_75}"
)

The 75th percentile of the scores is: 31.674306869506836


# Q3. Computing the cosine similarity

In [41]:
# Function to normalize a vector
def normalize_vector(v):
    """Scale a vector to unit Euclidean length.

    Parameters
    ----------
    v : array-like
        A single vector: a NumPy array, or a list/tuple of numbers.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its Euclidean norm. When the norm is exactly 0 the
        (array form of the) input is returned unchanged, so no division by
        zero takes place.
    """
    # Accept plain lists/tuples as well; an ndarray passes through as-is,
    # keeping its dtype, so results for array input are unchanged.
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    return v / norm if norm != 0 else v

# Function to compute cosine similarity between two vectors
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Both inputs are normalized with ``normalize_vector`` and the dot product
    of the normalized vectors is returned. If either input has zero norm it
    is left unscaled, which makes the result 0.

    Parameters
    ----------
    v1, v2 : array-like
        Vectors of equal length.

    Returns
    -------
    numpy scalar
        Dot product of the two unit-length vectors.
    """
    v1_norm = normalize_vector(v1)
    v2_norm = normalize_vector(v2)
    return np.dot(v1_norm, v2_norm)

# Cosine-similarity score for every row: embed both answers, then compare
# the two embeddings with cosine_similarity.
scores = []
for llm_text, orig_text in zip(df['answer_llm'], df['answer_orig']):
    llm_vec = embedding_model.encode(llm_text)
    orig_vec = embedding_model.encode(orig_text)
    scores.append(cosine_similarity(llm_vec, orig_vec))

# Q3 answer: 75th percentile of the cosine-similarity scores.
percentile_75 = np.percentile(scores, q=75)

print(
    f"The 75th percentile of the cosine similarity scores is: {percentile_75}"
)

The 75th percentile of the cosine similarity scores is: 0.8362347185611725


# Q4. Rouge

In [42]:
pip install rouge

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [45]:
from rouge import Rouge

# Scorer instance reused by the ROUGE cells below.
rouge_scorer = Rouge()

# One hand-written LLM-answer / original-answer pair to try the scorer on.
r = dict(
    answer_llm="This is the answer generated by the language model.",
    answer_orig="This is the original answer.",
)

# Index 0 selects the score dict for this single pair.
pair_scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])
scores = pair_scores[0]

In [46]:
# Q4 answer: inspect the ROUGE result for the example pair
# (one entry per metric, each with 'r', 'p' and 'f' values).
scores

{'rouge-1': {'r': 0.8, 'p': 0.5, 'f': 0.6153846106508877},
 'rouge-2': {'r': 0.5, 'p': 0.25, 'f': 0.33333332888888895},
 'rouge-l': {'r': 0.8, 'p': 0.5, 'f': 0.6153846106508877}}

# Q5. Average rouge score

In [48]:
from statistics import mean

In [49]:
# Q5 answer: collect the F1 ('f') value of each ROUGE variant and average
# them with statistics.mean.
f1_values = [
    scores[metric]['f']
    for metric in ('rouge-1', 'rouge-2', 'rouge-l')
]
average_f1 = mean(f1_values)
average_f1

0.5213675167302214

# Q6. Average rouge score for all the data points

In [None]:
# Four independent, initially empty accumulators - one per ROUGE column.
rouge_1_scores, rouge_2_scores, rouge_l_scores, rouge_avg_scores = [], [], [], []

In [None]:
# Compute ROUGE scores for each record.
# Rows are taken from `df`: the `data` name used previously is not defined in
# any visible cell, so the loop would have raised NameError.
for _, r in df.iterrows():
    # get_scores returns a list; [0] selects the score dict for this pair
    # (indexing the list with 'rouge-1' would raise TypeError).
    scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]
    rouge_1 = scores['rouge-1']['f']
    rouge_2 = scores['rouge-2']['f']
    rouge_l = scores['rouge-l']['f']
    # Unweighted mean of the three F1 values for this row.
    rouge_avg = (rouge_1 + rouge_2 + rouge_l) / 3

    rouge_1_scores.append(rouge_1)
    rouge_2_scores.append(rouge_2)
    rouge_l_scores.append(rouge_l)
    rouge_avg_scores.append(rouge_avg)

# Create a dataframe from the scores (one row per record)
df_scores = pd.DataFrame({
    'rouge-1': rouge_1_scores,
    'rouge-2': rouge_2_scores,
    'rouge-l': rouge_l_scores,
    'rouge-avg': rouge_avg_scores
})

# Calculate the average ROUGE-2 score across all records
average_rouge_2 = df_scores['rouge-2'].mean()

print(df_scores)
print(f"Average ROUGE-2 score: {average_rouge_2}")

In [51]:
# Start from empty accumulators every time the cell runs.
rouge_1_scores, rouge_2_scores, rouge_l_scores, rouge_avg_scores = [], [], [], []

# Score each LLM-answer / original-answer pair and keep the F1 of every
# ROUGE variant plus their unweighted mean.
for answer_llm, answer_orig in zip(df['answer_llm'], df['answer_orig']):
    scores = rouge_scorer.get_scores(answer_llm, answer_orig)[0]
    rouge_1, rouge_2, rouge_l = (
        scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )
    rouge_avg = (rouge_1 + rouge_2 + rouge_l) / 3

    for bucket, value in (
        (rouge_1_scores, rouge_1),
        (rouge_2_scores, rouge_2),
        (rouge_l_scores, rouge_l),
        (rouge_avg_scores, rouge_avg),
    ):
        bucket.append(value)

# One row per record, one column per metric.
column_names = ['rouge-1', 'rouge-2', 'rouge-l', 'rouge-avg']
column_values = [rouge_1_scores, rouge_2_scores, rouge_l_scores, rouge_avg_scores]
df_scores = pd.DataFrame(dict(zip(column_names, column_values)))

# Q6 answer: mean ROUGE-2 F1 over all records.
average_rouge_2 = df_scores['rouge-2'].mean()

In [52]:
# Show the mean of the 'rouge-2' column computed in the previous cell.
average_rouge_2

0.20696501983423318

Version 2: the same ROUGE computation, run on a small hand-written DataFrame

In [55]:
from rouge import Rouge
import pandas as pd

# Small hand-written example frame with three answer pairs.
# NOTE: this rebinds `df`, so the dataset loaded at the top of the notebook
# is no longer reachable under that name once this cell has run.
llm_answers = [
    "This is the first answer generated by the model.",
    "Another model answer.",
    "Yet another generated answer.",
]
original_answers = [
    "This is the first original answer.",
    "Another original answer.",
    "Yet another original answer.",
]
df = pd.DataFrame({'answer_llm': llm_answers, 'answer_orig': original_answers})

rouge_scorer = Rouge()
rouge_1_scores, rouge_2_scores, rouge_l_scores, rouge_avg_scores = [], [], [], []

# Score each pair; keep the F1 of every ROUGE variant and their unweighted mean.
for answer_llm, answer_orig in zip(df['answer_llm'], df['answer_orig']):
    scores = rouge_scorer.get_scores(answer_llm, answer_orig)[0]
    rouge_1, rouge_2, rouge_l = (
        scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )
    rouge_avg = (rouge_1 + rouge_2 + rouge_l) / 3

    for bucket, value in (
        (rouge_1_scores, rouge_1),
        (rouge_2_scores, rouge_2),
        (rouge_l_scores, rouge_l),
        (rouge_avg_scores, rouge_avg),
    ):
        bucket.append(value)

# One row per pair, one column per metric.
column_names = ['rouge-1', 'rouge-2', 'rouge-l', 'rouge-avg']
column_values = [rouge_1_scores, rouge_2_scores, rouge_l_scores, rouge_avg_scores]
df_scores = pd.DataFrame(dict(zip(column_names, column_values)))

# Mean ROUGE-2 F1 over the example pairs.
average_rouge_2 = df_scores['rouge-2'].mean()

print(df_scores)
print(f"Average ROUGE-2 score: {average_rouge_2}")


    rouge-1   rouge-2   rouge-l  rouge-avg
0  0.714286  0.461538  0.714286   0.630037
1  0.666667  0.000000  0.666667   0.444444
2  0.750000  0.333333  0.750000   0.611111
Average ROUGE-2 score: 0.264957261712689
