In [1]:
import pandas as pd

# Location of the results CSV on GitHub; the '?raw=1' query string appended
# below asks for the raw file rather than the HTML "blob" page.
github_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv'
df = pd.read_csv(github_url + '?raw=1')

# Work with the first 300 records only.
df_300 = df.head(300)

# Preview the first five rows of the full frame.
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


In [2]:
from sentence_transformers import SentenceTransformer

# Load the sentence-transformers model identified by model_name.
model_name = 'multi-qa-mpnet-base-dot-v1'
embedding_model = SentenceTransformer(model_name)

# Embed the LLM answer of the first record and look at its first component.
answer_llm = df_300['answer_llm'].iloc[0]
embedding_vector = embedding_model.encode(answer_llm)
first_value = embedding_vector[0]

print("The first value of the resulting vector is:", first_value)

The first value of the resulting vector is: -0.42244658


In [4]:

import numpy as np

# One score per row of df_300: the dot product between the embedding of the
# LLM answer and the embedding of the original answer.
evaluations = []

for llm_text, orig_text in zip(df_300['answer_llm'], df_300['answer_orig']):
    vec_llm = embedding_model.encode(llm_text)
    vec_orig = embedding_model.encode(orig_text)
    evaluations.append(np.dot(vec_llm, vec_orig))

percentile_75 = np.percentile(evaluations, 75)

print("The 75th percentile of the scores is:", percentile_75)

The 75th percentile of the scores is: 31.674312114715576


In [5]:
def normalize_vector(v):
    """Scale a NumPy vector to unit Euclidean length.

    Args:
        v: NumPy array to normalize.

    Returns:
        ``v / ||v||`` when the norm is non-zero; otherwise an all-zero array
        with the same shape and dtype as ``v``. An array (rather than the
        scalar ``0``) is returned so that ``np.dot`` with another vector still
        produces a single number instead of an array of zeros.
    """
    norm = np.sqrt((v * v).sum())
    if norm == 0:
        # A zero vector has no direction; keep it a vector so downstream
        # dot products yield one score (0) per pair.
        return np.zeros_like(v)
    return v / norm

# One score per row of df_300: the dot product of the two answer embeddings
# after each has been passed through normalize_vector.
evaluations = []

for llm_text, orig_text in zip(df_300['answer_llm'], df_300['answer_orig']):
    unit_llm = normalize_vector(embedding_model.encode(llm_text))
    unit_orig = normalize_vector(embedding_model.encode(orig_text))
    evaluations.append(np.dot(unit_llm, unit_orig))

percentile_75 = np.percentile(evaluations, 75)

print("The 75th percentile of the scores is:", percentile_75)

The 75th percentile of the scores is: 0.8362347930669785


In [8]:
from rouge import Rouge

rouge_scorer = Rouge()

# First record in df_300 whose document id is '5170565b'.
r = df_300.loc[df_300['document'] == '5170565b'].iloc[0]

# get_scores is indexed with [0] to take the entry for this single pair.
scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]
rouge_1_f1_score = scores['rouge-1']['f']

print("The F1 score for ROUGE-1 is:", rouge_1_f1_score)

The F1 score for ROUGE-1 is: 0.45454544954545456


In [9]:
from rouge import Rouge

rouge_scorer = Rouge()

# Score the first df_300 record whose document id is '5170565b'.
r = df_300.loc[df_300['document'] == '5170565b'].iloc[0]
scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]

# Pull the F1 value ('f') out of each of the three ROUGE variants.
rouge_1_f1, rouge_2_f1, rouge_l_f1 = (
    scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
)

average_f1_score = (rouge_1_f1 + rouge_2_f1 + rouge_l_f1) / 3

print("The average F1 score between ROUGE-1, ROUGE-2, and ROUGE-L is:", average_f1_score)


The average F1 score between ROUGE-1, ROUGE-2, and ROUGE-L is: 0.35490034990035496


In [11]:
import pandas as pd
from rouge import Rouge

rouge_scorer = Rouge()

# Build one record per row of df_300 holding the document id and the F1
# values of ROUGE-1, ROUGE-2 and ROUGE-L for that row's answer pair.
scores_list = []

row_fields = zip(df_300['document'], df_300['answer_llm'], df_300['answer_orig'])
for doc_id, llm_text, orig_text in row_fields:
    pair_scores = rouge_scorer.get_scores(llm_text, orig_text)[0]
    scores_list.append({
        'document': doc_id,
        'rouge_1_f1': pair_scores['rouge-1']['f'],
        'rouge_2_f1': pair_scores['rouge-2']['f'],
        'rouge_l_f1': pair_scores['rouge-l']['f'],
    })

scores_df = pd.DataFrame(scores_list)

# Mean of the ROUGE-2 F1 column over all scored rows.
average_rouge_2_f1 = scores_df['rouge_2_f1'].mean()

print("The average ROUGE-2 F1 score across all records is:", average_rouge_2_f1)

The average ROUGE-2 F1 score across all records is: 0.20696501983423318
