In [11]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np

In [2]:
# Results file published in the DataTalksClub/llm-zoomcamp repository (04-monitoring/data).
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
# The link above is a GitHub "blob" page URL; `?raw=1` is appended so the request is
# expected to return the CSV contents themselves.
# NOTE(review): this relies on GitHub's handling of `?raw=1` — confirm if loading fails.
url = f'{github_url}?raw=1'
# Read the CSV straight from the URL (requires network access).
df = pd.read_csv(url)

In [3]:
# Peek at the first rows to check which columns were loaded.
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


In [5]:
# Use only the first 300 items.
# `.copy()` makes the subset an independent DataFrame, so the columns added to `df`
# in later cells are written to this frame rather than to a slice of the full
# dataset (the pattern that can trigger pandas' SettingWithCopyWarning).
df = df.iloc[:300].copy()

# Q1. Getting the embeddings model

In [6]:
# Identifier of the pretrained sentence-transformers model to load.
model_name = "multi-qa-mpnet-base-dot-v1"
# Instantiate the model once; later cells call `embedding_model.encode(...)` on it.
embedding_model = SentenceTransformer(model_name)

  from tqdm.autonotebook import tqdm, trange


In [7]:
# LLM-generated answer of the first record (select the column, then the first position).
answer_llm = df['answer_llm'].iloc[0]

In [9]:
# Embed the single LLM answer selected above.
emb = embedding_model.encode(answer_llm)
# Display the first component of the embedding (the value asked for in Q1).
emb[0]

-0.42244655

What's the first value of the resulting vector?  
`-0.42244655`

# Q2. Computing the dot product

In [10]:
# Show the first record, whose two answers are compared in the following cells.
df.head(1)

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp


In [12]:
def dot_prod(a, b):
    """Embed two texts and return the dot product of their embeddings.

    Args:
        a: First text; passed to ``embedding_model.encode``.
        b: Second text; passed to ``embedding_model.encode``.

    Returns:
        The result of ``np.dot`` applied to the two embeddings.
    """
    vec_a = embedding_model.encode(a)
    vec_b = embedding_model.encode(b)
    similarity = np.dot(vec_a, vec_b)
    return similarity

In [13]:
# Sanity check on one record: dot product between the embeddings of the LLM answer
# and the original answer of the first row.
dot_prod(df.iloc[0].answer_llm, df.iloc[0].answer_orig)

17.515987

In [14]:
# Score every record: dot product between the embeddings of the LLM answer and the
# original answer, stored in a new 'evaluations' column.
# Note: dot_prod encodes both texts on each call, so the model runs twice per row.
df['evaluations'] = df.apply(lambda row: dot_prod(row['answer_llm'], row['answer_orig']), axis=1)
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course,evaluations
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp,17.515987
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp,13.418402
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp,25.313255
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp,12.147415
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp,18.747736


In [15]:
# Summary statistics of the numeric columns; the 75% row answers Q2.
df.describe()

Unnamed: 0,evaluations
count,300.0
mean,27.495996
std,6.384742
min,4.547923
25%,24.307844
50%,28.33687
75%,31.674309
max,39.476013


What's the 75th percentile of the score?  
`31.674309`

# Q3. Computing the cosine

In [16]:
def get_norm(v):
    """Scale a vector to unit Euclidean length.

    Args:
        v: NumPy array (for example an embedding) to normalise.

    Returns:
        ``v`` divided by its Euclidean (L2) norm. When the norm is zero the
        input has no direction, so it is returned as zeros instead of the
        NaNs (and RuntimeWarning) that dividing by zero would produce.
    """
    norm = np.sqrt((v * v).sum())
    if norm == 0:
        # Dividing by 1 leaves the zeros untouched while still going through
        # the same division as the normal path, so the result dtype matches.
        norm = 1.0
    return v / norm


def cosine(a, b):
    """Embed two texts and return the dot product of their normalised embeddings.

    Args:
        a: First text; passed to ``embedding_model.encode``.
        b: Second text; passed to ``embedding_model.encode``.

    Returns:
        ``np.dot`` of the two embeddings after each has been passed through
        ``get_norm``, i.e. their cosine similarity.
    """
    vec_a = embedding_model.encode(a)
    vec_b = embedding_model.encode(b)
    unit_a = get_norm(vec_a)
    unit_b = get_norm(vec_b)
    return np.dot(unit_a, unit_b)

In [18]:
# Cosine similarity between the LLM answer and the original answer for every record,
# stored in a new 'cosine' column.
# Note: cosine re-encodes both texts on each call, so the model runs twice per row.
df['cosine'] = df.apply(lambda row: cosine(row['answer_llm'], row['answer_orig']), axis=1)
# Summary statistics; the 75% row of the 'cosine' column answers Q3.
df.describe()

Unnamed: 0,evaluations,cosine
count,300.0,300.0
mean,27.495996,0.728393
std,6.384742,0.157755
min,4.547923,0.125357
25%,24.307844,0.651273
50%,28.33687,0.763761
75%,31.674309,0.836235
max,39.476013,0.958796


What's the 75th percentile of the cosine scores?  
`0.836235`

# Q4. Rouge

In [20]:
# The ROUGE cells below import the third-party `rouge` package; uncomment to install it:
# !pip install rouge

In [24]:
from rouge import Rouge

# A single scorer instance is created here and reused by the later ROUGE cells.
rouge_scorer = Rouge()

# Score the record at position 10: the LLM answer is passed first and the original
# answer second. The row is looked up once and reused for both columns.
record = df.iloc[10]
# get_scores returns a list; its first element holds the
# 'rouge-1' / 'rouge-2' / 'rouge-l' entries shown below.
scores = rouge_scorer.get_scores(record['answer_llm'], record['answer_orig'])[0]
scores

{'rouge-1': {'r': 0.45454545454545453,
  'p': 0.45454545454545453,
  'f': 0.45454544954545456},
 'rouge-2': {'r': 0.21621621621621623,
  'p': 0.21621621621621623,
  'f': 0.21621621121621637},
 'rouge-l': {'r': 0.3939393939393939,
  'p': 0.3939393939393939,
  'f': 0.393939388939394}}

What's the F score for rouge-1?  
`0.45454544954545456`

# Q5. Average rouge score

In [25]:
def average_rouge_score(rouge_scores):
    """Return the mean of the ``'f'`` values over all metrics in a ROUGE result.

    Args:
        rouge_scores: Mapping of metric name to a dict that contains an
            ``'f'`` entry (for example the ``scores`` dict built earlier).

    Returns:
        The sum of the ``'f'`` values divided by the number of metrics.

    Raises:
        ZeroDivisionError: If ``rouge_scores`` is empty.
    """
    total = 0
    count = 0
    for metric in rouge_scores.values():
        total += metric['f']
        count += 1
    return total / count

In [26]:
# Mean of the rouge-1, rouge-2 and rouge-l F-scores for the single record scored above.
average_score = average_rouge_score(scores)
average_score

0.35490034990035496

# Q6. Average rouge score for all the data points


In [27]:
def get_rouge_scores(a, b):
    """Score one pair of texts with ``rouge_scorer`` and return the F-scores.

    Args:
        a: First text handed to ``rouge_scorer.get_scores``.
        b: Second text handed to ``rouge_scorer.get_scores``.

    Returns:
        A 4-tuple ``(rouge_1, rouge_2, rouge_l, rouge_avg)``: the ``'f'``
        values of the 'rouge-1', 'rouge-2' and 'rouge-l' entries, followed by
        their arithmetic mean.
    """
    result = rouge_scorer.get_scores(a, b)[0]
    f1, f2, fl = (result[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l'))
    return f1, f2, fl, (f1 + f2 + fl) / 3

In [28]:
# Compute the three ROUGE F-scores and their mean for every record.
# `result_type='expand'` spreads each returned 4-tuple across four columns directly,
# so no intermediate pd.Series has to be built per row; the resulting columns are
# assigned, in order, to the four new column names on the left.
df[['rouge_1', 'rouge_2', 'rouge_l', 'rouge_avg']] = df.apply(
    lambda row: get_rouge_scores(row['answer_llm'], row['answer_orig']),
    axis=1,
    result_type='expand',
)
# Summary statistics; the mean of 'rouge_2' answers Q6.
df.describe()

Unnamed: 0,evaluations,cosine,rouge_1,rouge_2,rouge_l,rouge_avg
count,300.0,300.0,300.0,300.0,300.0,300.0
mean,27.495996,0.728393,0.378844,0.206965,0.353807,0.313205
std,6.384742,0.157755,0.165977,0.15355,0.162965,0.158133
min,4.547923,0.125357,0.0,0.0,0.0,0.0
25%,24.307844,0.651273,0.261625,0.097809,0.228032,0.197358
50%,28.33687,0.763761,0.378762,0.178671,0.337792,0.29864
75%,31.674309,0.836235,0.479281,0.286181,0.451613,0.404169
max,39.476013,0.958796,0.85,0.73913,0.85,0.813043


What's the average rouge_2 across all the records?  
`0.206965`