In [2]:
import pandas as pd

In [3]:
github_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv"
url = f'{github_url}?raw=1'
df = pd.read_csv(url)

In [4]:
df = df.iloc[:300]
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


## Q1

In [5]:
from sentence_transformers import SentenceTransformer

model_name = "multi-qa-mpnet-base-dot-v1"
embedding_model = SentenceTransformer(model_name)

  from tqdm.autonotebook import tqdm, trange


In [6]:
answer_llm = df.iloc[0].answer_llm
answer_llm

'You can sign up for the course by visiting the course page at [http://mlzoomcamp.com/](http://mlzoomcamp.com/).'

In [7]:
embedding_model.encode(answer_llm)[0]

np.float32(-0.42244655)

## Q2

In [8]:
from tqdm import tqdm
tqdm.pandas()

def get_dot_prod(row):
    v_llm = embedding_model.encode(row['answer_llm'])
    v_orig = embedding_model.encode(row['answer_orig'])
    return v_llm.dot(v_orig)

df['dot'] = df.progress_apply(lambda x: get_dot_prod(x), axis=1)

  0%|          | 0/300 [00:00<?, ?it/s]

100%|██████████| 300/300 [02:26<00:00,  2.04it/s]


In [9]:
df['dot'].describe()

count    300.000000
mean      27.495996
std        6.384742
min        4.547924
25%       24.307844
50%       28.336870
75%       31.674309
max       39.476013
Name: dot, dtype: float64

## Q3

In [15]:
import numpy as np
norm = np.linalg.norm(df['dot'], axis=0)

In [17]:
df['dot'].apply(lambda x: x/norm).describe()

count    300.000000
mean       0.056244
std        0.013060
min        0.009303
25%        0.049722
50%        0.057964
75%        0.064790
max        0.080749
Name: dot, dtype: float64

## Q4

In [21]:
%pip install --upgrade -q pip
%pip install -q rouge

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [22]:
from rouge import Rouge
rouge_scorer = Rouge()

In [27]:
target = df.loc[10, :]
target

answer_llm     Yes, all sessions are recorded, so if you miss...
answer_orig    Everything is recorded, so you won’t miss anyt...
document                                                5170565b
question                    Are sessions recorded if I miss one?
course                                 machine-learning-zoomcamp
dot                                                    32.344711
Name: 10, dtype: object

In [28]:
scores = rouge_scorer.get_scores(target['answer_llm'], target['answer_orig'])[0]
scores

{'rouge-1': {'r': 0.45454545454545453,
  'p': 0.45454545454545453,
  'f': 0.45454544954545456},
 'rouge-2': {'r': 0.21621621621621623,
  'p': 0.21621621621621623,
  'f': 0.21621621121621637},
 'rouge-l': {'r': 0.3939393939393939,
  'p': 0.3939393939393939,
  'f': 0.393939388939394}}

In [30]:
score = 0
for key in scores.keys():
    print(scores[key]['f'])
    score += scores[key]['f']

score/len(list(scores.keys()))

0.45454544954545456
0.21621621121621637
0.393939388939394


0.35490034990035496

## Q6

In [40]:
def find_rouge_2_f_score(row):
    scores = rouge_scorer.get_scores(row['answer_llm'], row['answer_orig'])[0]
    return scores['rouge-2']['f']

df['rogue_2_f'] = df.progress_apply(lambda x: find_rouge_2_f_score(x), axis=1)

100%|██████████| 300/300 [00:00<00:00, 306.06it/s]


In [41]:
df['rogue_2_f'].describe()

count    300.000000
mean       0.206965
std        0.153550
min        0.000000
25%        0.097809
50%        0.178671
75%        0.286181
max        0.739130
Name: rogue_2_f, dtype: float64