[homework question link](https://github.com/DataTalksClub/llm-zoomcamp/blob/main/cohorts/2024/04-monitoring/homework.md)

In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

  from tqdm.autonotebook import tqdm, trange


In [2]:
# GitHub "blob" page of the gpt-4o-mini results file; appending `?raw=1`
# turns it into a link that serves the raw CSV content.
github_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/04-monitoring/data/results-gpt4o-mini.csv'
url = github_url + '?raw=1'

# Read the CSV straight from the URL into a DataFrame.
df = pd.read_csv(url)

In [3]:
# Keep only the first 300 rows for the rest of the notebook.
df = df.head(300)

# Q1. Getting the embeddings model

In [4]:
# Name of the pretrained SentenceTransformer checkpoint used for every embedding below.
model_name = 'multi-qa-mpnet-base-dot-v1'
# Instantiate the model once; later cells call `embedding_model.encode(...)` on it.
embedding_model = SentenceTransformer(model_name)

In [28]:
# Q1: embed the LLM answer of the first row and show the first component
# of the resulting vector.
answer_llm = df['answer_llm'].iloc[0]
embedding_model.encode(answer_llm)[0]

np.float32(-0.42244655)

# Q2. Computing the dot product

In [29]:
def evaluate_score(document):
    """Return the dot product of the original-answer and LLM-answer embeddings.

    `document` must support lookup of the 'answer_orig' and 'answer_llm'
    entries (a dict or a DataFrame row). Both texts are encoded with the
    notebook-level `embedding_model`, the original answer first.
    """
    emb_orig, emb_llm = [
        embedding_model.encode(document[field])
        for field in ('answer_orig', 'answer_llm')
    ]
    return emb_orig.dot(emb_llm)

In [30]:
# Sanity check: score the first row (converted to a plain dict) before
# applying the function to the whole frame.
evaluate_score(df.iloc[0].to_dict())

np.float32(17.515987)

In [36]:
# Dot-product score for every row; each row is handed to evaluate_score as-is.
df['score'] = df.apply(evaluate_score, axis=1)

In [38]:
# Q2: summary statistics of the dot-product scores (the 75% row is the upper quartile).
df['score'].describe()

count    300.000000
mean      27.495996
std        6.384742
min        4.547923
25%       24.307844
50%       28.336870
75%       31.674309
max       39.476013
Name: score, dtype: float64

# Q3. Computing the cosine

In [43]:
def evaluate_norm_score(document):
    """Return the cosine similarity of the original and LLM answer embeddings.

    Both answers are encoded with the notebook-level `embedding_model`. Each
    embedding is scaled to unit L2 length, and the dot product of the two
    unit vectors is returned.

    Parameters
    ----------
    document
        Row-like object (e.g. a DataFrame row) supporting lookup of the
        'answer_orig' and 'answer_llm' entries.

    Returns
    -------
    The dot product of the two L2-normalised embeddings.

    Notes
    -----
    A zero-norm embedding is not guarded against; the division would then
    produce non-finite values.
    """
    original = embedding_model.encode(document['answer_orig'])
    llm = embedding_model.encode(document['answer_llm'])

    # np.linalg.norm replaces the hand-rolled sqrt(sum(v * v)); the unit
    # vectors get their own names instead of overwriting the raw embeddings.
    unit_original = original / np.linalg.norm(original)
    unit_llm = llm / np.linalg.norm(llm)

    return unit_original.dot(unit_llm)

In [44]:
# Cosine score for every row; each row is handed to evaluate_norm_score as-is.
df['norm_score'] = df.apply(evaluate_norm_score, axis=1)

In [45]:
# Q3: summary statistics of the cosine scores (the 75% row is the upper quartile).
df['norm_score'].describe()

count    300.000000
mean       0.728393
std        0.157755
min        0.125357
25%        0.651273
50%        0.763761
75%        0.836235
max        0.958796
Name: norm_score, dtype: float64

# Q4. Rouge

In [4]:
# Display the frame before computing ROUGE.
# NOTE(review): this renders the whole DataFrame; `df.head()` would keep the output small.
# NOTE(review): the recorded output has no `score` / `norm_score` columns, which
# suggests this cell ran on a fresh kernel — confirm with Restart & Run All.
df

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp
...,...,...,...,...,...
295,An alternative way to load the data using the ...,Above users showed how to load the dataset dir...,8d209d6d,What is an alternative way to load the data us...,machine-learning-zoomcamp
296,You can directly download the dataset from Git...,Above users showed how to load the dataset dir...,8d209d6d,How can I directly download the dataset from G...,machine-learning-zoomcamp
297,You can fetch data for homework using the `req...,Above users showed how to load the dataset dir...,8d209d6d,Could you share a method to fetch data for hom...,machine-learning-zoomcamp
298,If the status code is 200 when downloading dat...,Above users showed how to load the dataset dir...,8d209d6d,What should I do if the status code is 200 whe...,machine-learning-zoomcamp


In [7]:
from rouge import Rouge

# One scorer instance for the whole notebook; calc_rouge_2 below reuses it.
rouge_scorer = Rouge()

# Q4: ROUGE scores for the record at position 10, with the LLM answer as the
# first argument and the original answer as the second.
r = df.iloc[10]
scores = rouge_scorer.get_scores(r.answer_llm, r.answer_orig)[0]
scores

{'rouge-1': {'r': 0.45454545454545453,
  'p': 0.45454545454545453,
  'f': 0.45454544954545456},
 'rouge-2': {'r': 0.21621621621621623,
  'p': 0.21621621621621623,
  'f': 0.21621621121621637},
 'rouge-l': {'r': 0.3939393939393939,
  'p': 0.3939393939393939,
  'f': 0.393939388939394}}

# Q5. Average rouge score

In [11]:
# Q5: average of the 'f' values across all ROUGE variants stored in `scores`.
np.mean([metric['f'] for metric in scores.values()])

np.float64(0.35490034990035496)

# Q6. Average rouge score for all the data points

In [19]:
def calc_rouge_2(document):
    """Return the ROUGE-2 'f' value for one record.

    The record's 'answer_llm' text is passed to the notebook-level
    `rouge_scorer` as the first argument and 'answer_orig' as the second.
    Only the first entry of the returned list is used.
    """
    hypothesis, reference = document['answer_llm'], document['answer_orig']
    first_result = rouge_scorer.get_scores(hypothesis, reference)[0]
    return first_result['rouge-2']['f']

# ROUGE-2 'f' value for every row; each row is handed to calc_rouge_2 as-is.
df['rouge-2'] = df.apply(calc_rouge_2, axis=1)

In [20]:
# Q6: summary statistics of the per-row ROUGE-2 'f' values (the mean row is the average).
df['rouge-2'].describe()

count    300.000000
mean       0.206965
std        0.153550
min        0.000000
25%        0.097809
50%        0.178671
75%        0.286181
max        0.739130
Name: rouge-2, dtype: float64