In [23]:
import pandas as pd
import numpy as np

from tqdm.auto import tqdm

In [1]:
# GitHub page URL of the gpt-4o-mini results CSV in the llm-zoomcamp repository.
github_url = (
    'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/'
    '04-monitoring/data/results-gpt4o-mini.csv'
)

In [4]:
# The ?raw=1 query string is appended so read_csv receives the CSV content
# itself rather than the GitHub HTML page around it.
url = github_url + '?raw=1'
df = pd.read_csv(url)

In [5]:
# Preview the first rows of the loaded results table.
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


In [7]:
# (rows, columns) of the full results table before it is truncated.
df.shape

(1830, 5)

In [8]:
# Keep only the first 300 rows for the evaluation. The explicit copy makes the
# subset an independent frame, so the score columns added later are written to
# it directly rather than to a slice of the full table (which pandas can flag
# with SettingWithCopyWarning).
df = df.iloc[:300].copy()

In [9]:
# Column dtypes and non-null counts of the truncated frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   answer_llm   300 non-null    object
 1   answer_orig  300 non-null    object
 2   document     300 non-null    object
 3   question     300 non-null    object
 4   course       300 non-null    object
dtypes: object(5)
memory usage: 11.8+ KB


In [10]:
# Name of the sentence-transformers model handed to SentenceTransformer below.
model_name = 'multi-qa-mpnet-base-dot-v1'

In [11]:
# NOTE(review): this import sits mid-notebook; consider moving it next to the
# other imports in the first cell so all dependencies are visible up front.
from sentence_transformers import SentenceTransformer
# Load the model named by model_name; the similarity functions below read this
# module-level instance.
embedding_model = SentenceTransformer(model_name)

  from .autonotebook import tqdm as notebook_tqdm
You try to use a model that was created with version 3.0.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





In [12]:
# Embed the LLM answer of the first row and show the leading five components.
answer_llm = df['answer_llm'].iloc[0]
v_answer_llm = embedding_model.encode(answer_llm)
v_answer_llm[:5]

array([-0.42244673, -0.22485583, -0.32405847, -0.28475878,  0.00725701],
      dtype=float32)

### Answer 1: the first value of the embedding vector is -0.42

In [14]:
def compute_similarity(record, model=None):
    """Return the dot product between the embeddings of the two answers in a record.

    Parameters
    ----------
    record : mapping
        Must provide the keys 'answer_orig' and 'answer_llm'; their values are
        passed to ``model.encode``.
    model : object, optional
        Any object with an ``encode(text)`` method returning a vector that
        supports ``.dot``. When omitted, the notebook-level ``embedding_model``
        is used, so existing ``compute_similarity(record)`` calls are unchanged.

    Returns
    -------
    The value of ``encode(answer_llm).dot(encode(answer_orig))``.
    """
    # Resolve the default lazily so the global is only required when no model
    # is supplied explicitly.
    if model is None:
        model = embedding_model

    v_llm = model.encode(record['answer_llm'])
    v_orig = model.encode(record['answer_orig'])

    return v_llm.dot(v_orig)

In [15]:
# One dict per row (column name -> value), in row order, so each record can be
# handed to the scoring functions.
results_gpt4o_mini = df.to_dict(orient='records')

In [18]:
# Dot-product score for every record, in record order, with a progress bar.
evaluations = [
    compute_similarity(record)
    for record in tqdm(results_gpt4o_mini)
]

100%|████████████████████████████████████████████████████████| 300/300 [00:10<00:00, 28.95it/s]


In [19]:
# Dot-product score of the first record.
evaluations[0]

17.515999

In [20]:
# Largest dot-product score among the evaluated records.
max(evaluations)

39.476017

In [None]:
# Attach the scores as a column; the order lines up with df's rows because
# results_gpt4o_mini was built from df.
df['dot_prod'] = evaluations

In [22]:
# Summary statistics (count, mean, std, quartiles) of the dot-product scores.
df['dot_prod'].describe()

count    300.000000
mean      27.495996
std        6.384743
min        4.547922
25%       24.307846
50%       28.336861
75%       31.674309
max       39.476017
Name: dot_prod, dtype: float64

### Answer 2: the 75th percentile of the dot-product scores is 31.67

In [24]:
def compute_cosine(record, model=None):
    """Return the cosine similarity between the embeddings of the two answers.

    Parameters
    ----------
    record : mapping
        Must provide the keys 'answer_orig' and 'answer_llm'; their values are
        passed to ``model.encode``.
    model : object, optional
        Any object with an ``encode(text)`` method returning a NumPy vector.
        When omitted, the notebook-level ``embedding_model`` is used, so
        existing ``compute_cosine(record)`` calls are unchanged.

    Returns
    -------
    The dot product of the two L2-normalised embeddings, or ``0.0`` when either
    embedding has zero norm.
    """
    # Resolve the default lazily so the global is only required when no model
    # is supplied explicitly.
    if model is None:
        model = embedding_model

    v_llm = model.encode(record['answer_llm'])
    v_orig = model.encode(record['answer_orig'])

    norm_llm = np.linalg.norm(v_llm)
    norm_orig = np.linalg.norm(v_orig)

    # A zero vector has no direction; dividing by its norm would yield NaN,
    # so report zero similarity instead.
    if norm_llm == 0 or norm_orig == 0:
        return 0.0

    return (v_llm / norm_llm).dot(v_orig / norm_orig)

In [25]:
# Cosine similarity for every record, in record order, with a progress bar.
evaluations2 = [
    compute_cosine(record)
    for record in tqdm(results_gpt4o_mini)
]

100%|████████████████████████████████████████████████████████| 300/300 [00:10<00:00, 29.90it/s]


In [26]:
# Largest cosine similarity among the evaluated records.
max(evaluations2)

0.9587959

In [27]:
# Attach the cosine scores as a column; the order lines up with df's rows
# because results_gpt4o_mini was built from df.
df['cosine'] = evaluations2

In [28]:
# Summary statistics (count, mean, std, quartiles) of the cosine scores.
df['cosine'].describe()

count    300.000000
mean       0.728392
std        0.157755
min        0.125357
25%        0.651273
50%        0.763761
75%        0.836235
max        0.958796
Name: cosine, dtype: float64

### Answer 3: the 75th percentile of the cosine similarity is 0.83 (computed value 0.836235)