## Using Minisearch

In [1]:
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-09-25 05:29:14--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3,7K) [text/plain]
Saving to: ‘minsearch.py’


2024-09-25 05:29:15 (40,7 MB/s) - ‘minsearch.py’ saved [3832/3832]



In [1]:
import pandas as pd

In [7]:
df = pd.read_csv('../data/data.csv')

In [8]:
df.columns

Index(['id', 'name', 'cuisine', 'type', 'ingredients', 'serving', 'price',
       'calories'],
      dtype='object')

In [9]:
df.dtypes

id               int64
name            object
cuisine         object
type            object
ingredients     object
serving         object
price          float64
calories         int64
dtype: object

In [10]:
# Convert 'price' and 'calories' to string
df['price'] = df['price'].astype(str)
df['calories'] = df['calories'].astype(str)

In [11]:
documents = df.to_dict(orient='records')

In [12]:
documents

[{'id': 0,
  'name': 'Margherita Pizza',
  'cuisine': 'Italian',
  'type': 'Main Dish',
  'ingredients': 'Dough, Tomato Sauce, Mozzarella, Basil',
  'serving': '1 slice',
  'price': '12.0',
  'calories': '250'},
 {'id': 1,
  'name': 'Chicken Caesar Salad',
  'cuisine': 'American',
  'type': 'Salad',
  'ingredients': 'Romaine Lettuce, Grilled Chicken, Dressing',
  'serving': '1 bowl',
  'price': '10.5',
  'calories': '400'},
 {'id': 2,
  'name': 'Pad Thai',
  'cuisine': 'Thai',
  'type': 'Main Dish',
  'ingredients': 'Rice Noodles, Shrimp, Peanuts, Bean Sprouts',
  'serving': '1 plate',
  'price': '14.0',
  'calories': '600'},
 {'id': 3,
  'name': 'Sushi Roll (California)',
  'cuisine': 'Japanese',
  'type': 'Appetizer',
  'ingredients': 'Sushi Rice, Nori, Crab, Avocado, Cucumber',
  'serving': '8 pieces',
  'price': '9.0',
  'calories': '300'},
 {'id': 4,
  'name': 'Cheeseburger',
  'cuisine': 'American',
  'type': 'Main Dish',
  'ingredients': 'Ground Beef, Cheddar, Lettuce, Tomato, B

In [13]:
import minsearch

In [14]:
index = minsearch.Index(
    text_fields=['name', 'cuisine', 'type', 'ingredients',
       'serving', 'price', 'calories'],
    keyword_fields=['id']
)

In [15]:
index.fit(documents)

<minsearch.Index at 0x78feb82e22d0>

In [21]:
import os

In [22]:
os.environ['OPENAI_API_KEY']='YOUR OPEN API'

In [23]:
from openai import OpenAI

client = OpenAI()

In [24]:
def search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [25]:
prompt_template = """
You're a food order. Answer the QUESTION based on the CONTEXT from our exercises database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
name: {name}
cuisine: {cuisine}
type: {type}
ingredients: {ingredients}
serving: {serving}
price: {price}
calories: {calories}
""".strip()

def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [26]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [27]:
def rag(query, model='gpt-4o-mini'):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [30]:
question = 'I just have 30$ can you suggest me some Thai lan food?'
answer = rag(question)
print(answer)

With your budget of $30, you can order the following Thai food options:

1. **Pad Thai** - $14.00
2. **Tom Yum Soup** - $11.00
3. **Chicken Satay** - $8.00

You can choose any combination of these options. For example, you can order:
- Pad Thai and Tom Yum Soup for a total of $25.00.
- Pad Thai and Chicken Satay for a total of $22.00.
- Tom Yum Soup and Chicken Satay for a total of $19.00.

Or, you can order all three for a total of $33.00, which would exceed your budget. 


## Using elastic search

Run the docker elastic search

    docker run -it \
    --rm \
    --name elasticsearch \
    -p 9200:9200 \
    -p 9300:9300 \
    -e "discovery.type=single-node" \
    -e "xpack.security.enabled=false" \
    elasticsearch:8.4.3


In [12]:
import pandas as pd
import tqdm

df = pd.read_csv('../data/data.csv')

# Convert 'price' and 'calories' to string
df['price'] = df['price'].astype(str)
df['calories'] = df['calories'].astype(str)

documents = df.to_dict(orient='records')

In [13]:
documents

[{'id': 0,
  'name': 'Margherita Pizza',
  'cuisine': 'Italian',
  'type': 'Main Dish',
  'ingredients': 'Dough, Tomato Sauce, Mozzarella, Basil',
  'serving': '1 slice',
  'price': '12.0',
  'calories': '250'},
 {'id': 1,
  'name': 'Chicken Caesar Salad',
  'cuisine': 'American',
  'type': 'Salad',
  'ingredients': 'Romaine Lettuce, Grilled Chicken, Dressing',
  'serving': '1 bowl',
  'price': '10.5',
  'calories': '400'},
 {'id': 2,
  'name': 'Pad Thai',
  'cuisine': 'Thai',
  'type': 'Main Dish',
  'ingredients': 'Rice Noodles, Shrimp, Peanuts, Bean Sprouts',
  'serving': '1 plate',
  'price': '14.0',
  'calories': '600'},
 {'id': 3,
  'name': 'Sushi Roll (California)',
  'cuisine': 'Japanese',
  'type': 'Appetizer',
  'ingredients': 'Sushi Rice, Nori, Crab, Avocado, Cucumber',
  'serving': '8 pieces',
  'price': '9.0',
  'calories': '300'},
 {'id': 4,
  'name': 'Cheeseburger',
  'cuisine': 'American',
  'type': 'Main Dish',
  'ingredients': 'Ground Beef, Cheddar, Lettuce, Tomato, B

In [66]:
from sentence_transformers import SentenceTransformer

model_name = 'multi-qa-MiniLM-L6-cos-v1'
model_encode = SentenceTransformer(model_name)

In [15]:
from elasticsearch import Elasticsearch
es_client = Elasticsearch('http://localhost:9200') 

es_client.info()

ObjectApiResponse({'name': '8eba2dc8d7a1', 'cluster_name': 'docker-cluster', 'cluster_uuid': '9jCEDLXnQWmwQkbPvwGuQw', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [41]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "name": {"type": "text"},
            "cuisine": {"type": "text"},
            "type": {"type": "text"},
            "ingredients": {"type": "text"},
            "serving": {"type": "text"},
            "price": {"type": "text"},
            "calories": {"type": "text"},
            "name_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
            "cuisine_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
            "type_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
            "ingredients_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
            "text_vector": {"type": "dense_vector", "dims": 384, "index": True, "similarity": "cosine"},
        }
    }
}

In [42]:
index_name = "food-list"

es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'food-list'})

In [43]:
from tqdm.auto import tqdm

for doc in tqdm(documents):
        name = doc["name"]
        cuisine = doc["cuisine"]
        types = doc["type"]
        ingredients = doc["ingredients"]
        serving = doc["serving"]
        price = doc["price"]
        calories = doc["calories"]
        doc['name_vector'] = model_encode.encode(name)
        doc['cuisine_vector'] = model_encode.encode(cuisine)
        doc['type_vector'] = model_encode.encode(types)
        doc['ingredients_vector'] = model_encode.encode(ingredients)
        doc["text_vector"] = model_encode.encode(name + " " + cuisine + " " + types \
                                          + " " + ingredients + " " + serving + " " + price + " " + calories \
                                         ).tolist()
        es_client.index(index=index_name, document=doc)

  0%|          | 0/100 [00:00<?, ?it/s]

In [44]:
import os

os.environ['OPENAI_API_KEY']='Your OPEN API KEY'

In [45]:
from openai import OpenAI

client = OpenAI()

In [67]:
def elastic_search_hybrid(field, query, vector):
    knn_query = {
        "field": field,
        "query_vector": vector,
        "k": 10,
        "num_candidates": 10000,
        "boost": 0.5
    }

    keyword_query = {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["name", "cuisine", "type", "ingredients", "serving", "price", "calories"],
                    "type": "best_fields",
                    "boost": 0.5,
                }
            }
        }
    }

    search_query = {
        "knn": knn_query,
        "query": keyword_query,
        "size": 10,
        "_source": ["name", "cuisine", "type", "ingredients", "serving", "price", "calories", "id"]
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [68]:
prompt_template = """
You're a food order. Answer the QUESTION based on the CONTEXT from our exercises database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
name: {name}
cuisine: {cuisine}
type: {type}
ingredients: {ingredients}
serving: {serving}
price: {price}
calories: {calories}
""".strip()

def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [69]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [70]:
def rag(query, model='gpt-4o-mini'):
    q_v = model_encode.encode(query)
    search_results = elastic_search_hybrid("name_vector",query, q_v)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [71]:
question = "I just have 30$ can you suggest me some Thai lan food?"
answer = rag(question)
print(answer)

With a budget of $30, you can order the following Thai food options:

1. **Pad Thai** - $14.00
2. **Tom Yum Soup** - $11.00
3. **Chicken Satay** - $8.00

Total: $33.00 (you can choose two options to stay within $30)

Alternatively, you can choose:

1. **Chicken Pad See Ew** - $13.50
2. **Tom Yum Soup** - $11.00
3. **Chicken Satay** - $8.00 

This total is also $32.50 (again, you'll need to pick two of them to stay within your budget). 

A good combination could be:
- **Pad Thai** - $14.00
- **Chicken Satay** - $8.00 

Total: $22.00

This leaves you with some extra budget. You can choose the **Tom Yum Soup** as well, or just enjoy the two.


## Retrieval Evaluation

In [76]:
df_question = pd.read_csv('../data/ground-truth-retrieval.csv')

In [77]:
df_question.head()

Unnamed: 0,id,question
0,0,What type of cuisine is Margherita Pizza class...
1,0,How many calories does one slice of Margherita...
2,0,What are the main ingredients found in Margher...
3,0,How much does one slice of Margherita Pizza cost?
4,0,What type of dish is Margherita Pizza considered?


In [78]:
ground_truth = df_question.to_dict(orient='records')

In [79]:
ground_truth[0]

{'id': 0,
 'question': 'What type of cuisine is Margherita Pizza classified under?'}

In [80]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [36]:
def minsearch_search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [81]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [82]:
from tqdm.auto import tqdm

In [39]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/500 [00:00<?, ?it/s]

{'hit_rate': 0.942, 'mrr': 0.8820936507936509}

## Finding the best paramater

In [40]:
df_validation = df_question[:50]
df_test = df_question[50:]

In [41]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10):
    best_params = None
    best_score = float('-inf')  # Assuming we're minimizing. Use float('-inf') if maximizing.

    for _ in range(n_iterations):
        # Generate random parameters
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = random.randint(min_val, max_val)
            else:
                current_params[param] = random.uniform(min_val, max_val)
        
        # Evaluate the objective function
        current_score = objective_function(current_params)
        
        # Update best if current is better
        if current_score > best_score:  # Change to > if maximizing
            best_score = current_score
            best_params = current_params
    
    return best_params, best_score

In [42]:
gt_val = df_validation.to_dict(orient='records')

In [43]:
def minsearch_search(query, boost=None):
    if boost is None:
        boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [44]:
param_ranges = {
    'name': (0.0, 3.0),
    'cuisine': (0.0, 3.0),
    'type': (0.0, 3.0),
    'ingredients': (0.0, 3.0),
    'serving': (0.0, 3.0),
    'price': (0.0, 3.0),
    'calories': (0.0, 3.0),
}

def objective(boost_params):
    def search_function(q):
        return minsearch_search(q['question'], boost_params)

    results = evaluate(gt_val, search_function)
    return results['mrr']

In [45]:
simple_optimize(param_ranges, objective, n_iterations=20)

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

({'name': 1.6469746098249833,
  'cuisine': 1.842003371513142,
  'type': 1.6343882591625838,
  'ingredients': 2.8659431246768454,
  'serving': 0.9517295732734925,
  'price': 1.3677454527668251,
  'calories': 0.2320056428390671},
 0.94)

In [46]:
def minsearch_improved(query):
    boost = {
      'name': 1.65,
      'cuisine': 1.84,
      'type': 1.63,
      'ingredients': 2.87,
      'serving': 0.95,
      'price': 1.37,
      'calories': 0.23
    }

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [47]:
evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

  0%|          | 0/500 [00:00<?, ?it/s]

{'hit_rate': 0.946, 'mrr': 0.8921103174603174}

### Hybrid search

Remember run code index data to elastic search above  

In [73]:
from sentence_transformers import SentenceTransformer

model_name = 'multi-qa-MiniLM-L6-cos-v1'
model_encode = SentenceTransformer(model_name)



In [74]:
from elasticsearch import Elasticsearch
es_client = Elasticsearch('http://localhost:9200') 

es_client.info()

ObjectApiResponse({'name': '066f25e3c1ab', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'Bme0iDDQQOquu6TdrHG_JQ', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [75]:
def elastic_search_hybrid(field, query, vector):
    knn_query = {
        "field": field,
        "query_vector": vector,
        "k": 10,
        "num_candidates": 10000,
        "boost": 0.5
    }

    keyword_query = {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["name", "cuisine", "type", "ingredients", "serving", "price", "calories"],
                    "type": "best_fields",
                    "boost": 0.5,
                }
            }
        }
    }

    search_query = {
        "knn": knn_query,
        "query": keyword_query,
        "size": 10,
        "_source": ["name", "cuisine", "type", "ingredients", "serving", "price", "calories", "id"]
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [89]:
def evaluate_elastic(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
       
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [102]:
def question_hybrid(q):
    question = q

    v_q = model.encode(question)

    return elastic_search_hybrid('name_vector', question, v_q)

In [103]:
evaluate_elastic(ground_truth, lambda q: question_hybrid(q['question']))

  0%|          | 0/500 [00:00<?, ?it/s]

{'hit_rate': 0.946, 'mrr': 0.9135801587301586}

## RAG evaluation

In [48]:
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [49]:
len(ground_truth)

500

In [50]:
record = ground_truth[0]

In [51]:
import json

In [55]:
df_sample = df_question.sample(n=100, random_state=1)

In [56]:
sample = df_sample.to_dict(orient='records')

In [57]:
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/100 [00:00<?, ?it/s]

In [58]:
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])

df_eval['id'] = df_eval.record.apply(lambda d: d['id'])
df_eval['question'] = df_eval.record.apply(lambda d: d['question'])

df_eval['relevance'] = df_eval.evaluation.apply(lambda d: d['Relevance'])
df_eval['explanation'] = df_eval.evaluation.apply(lambda d: d['Explanation'])

del df_eval['record']
del df_eval['evaluation']

In [59]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.92
PARTLY_RELEVANT    0.07
NON_RELEVANT       0.01
Name: proportion, dtype: float64

In [60]:
df_eval.to_csv('../data/rag-eval-gpt-4o-mini.csv', index=False)

In [61]:
df_eval[df_eval.relevance == 'NON_RELEVANT']

Unnamed: 0,answer,id,question,relevance,explanation
12,The Avocado Toast serves 1 slice.,23,How many slices does the Avocado Toast serve?,NON_RELEVANT,The question asks how many slices the Avocado ...


In [62]:
evaluations_gpt4o = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question, model='gpt-4o') 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)
    
    evaluations_gpt4o.append((record, answer_llm, evaluation))

  0%|          | 0/100 [00:00<?, ?it/s]

In [63]:
df_eval = pd.DataFrame(evaluations_gpt4o, columns=['record', 'answer', 'evaluation'])

df_eval['id'] = df_eval.record.apply(lambda d: d['id'])
df_eval['question'] = df_eval.record.apply(lambda d: d['question'])

df_eval['relevance'] = df_eval.evaluation.apply(lambda d: d['Relevance'])
df_eval['explanation'] = df_eval.evaluation.apply(lambda d: d['Explanation'])

del df_eval['record']
del df_eval['evaluation']

In [64]:
df_eval.relevance.value_counts()

relevance
RELEVANT           93
PARTLY_RELEVANT     7
Name: count, dtype: int64

In [65]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.93
PARTLY_RELEVANT    0.07
Name: proportion, dtype: float64

In [66]:
df_eval.to_csv('../data/rag-eval-gpt-4o.csv', index=False)