In [1]:
# Courier profile repository: courier records are kept in a TinyDB JSON file.

from tinydb import TinyDB, Query
from pathlib import Path

# Relative path of the TinyDB storage file (resolved against the working directory).
courier_profiles_db_file = Path("tmp_tinydb_storage/courier_profiles_db.json")
db = TinyDB(courier_profiles_db_file)
# Query builder; `db` and `User` are reused by the ground-truth cell further down.
User = Query()

# Sanity check: show the stored profile(s) whose `index` field equals 10.
db.search(User.index == 10)

[{'index': 10,
  'first_name': 'Ethan',
  'last_name': 'Thomas',
  'date_of_birth': '1994-01-18',
  'contract_type': 'Employee',
  'vehicle_type': 'car',
  'contract_id': 'ID-EMP-1011',
  'country': 'GB'}]

In [8]:
# FAQ repository: FAQ records are read from the "courier-faq" collection of a
# Qdrant instance reachable at localhost:6333.

from qdrant_client import QdrantClient, models

qd_client = QdrantClient("http://localhost:6333")
# NOTE(review): presumably the vector size of the embedding model named below;
# neither constant is used in this section — confirm against the indexing code.
EMBEDDING_DIMENSIONALITY = 512
model_handle = "jinaai/jina-embeddings-v2-small-en"
collection_name = "courier-faq"

# Peek at one stored FAQ. scroll() returns (points, next_page_offset); only the
# payload of the sixth point is displayed, so vectors are not requested.
qd_client.scroll(
    collection_name=collection_name,
    with_payload=True,
    with_vectors=False,
    limit=100,
)[0][5].payload

{'country': 'DE',
 'question': 'What are my rights as a part-time employee?',
 'answer': 'As a part-time employee, you have the same rights and protections as a full-time employee, including minimum wage, paid leave, and social security. Your hours and pay are proportional to your work schedule.'}

In [9]:
def build_evaluation_retrieval_prompt(question, answer, courier):
    """Render the question-generation prompt for one FAQ record and one courier.

    Args:
        question: FAQ question text inserted into the prompt.
        answer: FAQ answer text inserted into the prompt.
        courier: Mapping that provides the keys 'first_name', 'age',
            'contract_type', 'vehicle_type' and 'country'.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.

    Raises:
        KeyError: If ``courier`` lacks one of the keys listed above.
    """
    template = """
You emulate a food delivery courier for iDelivery company who is talking to a courier support agent.
Formulate 5 questions this courier might ask based on a FAQ record and courier profile. The record
should contain the answer to the questions, and the questions should be complete and not too short.
If possible, use as fewer words as possible from the record. 


The record:
courier first name: {courier_first_name}
courier age: {courier_age}
courier contract type: {courier_contract_type}
courier vehicle type: {courier_vehicle_type}
courier country: {courier_country}
question: {question}
answer: {answer}

Provide the output in parsable JSON without using code blocks:

["question1", "question2", ..., "question5"]
""".strip()

    # Each courier placeholder is the courier's key prefixed with "courier_".
    fields = {"question": question, "answer": answer}
    for key in ("first_name", "age", "contract_type", "vehicle_type", "country"):
        fields["courier_" + key] = courier[key]

    return template.format(**fields).strip()

In [16]:
from openai import OpenAI
import keys_secret

# `import keys_secret` binds only the module name, so the key has to be read as
# an attribute of that module; the bare name `openai_api_key` is otherwise
# undefined on a fresh kernel.
# NOTE(review): assumes keys_secret defines `openai_api_key` — confirm.
openai_client = OpenAI(api_key=keys_secret.openai_api_key)


def llm(prompt):
    """Send ``prompt`` as a single user message to gpt-4o-mini and return the reply.

    Uses the module-level ``openai_client``.

    Args:
        prompt: Text sent as the content of the only (user) message.

    Returns:
        The ``content`` of the first choice in the chat-completion response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = openai_client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [13]:
# Prepare ground truth: for every FAQ, have the LLM generate questions that the
# FAQ answers, and record them together with the FAQ and the courier profile used.
from tqdm.auto import tqdm
import json
import random
import helpers

ground_truth = []
# FAQs whose LLM response could not be used, as (faq_id, reason) pairs.
skipped_faqs = []

# scroll() returns (points, next_page_offset); only ids and payloads are used
# below, so vectors are not requested.
faqs = qd_client.scroll(
    collection_name=collection_name,
    with_payload=True,
    with_vectors=False,
    limit=100000
)[0]

for faq in tqdm(faqs):
    faq_id = faq.id
    faq_payload = faq.payload

    # FAQs whose country is "all" are paired with a randomly chosen country.
    if faq_payload['country'] == "all":
        question_country = random.choice(["DE", "NL", "GB"])
    else:
        question_country = faq_payload['country']

    # Use the first stored courier of that country. Build a new dict instead of
    # adding 'age' to the document object returned by TinyDB.
    courier_doc = db.search(User.country == question_country)[0]
    one_courier = {
        **courier_doc,
        'age': helpers.get_age_by_birthdate(courier_doc['date_of_birth']),
    }

    prompt = build_evaluation_retrieval_prompt(faq_payload['question'], faq_payload['answer'], one_courier)

    raw_generated_questions = llm(prompt)
    # The model's reply is not guaranteed to be valid JSON. Skip such FAQs
    # instead of aborting the run and losing the records collected so far.
    try:
        generated_question_list = json.loads(raw_generated_questions)
    except (TypeError, json.JSONDecodeError) as error:
        skipped_faqs.append((faq_id, f"unparsable LLM response: {error}"))
        continue
    # A non-list reply would otherwise be iterated as characters or keys.
    if not isinstance(generated_question_list, list):
        skipped_faqs.append((faq_id, "LLM response is not a JSON list"))
        continue

    for generated_question in generated_question_list:
        ground_truth.append({
            "ground_truth_faq_id": faq_id,
            "generated_question": generated_question,
            "ground_truth_question": faq_payload['question'],
            "ground_truth_answer": faq_payload['answer'],
            "ground_truth_courier": one_courier,
        })

if skipped_faqs:
    print(f"Skipped {len(skipped_faqs)} of {len(faqs)} FAQs: {skipped_faqs}")

ground_truth[0]

  0%|          | 0/385 [00:00<?, ?it/s]

{'ground_truth_faq_id': 0,
 'generated_question': 'What type of employment contract do I have with iDelivery?',
 'ground_truth_question': 'What is my contract type as an employee?',
 'ground_truth_answer': 'As an employee, you will have a part-time or full-time employment contract with iDelivery. This provides you with a fixed salary, paid vacation, and social security benefits, unlike a freelancer.',
 'ground_truth_courier': {'index': 2,
  'first_name': 'Olivia',
  'last_name': 'Davis',
  'date_of_birth': '1995-02-10',
  'contract_type': 'Employee',
  'vehicle_type': 'bike',
  'contract_id': 'ID-EMP-1003',
  'country': 'DE',
  'age': 30}}

In [14]:
# Save the collected ground-truth records to disk as JSON; the 4-space
# indentation keeps the file readable by hand.
file_path = "evaluation_ground_truth.json"
with open(file_path, mode='w') as json_file:
    json.dump(ground_truth, fp=json_file, indent=4)