## Prepare input data

In [28]:
import pandas as pd
import numpy as np

couriers_profiles_df = pd.read_csv("../dataset/courier_profiles.csv")
couriers_faq_df = pd.read_csv("../dataset/couriers_faq.csv")

countries = ['DE', 'NL', 'GB']
couriers_profiles_df['country'] = np.random.choice(countries, size=len(couriers_profiles_df))
couriers_profiles_df = couriers_profiles_df.reset_index()


replacement_map = {
    'germany': 'DE',
    'netherlands': 'NL',
    'uk': 'GB'
}

couriers_faq_df['country'] = couriers_faq_df['country'].str.lower().replace(replacement_map)


couriers_profiles_df

Unnamed: 0,index,first_name,last_name,date_of_birth,contract_type,vehicle_type,contract_id,country
0,0,Emma,Johnson,1992-05-15,Employee,bike,ID-EMP-1001,GB
1,1,Liam,Smith,1988-11-23,Freelancer,car,ID-FREE-1002,NL
2,2,Olivia,Davis,1995-02-10,Employee,bike,ID-EMP-1003,NL
3,3,Noah,Wilson,1990-07-01,Freelancer,car,ID-FREE-1004,GB
4,4,Ava,Brown,1998-09-28,Employee,car,ID-EMP-1005,NL
...,...,...,...,...,...,...,...,...
95,95,Aaliyah,Moore,1986-12-30,Freelancer,bike,ID-FREE-1096,GB
96,96,Jason,Phillips,1994-02-14,Employee,bike,ID-EMP-1097,NL
97,97,Skylar,Ramirez,1999-06-08,Freelancer,car,ID-FREE-1098,NL
98,98,Adam,Stewart,1989-08-03,Employee,car,ID-EMP-1099,NL


In [29]:
couriers_faq_df

Unnamed: 0,country,question,answer
0,DE,What is my contract type as an employee?,"As an employee, you will have a part-time or f..."
1,DE,How is my hourly pay calculated as an employee?,Your pay is based on an agreed-upon hourly wag...
2,DE,Do I get paid sick leave as an employee?,"Yes, if you fall ill, you are entitled to cont..."
3,DE,What is the process for employee onboarding?,Onboarding includes an in-person session where...
4,DE,How do I report a work-related injury as an em...,"If you are injured on the job, you must report..."
...,...,...,...
650,all,What if my phone battery dies mid-delivery?,Charge your phone as quickly as possible. If t...
651,all,Can I use a VPN while using the app?,"No, using a VPN can cause issues with GPS and ..."
652,all,What happens if a customer complains about me?,The company will investigate the complaint. If...
653,all,I was unable to complete an order due to a per...,Contact iDelivery support immediately and let ...


## Insert Couriers data to NoSQL: TinyDB 

In [30]:
from tinydb import TinyDB, Query
from tinydb.storages import MemoryStorage

db = TinyDB(storage=MemoryStorage)
User = Query()

db.insert_multiple(couriers_profiles_df.to_dict('records'))

db.search(User.index == 10)


[{'index': 10,
  'first_name': 'Ethan',
  'last_name': 'Thomas',
  'date_of_birth': '1994-01-18',
  'contract_type': 'Employee',
  'vehicle_type': 'car',
  'contract_id': 'ID-EMP-1011',
  'country': 'DE'}]

## Insert FAQ data to Qdrant

In [31]:
# Start Qdrant first:
# podman run --rm -p 6333:6333 -p 6334:6334 -v "$(pwd)/tmp_datastore/tmp_qdrant_storage:/qdrant/storage:z" qdrant/qdrant

from qdrant_client import QdrantClient, models

qd_client = QdrantClient("http://localhost:6333")
EMBEDDING_DIMENSIONALITY = 512
model_handle = "jinaai/jina-embeddings-v2-small-en"
collection_name = "courier-faq"

#prepare collection
qd_client.delete_collection(collection_name=collection_name)

qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

qd_client.create_payload_index(
    collection_name=collection_name,
    field_name="country",
    field_schema="keyword"
)


UpdateResult(operation_id=1, status=<UpdateStatus.COMPLETED: 'completed'>)

In [32]:
points = []

for i, doc in couriers_faq_df.iterrows() :
    text = doc['question'] + ' ' + doc['answer']
    vector = models.Document(text=text, model=model_handle)
    point = models.PointStruct(
        id=i,
        vector=vector,
        payload=doc.to_dict()
    )
    points.append(point)

print("Inserting " + str(len(points)) + " points.")

qd_client.upsert(
    collection_name=collection_name,
    points=points
)

Inserting 655 points.


UpdateResult(operation_id=2, status=<UpdateStatus.COMPLETED: 'completed'>)

In [33]:
### Delete similar questions based on high cosine similarity.

all_points = list(qd_client.scroll(
    collection_name=collection_name,
    with_vectors=True,
    limit=100000
)[0])

points_to_delete = set()
processed_points = set()

for point in all_points:
    point_id = point.id

    if point_id in processed_points:
        continue 

    #Search for nearest neighbors using the current point's vector
    search_results = qd_client.search(
        collection_name=collection_name,
        query_vector=point.vector,
        limit=20,          
        score_threshold=0.9999, 
    )

    #Identify duplicates (points with high similarity)
    duplicates = []
    for hit in search_results:
        if hit.id != point_id:
            duplicates.append(hit.id)

    #Mark the original point as processed and duplicates for deletion
    processed_points.add(point_id)
    for dup_id in duplicates:
        points_to_delete.add(dup_id)
        processed_points.add(dup_id) 


# Convert the set of IDs to a list
deletion_list = list(points_to_delete)

if deletion_list:
    qd_client.delete(
        collection_name=collection_name,
        points_selector=models.PointIdsList(
            points=deletion_list
        )
    )
    print(f"Successfully deleted {len(deletion_list)} duplicate points.")
else:
    print("No duplicates found above the threshold.")

  search_results = qd_client.search(


Successfully deleted 474 duplicate points.


In [34]:
def vector_search(question, country):
    print('vector_search is called on question: '+question)
    
    query_points = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document(
            text=question,
            model=model_handle 
        ),
        query_filter=models.Filter( 
            must=[
                models.FieldCondition(
                    key="country",
                    match=models.MatchAny(any=[country, "all"] )
                )
            ]
        ),
        limit=5,
        with_payload=True
    )

    results = []
    
    for point in query_points.points:
        results.append(point.payload)
    
    return results

vector_search("Can I reject orders?", 'DE')


vector_search is called on question: Can I reject orders?


[{'country': 'all',
  'question': 'Can I refuse an order?',
  'answer': 'Yes, you can decline an order. However, having a high acceptance rate can lead to more opportunities, and a very low rate may be reviewed by the company.'},
 {'country': 'all',
  'question': 'Can I refuse an order if the weather is bad?',
  'answer': 'Yes, you have the right to decline an order if you feel unsafe due to weather conditions. Your safety is a priority.'},
 {'country': 'all',
  'question': 'The customer has a special request in the notes that I cannot fulfill.',
  'answer': 'Politely inform the customer you cannot fulfill the request and advise them to contact customer service to make a change to the order.'},
 {'country': 'all',
  'question': 'What if the restaurant is refusing to give me the order?',
  'answer': 'Do not argue with the staff. Contact iDelivery support immediately and report the issue. They will intervene on your behalf.'},
 {'country': 'all',
  'question': "The restaurant won't give 

## Load Courier contract

In [35]:
from string import Template


def get_contract(contract_type, contract_date, courier_name, courier_address):
    if (contract_type == "EMPLOYEE_CONTRACT_TYPE"): filename = "../dataset/courier_contract_employee.txt"
    elif (contract_type == "FREELANCE_CONTRACT_TYPE"): filename = "../dataset/courier_contract_freelance.txt"
    else: raise Exception("Unknown contract_type: "+contract_type)

    
    with open(filename, 'r') as f_out: 
        template = f_out.read()

        template_vars = {
            "CONTRACT_DATE": contract_date,
            "COURIER_NAME": courier_name,
            "COURIER_ADDRESS": courier_address
        }
        
        return Template(template).safe_substitute(template_vars)
       

print(get_contract("FREELANCE_CONTRACT_TYPE", "30.02.2025", "John Doe", "22 Maril street"))

Independent Contractor Agreement

This Independent Contractor Agreement ("Agreement") is made and entered into as of 
30.02.2025
, by and between iDelivery ("Company"), and 

John Doe
, a self-employed individual with a mailing address of 

22 Maril street
 ("Contractor").

1. Services Provided
The Contractor agrees to provide food and/or goods delivery services to customers of the Company ("Services"). The Contractor shall perform the Services as an independent contractor and not as an employee of the Company. The Contractor retains sole discretion over the manner and means of performing the Services, including the routes taken and the working hours, subject to the terms of this Agreement.

2. Compensation and Payment
The Company shall compensate the Contractor for the Services rendered based on the terms outlined in the Company's delivery payment schedule, which include a hourly base fee per delivery of 20 euro, mileage fee of 0.5 euro, and tips. The Company will provide a detailed b

## Prepare prompt

In [36]:
from openai import OpenAI
import keys_secret

openai_client = OpenAI(api_key=keys_secret.openai_api_key)


def llm(prompt):
    response = openai_client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content


def build_prompt(question, search_results, courier):
    prompt_template = """
You are the courier suport agent of a iDelivery company that handles food delivery in Germany, Netherlands and UK. 
The couriers working for this company are employees and freelancers. 

Courier {courier_first_name} is {courier_age} years old, has a {courier_contract_type} working contract and uses a {courier_vehicle_type} for delivery.
    
Answer the courier's QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}

""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"country: {doc['country']}\nquestion: {doc['question']}\nanswer: {doc['answer']}\n\n"

    # print(courier)
    prompt = prompt_template.format(question=question, 
                                    context=context, 
                                    courier_first_name=courier['first_name'],
                                    courier_age=courier['age'],
                                    courier_contract_type=courier['contract_type'],
                                    courier_vehicle_type=courier['vehicle_type'],
                                   ).strip()
    return prompt


## Question example (hardcoded)

In [38]:
import helpers

#get matching FAQ by question for one courier
courier_result = db.search(User.index == 10)
if (len(courier_result) != 1): raise Exception("Can not find unique courier by ID: ...")
courier = courier_result[0]

question = "Can I reject orders?"

search_results = vector_search(question, courier['country'])

courier['age'] = helpers.get_age_by_birthdate(courier['date_of_birth'])

# build prompt
prompt = build_prompt(question, search_results, courier)
print(prompt)

print()
print("LLM answer:")
llm(prompt)


vector_search is called on question: Can I reject orders?


NameError: name 'get_age_by_birthdate' is not defined