# evaluation-data-generation

Ground truth data generation for evaluation

In [1]:
import pandas as pd
from openai import OpenAI
import json
from tqdm.auto import tqdm

In [2]:
# Create the API client shared by every request in this notebook. No key is
# passed explicitly; presumably the SDK picks it up from the environment
# (e.g. OPENAI_API_KEY) — confirm against the SDK docs. Prefer that over the
# hardcoded-key variant kept below for reference only:
# client = OpenAI(api_key = 'YOUR_KEY')
client = OpenAI()

In [3]:
# Load the cleaned support records (path is relative to the notebook's
# working directory) and preview the first rows.
df = pd.read_csv('../data/data-clean.csv', sep=",")
df.head()

Unnamed: 0,id,question,category,subcategory,intent,product_feature,response,related_articles,tags
0,0,How do I reset my password?,Account,Password Management,Password Recovery,Authentication & Security,"To reset your password, click on the 'Forgot P...",https://help.example.com/account/reset-password,"password, reset, account access"
1,1,What payment methods do you accept?,Billing,Payment Options,Payment Information,Billing & Invoicing,"We accept major credit cards (Visa, MasterCard...",https://help.example.com/billing/payment-methods,"payment, credit card, PayPal"
2,2,Can I integrate your service with Salesforce?,Technical,Integrations,Integration Setup,API & Integrations,"Yes, we offer a Salesforce integration. You ca...",https://developers.example.com/integrations/sa...,"Salesforce, integration, API"
3,3,How do I add a new user to my account?,Account,User Management,Add User,User Roles & Permissions,"To add a new user, go to 'Settings' > 'User Ma...",https://help.example.com/account/adding-users,"add user, invite, user management"
4,4,What's included in the Enterprise plan?,General Inquiry,Pricing & Plans,Plan Information,Subscription Management,"Our Enterprise plan includes unlimited users, ...",https://www.example.com/pricing,"Enterprise plan, pricing, features"


In [4]:
df.columns

Index(['id', 'question', 'category', 'subcategory', 'intent',
       'product_feature', 'response', 'related_articles', 'tags'],
      dtype='object')

In [5]:
# One dict per row, keyed by column name, so a record can be unpacked
# straight into the prompt template's placeholders.
documents = df.to_dict(orient='records')

In [6]:
documents[0]

{'id': 0,
 'question': 'How do I reset my password?',
 'category': 'Account',
 'subcategory': 'Password Management',
 'intent': 'Password Recovery',
 'product_feature': 'Authentication & Security',
 'response': "To reset your password, click on the 'Forgot Password' link on the login page. Enter your email address, and we'll send you instructions to create a new password.",
 'related_articles': 'https://help.example.com/account/reset-password',
 'tags': 'password, reset, account access'}

In [7]:
# Prompt sent to the model once per record. Single-brace names are
# str.format placeholders filled from the record's fields; the braces of the
# JSON example at the end are doubled so that str.format emits them literally.
# The "few words from the record" instruction asks the model to paraphrase
# rather than copy the record's wording.
prompt_template = """
You emulate a user of a B2B SaaS company's support queue assistant application.
Formulate 5 questions this user might ask based on a provided category, subcategory, or intent.
Make the questions specific to this category, subcategory, or intent.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as few words as possible from the record.

The record:

question: {question}
category: {category}
subcategory: {subcategory}
intent: {intent}
product_feature: {product_feature}
response: {response}
related_articles: {related_articles}
tags: {tags}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [8]:
# Render the template for the first record as a quick sanity check.
prompt = prompt_template.format(**documents[0])

In [9]:
prompt

'You emulate a user of B2B SaaS company\'s support queue assistant application.\nFormulate 5 questions this user might ask based on a provided category, subcategory, or intent.\nMake the questions specific to this category, subcategory, or intent.\nThe record should contain the answer to the questions, and the questions should\nbe complete and not too short. Use as fewer words as possible from the record. \n\nThe record:\n\nquestion: How do I reset my password?\ncategory: Account\nsubcategory: Password Management\nintent: Password Recovery\nproduct_feature: Authentication & Security\nresponse: To reset your password, click on the \'Forgot Password\' link on the login page. Enter your email address, and we\'ll send you instructions to create a new password.\nrelated_articles: https://help.example.com/account/reset-password\ntags: password, reset, account access\n\nProvide the output in parsable JSON without using code blocks:\n\n{"questions": ["question1", "question2", ..., "question5"]

In [10]:
def llm(prompt, model='gpt-4o-mini'):
    """Send one user message to the chat completions API and return the reply text.

    Args:
        prompt: Text sent as the only message of the conversation, with
            role "user".
        model: Name of the chat model to query. Defaults to 'gpt-4o-mini',
            the value that used to be hard-coded, so existing calls behave
            exactly as before.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    # Only the first choice is read; no other part of the response is used.
    return response.choices[0].message.content

In [11]:
# Trial call on the sample prompt; the reply is raw text, parsed as JSON in
# the next cell.
questions = llm(prompt)

In [12]:
json.loads(questions)

{'questions': ['What steps should I follow to reset my password?',
  "Where can I find the 'Forgot Password' link for my account?",
  'How will I receive instructions to change my password?',
  'What information do I need to provide to reset my password?',
  'Is there a specific page I should visit to reset my password?']}

In [13]:
def generate_questions(doc):
    """Ask the model for evaluation questions about a single record.

    Args:
        doc: Mapping holding every field named in ``prompt_template``
            (question, category, subcategory, intent, product_feature,
            response, related_articles, tags). Extra keys such as ``id``
            are ignored by ``str.format``.

    Returns:
        The model's reply as an unparsed string; the caller is responsible
        for decoding it as JSON.
    """
    prompt = prompt_template.format(**doc)

    # Delegate to llm() so the request/response handling lives in one place
    # instead of being duplicated here.
    return llm(prompt)

In [14]:
# Maps document id -> list of generated questions. Kept in its own cell so
# that re-running the generation loop skips ids already stored here instead
# of starting from an empty dict.
results = {}

In [15]:
# Generate questions for every record whose id is not yet in `results`, so
# this cell can be re-run without repeating requests that already succeeded.
for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id not in results:
        # The model reply is JSON text with a single "questions" list.
        questions = json.loads(generate_questions(doc))
        results[doc_id] = questions['questions']

  0%|          | 0/125 [00:00<?, ?it/s]

In [16]:
# Flatten {doc_id: [question, ...]} into one (doc_id, question) pair per
# generated question, preserving the dict's insertion order.
final_results = [
    (doc_id, q)
    for doc_id, questions in results.items()
    for q in questions
]

In [17]:
final_results[0]

(0, 'What steps do I need to follow to reset my password for my account?')

In [18]:
# Tabulate the (doc_id, question) pairs; 'id' refers to the source record
# each question was generated from.
df_results = pd.DataFrame(final_results, columns=['id', 'question'])
df_results

Unnamed: 0,id,question
0,0,What steps do I need to follow to reset my pas...
1,0,Can you guide me through the password recovery...
2,0,Where can I find the link to reset my password...
3,0,Is there a specific email address I need to us...
4,0,What resources are available if I encounter is...
...,...,...
620,124,What are the steps to create a custom workflow...
621,124,Can I integrate external systems when setting ...
622,124,How does the Workflow Builder help in automati...
623,124,Is there a way to test my custom workflow befo...


In [19]:
# Persist the ground-truth set; index=False keeps the row index out of the
# file so it contains only the id and question columns.
df_results.to_csv('../data/ground-truth-retrieval.csv', index=False)

In [20]:
!head ../data/ground-truth-retrieval.csv

id,question
0,What steps do I need to follow to reset my password for my account?
0,Can you guide me through the password recovery process for the application?
0,Where can I find the link to reset my password on the login page?
0,Is there a specific email address I need to use to receive password reset instructions?
0,What resources are available if I encounter issues while trying to reset my password?
1,What are the accepted payment methods for your service?
1,Can I use PayPal to pay for my subscription?
1,Do you accept credit cards for payment?
1,What invoicing options are available for enterprise customers?
