# support-queue-assistant-rag

In [1]:
# Uncomment and execute this once to download minsearch
# !wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

In [2]:
# Import libraries
import pandas as pd
import random
import json
from tqdm.auto import tqdm
import minsearch
from openai import OpenAI

## Ingestion

In [3]:
df = pd.read_csv('../data/data-unclean.csv', sep=",")
df.head()

Unnamed: 0,id,question,category,subcategory,intent,product_feature,response,related_articles,tags
0,0,How do I reset my account password?,Account,Password Reset,Password Recovery,Authentication & Security,To reset your password: 1. Go to the login pag...,/account-security/password-reset;/account-mana...,"password,reset,account,security,login"
1,1,Can I upgrade my subscription plan?,Billing,Subscription Management,Plan Upgrade,Billing & Subscriptions,"Yes, you can upgrade anytime. To do so: 1. Log...",/billing/changing-subscription-plans;/billing/...,"upgrade,subscription,billing,plan,pricing"
2,2,How do I integrate your API with my CRM?,Technical,API Integration,Integration Setup,API & Integrations,To integrate: 1. Generate an API key 2. Review...,/api-reference;/integrations/crm-setup-guide,"API,integration,CRM,developer,technical"
3,3,What's included in your enterprise SLA?,General Inquiry,Service Level Agreement,Contract Information,Enterprise Support,"Our Enterprise SLA includes: 99.99% uptime, 24...",/enterprise/sla;/enterprise/support-tiers,"SLA,enterprise,uptime,support,contract"
4,4,How do I enable two-factor authentication?,Account,Security Settings,Enable 2FA,Authentication & Security,To enable 2FA: 1. Go to 'Account Settings' 2. ...,/account-security/two-factor-authentication;/a...,"2FA,authentication,security,account,login"


In [4]:
df.columns

Index(['id', 'question', 'category', 'subcategory', 'intent',
       'product_feature', 'response', 'related_articles', 'tags'],
      dtype='object')

In [5]:
documents = df.to_dict(orient='records')

In [6]:
documents[0]

{'id': 0,
 'question': 'How do I reset my account password?',
 'category': 'Account',
 'subcategory': 'Password Reset',
 'intent': 'Password Recovery',
 'product_feature': 'Authentication & Security',
 'response': "To reset your password: 1. Go to the login page 2. Click 'Forgot Password' 3. Enter your email 4. Check your email for a reset link 5. Click the link and enter a new password",
 'related_articles': '/account-security/password-reset;/account-management/updating-account-info',
 'tags': 'password,reset,account,security,login'}

In [7]:
index = minsearch.Index(
    text_fields=['question', 'category', 'subcategory', 'intent',
       'product_feature', 'response', 'related_articles', 'tags'],
    keyword_fields=['id']
)

In [8]:
index.fit(documents)

<minsearch.Index at 0x706ae62d7830>

In [9]:
query = 'How do I integrate your API with my CRM?'

In [10]:
index.search(query, num_results=10)

[{'id': 2,
  'question': 'How do I integrate your API with my CRM?',
  'category': 'Technical',
  'subcategory': 'API Integration',
  'intent': 'Integration Setup',
  'product_feature': 'API & Integrations',
  'response': 'To integrate: 1. Generate an API key 2. Review our API docs 3. Use our SDKs or make REST calls 4. Configure your CRM 5. Test the integration thoroughly',
  'related_articles': '/api-reference;/integrations/crm-setup-guide',
  'tags': 'API,integration,CRM,developer,technical'},
 {'id': 14,
  'question': 'How do I generate an API key?',
  'category': 'Technical',
  'subcategory': 'API Management',
  'intent': 'API Key Generation',
  'product_feature': 'API & Integrations',
  'response': "To generate an API key: 1. Go to 'Developer Settings' 2. Click 'API Keys' 3. Select 'Generate New Key' 4. Set permissions for the key 5. Copy and securely store the key (it won't be shown again)",
  'related_articles': '/api-reference/authentication;/api-management/key-security',
  'ta

## RAG Flow

In [11]:
client = OpenAI(api_key = 'YOUR_KEY')

In [12]:
def search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [13]:
prompt_template = """
You're a B2B SaaS company's customer support analyst. Answer the QUESTION based on the CONTEXT from our support queue knowledge database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
question: {question}
category: {category}
subcategory: {subcategory}
intent: {intent}
product_feature: {product_feature}
response: {response}
related_articles: {related_articles}
tags: {tags}
""".strip()

def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [14]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [15]:
def rag(query, model='gpt-4o-mini'):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [16]:
question = 'How can I reset my password?'
answer = rag(question)
print(answer)

To reset your password, follow these steps: 

1. Go to the login page.
2. Click 'Forgot Password'.
3. Enter your email.
4. Check your email for a reset link.
5. Click the link and enter a new password.


In [17]:
question = 'How can I setup enterprise account?'
answer = rag(question)
print(answer)

To set up an enterprise account, you need to follow these steps:

1. Upgrade to the Enterprise plan.
2. Go to 'Security Settings'.
3. Enable Single Sign-On (SSO) if required by your organization.
4. Choose your Identity Provider (IdP) such as Okta or Azure AD.
5. Configure SAML or OAuth as per your requirements.
6. Test the SSO connection to ensure it's working properly.

For more details, you can refer to related articles on SSO setup and enterprise features.


## Retrieval evaluation

Make sure that ground truth data was generated using `evaluation-data-generation.ipynb`

In [18]:
df_question = pd.read_csv('../data/ground-truth-retrieval.csv')

In [19]:
df_question

Unnamed: 0,id,question
0,0,What steps should I follow to reset my account...
1,0,Can you guide me on how to recover my password?
2,0,What should I do if I forgot my password for m...
3,0,Where can I find the link to reset my password?
4,0,How can I change my password if I can't log in?
...,...,...
120,24,What steps should I follow to download my acco...
121,24,Where can I find the option to download my inv...
122,24,Is there a specific format in which the invoic...
123,24,"After downloading an invoice, where will it be..."


In [20]:
ground_truth = df_question.to_dict(orient='records')

In [21]:
ground_truth[0]

{'id': 0,
 'question': 'What steps should I follow to reset my account password?'}

In [22]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [23]:
def minsearch_search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [24]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [25]:
evaluate(ground_truth, lambda q: minsearch_search(q['question']))

  0%|          | 0/125 [00:00<?, ?it/s]

{'hit_rate': 0.992, 'mrr': 0.888}

## Finding the best parameters

In [26]:
df_validation = df_question[:25]
df_test = df_question[25:]

In [27]:
df_validation

Unnamed: 0,id,question
0,0,What steps should I follow to reset my account...
1,0,Can you guide me on how to recover my password?
2,0,What should I do if I forgot my password for m...
3,0,Where can I find the link to reset my password?
4,0,How can I change my password if I can't log in?
5,1,Is it possible to upgrade my current subscript...
6,1,What steps do I need to take to change my subs...
7,1,Where can I find the option to upgrade my plan...
8,1,Will I be able to choose a new plan when I upg...
9,1,Do I need to confirm anything after selecting ...


In [28]:
df_test

Unnamed: 0,id,question
25,5,What types of credit cards can I use for payment?
26,5,Can I pay for my subscription using PayPal?
27,5,Is it possible to use a bank transfer for my a...
28,5,How do I update my current payment method in t...
29,5,Where can I find information about the payment...
...,...,...
120,24,What steps should I follow to download my acco...
121,24,Where can I find the option to download my inv...
122,24,Is there a specific format in which the invoic...
123,24,"After downloading an invoice, where will it be..."


In [29]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10):
    best_params = None
    best_score = float('-inf')  # Assuming we're minimizing. Use float('-inf') if maximizing.

    for _ in range(n_iterations):
        # Generate random parameters
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = random.randint(min_val, max_val)
            else:
                current_params[param] = random.uniform(min_val, max_val)
        
        # Evaluate the objective function
        current_score = objective_function(current_params)
        
        # Update best if current is better
        if current_score > best_score:  # Change to > if maximizing
            best_score = current_score
            best_params = current_params
    
    return best_params, best_score

In [30]:
gt_val = df_validation.to_dict(orient='records')

In [31]:
def minsearch_search(query, boost=None):
    if boost is None:
        boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [34]:
param_ranges = {
    'question': (0.0, 3.0),
    'category': (0.0, 3.0),
    'subcategory': (0.0, 3.0),
    'intent': (0.0, 3.0),
    'product_feature': (0.0, 3.0),
    'response': (0.0, 3.0),
    'related_articles': (0.0, 3.0),
    'tags': (0.0, 3.0),
}

def objective(boost_params):
    def search_function(q):
        return minsearch_search(q['question'], boost_params)

    results = evaluate(gt_val, search_function)
    return results['mrr']

In [35]:
simple_optimize(param_ranges, objective, n_iterations=20)

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

({'question': 1.6006767365614856,
  'category': 1.1642961700557812,
  'subcategory': 0.7233985971305616,
  'intent': 1.7157525054180156,
  'product_feature': 2.4161287885772005,
  'response': 1.01960899464581,
  'related_articles': 2.9092863477803985,
  'tags': 2.3802803346878116},
 0.96)

In [36]:
def minsearch_improved(query):
    boost = {
        'question': 1.60,
        'category': 1.16,
        'subcategory': 0.72,
        'intent': 1.72,
        'product_feature': 2.41,
        'response': 1.02,
        'related_articles': 2.90,
        'tags': 2.38,
    }

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

evaluate(ground_truth, lambda q: minsearch_improved(q['question']))

  0%|          | 0/125 [00:00<?, ?it/s]

{'hit_rate': 0.992, 'mrr': 0.8958095238095238}

## RAG evaluation

Let's use LLM-as-a-judge

In [37]:
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [39]:
len(ground_truth)

125

In [40]:
record = ground_truth[0]
question = record['question']
answer_llm = rag(question)

In [41]:
print(answer_llm)

To reset your account password, follow these steps:

1. Go to the login page.
2. Click 'Forgot Password'.
3. Enter your email.
4. Check your email for a reset link.
5. Click the link and enter a new password.


In [42]:
prompt = prompt2_template.format(question=question, answer_llm=answer_llm)
print(prompt)

You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: What steps should I follow to reset my account password?
Generated Answer: To reset your account password, follow these steps:

1. Go to the login page.
2. Click 'Forgot Password'.
3. Enter your email.
4. Check your email for a reset link.
5. Click the link and enter a new password.

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [43]:
llm(prompt)

'{\n  "Relevance": "RELEVANT",\n  "Explanation": "The generated answer provides clear and specific steps that directly address the question about how to reset an account password. Each step is relevant and logical, allowing the user to effectively complete the password reset process."\n}'

In [46]:
df_sample = df_question.sample(n=20, random_state=1)

In [47]:
sample = df_sample.to_dict(orient='records')

In [48]:
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/20 [00:00<?, ?it/s]

In [49]:
evaluations[0]

({'id': 14,
  'question': 'What should I do to ensure the security of my API key after I create it?'},
 'To ensure the security of your API key after you create it, make sure to securely store the key as it will not be shown again once copied. Avoid sharing your API key publicly or through unsecured channels. For more detailed security practices regarding API keys, you may refer to the related article on key security.',
 {'Relevance': 'RELEVANT',
  'Explanation': 'The generated answer directly addresses the question about ensuring the security of an API key by providing specific advice on securely storing the key, avoiding public sharing, and suggesting further reading on security practices. This makes it highly relevant to the question asked.'})

In [50]:
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])

df_eval['id'] = df_eval.record.apply(lambda d: d['id'])
df_eval['question'] = df_eval.record.apply(lambda d: d['question'])

df_eval['relevance'] = df_eval.evaluation.apply(lambda d: d['Relevance'])
df_eval['explanation'] = df_eval.evaluation.apply(lambda d: d['Explanation'])

del df_eval['record']
del df_eval['evaluation']

In [51]:
df_eval.relevance.value_counts

<bound method IndexOpsMixin.value_counts of 0            RELEVANT
1            RELEVANT
2            RELEVANT
3            RELEVANT
4            RELEVANT
5            RELEVANT
6            RELEVANT
7            RELEVANT
8            RELEVANT
9            RELEVANT
10           RELEVANT
11    PARTLY_RELEVANT
12           RELEVANT
13           RELEVANT
14           RELEVANT
15           RELEVANT
16           RELEVANT
17           RELEVANT
18           RELEVANT
19           RELEVANT
Name: relevance, dtype: object>

In [52]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT           0.95
PARTLY_RELEVANT    0.05
Name: proportion, dtype: float64

In [53]:
df_eval.to_csv('../data/rag-eval-gpt-4o-mini.csv', index=False)

In [55]:
# df_eval[df_eval.relevance == 'NON_RELEVANT']
df_eval[df_eval.relevance == 'PARTLY_RELEVANT']

Unnamed: 0,answer,id,question,relevance,explanation
11,"Based on the provided context, there is no inf...",8,Is there a limit on the number of team members...,PARTLY_RELEVANT,The generated answer addresses the question by...


In [56]:
evaluations_gpt4o = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = rag(question, model='gpt-4o') 

    prompt = prompt2_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)
    
    evaluations_gpt4o.append((record, answer_llm, evaluation))

  0%|          | 0/20 [00:00<?, ?it/s]

In [57]:
evaluations_gpt4o[0]

({'id': 14,
  'question': 'What should I do to ensure the security of my API key after I create it?'},
 "To ensure the security of your API key after you create it, you should:\n\n1. **Copy and Securely Store the Key:** The API key will only be shown once, so it's critical to copy it immediately and store it securely.\n2. **Set Appropriate Permissions:** Configure the appropriate permissions while generating the key to limit its access as needed.\n3. **Refer to Security Best Practices:** You can find related best practices and guidelines in articles like `/api-management/key-security`.\n\nBy following these steps, you help protect your API key from unauthorized access or misuse.",
 {'Relevance': 'RELEVANT',
  'Explanation': 'The generated answer provides specific and actionable steps to secure an API key after its creation, directly addressing the question. It includes methods such as securely storing the key and setting appropriate permissions, which are essential practices for ensuri

In [58]:
df_eval = pd.DataFrame(evaluations_gpt4o, columns=['record', 'answer', 'evaluation'])

df_eval['id'] = df_eval.record.apply(lambda d: d['id'])
df_eval['question'] = df_eval.record.apply(lambda d: d['question'])

df_eval['relevance'] = df_eval.evaluation.apply(lambda d: d['Relevance'])
df_eval['explanation'] = df_eval.evaluation.apply(lambda d: d['Explanation'])

del df_eval['record']
del df_eval['evaluation']

In [59]:
df_eval.relevance.value_counts()

relevance
RELEVANT    20
Name: count, dtype: int64

In [60]:
df_eval.relevance.value_counts(normalize=True)

relevance
RELEVANT    1.0
Name: proportion, dtype: float64

In [61]:
df_eval.to_csv('../data/rag-eval-gpt-4o.csv', index=False)