## Experiment analysis 

Premise: we have enabled the ability to customize the fields included in the Azure Cognitive Search index definition - this lets us extract the available document metadata and use that information to specify custom fields per document, with the hopes of making retrieval more accurate. 

In [33]:
import pandas as pd 
import numpy as np
import requests
import os
from dotenv import load_dotenv
import json
from tqdm import tqdm

### Varying the settings of custom metadata fields included in the domain config

TBD

### Custom metadata field + query concatenation experiments

Premise: We now have an updated index definition that includes the document category metadata as keyword fields in the semantic search configuration. What will happen if we pass correct document category information along with the query? Will the semantic search be able to effectively use that information to better match the query to the available document metadata?

#### Analyzing `in_top_init` chunks across experiment runs

In [34]:
qc_config1_baseline_results_path = './data/queryconcat-config1-baseline-results.csv'  # ea5884b0-fac5-4548-a863-c3c0be2cf3f8 - experiment with same retrieval config, but no change to query structure
qc_config1_exp3_results_path = './data/queryconcat-config1-exp3-results.csv'
qc_config1_exp4_results_path = './data/queryconcat-config1-exp4-results.csv'
qc_config1_exp8_results_path = './data/queryconcat-config1-exp8-results.csv'
groundtruth_qs_70_no_intop_path = './data/review_70qs_no_in_top.xlsx'

qc_config1_baseline_results_df = pd.read_csv(qc_config1_baseline_results_path)
qc_config1_exp3_results_df = pd.read_csv(qc_config1_exp3_results_path)
qc_config1_exp4_results_df = pd.read_csv(qc_config1_exp4_results_path)
qc_config1_exp8_results_df = pd.read_csv(qc_config1_exp8_results_path)
groundtruth_qs_70_no_intop_df = pd.read_excel(groundtruth_qs_70_no_intop_path)

In [35]:
ids_no_intop_prior_baseline = groundtruth_qs_70_no_intop_df['id']
print("Prior baseline: " + str(len(ids_no_intop_prior_baseline)) + " questions with in_top_init = 0")

Prior baseline: 75 questions with in_top_init = 0


In [29]:
ids_no_intop_qc_baseline = qc_config1_baseline_results_df.loc[qc_config1_baseline_results_df['in_top_init'] == 0]['id'].to_list()
print("Query concatenation baseline: " + str(len(ids_no_intop_qc_baseline)) + " questions with in_top_init = 0")

Query concatenation baseline: 69 questions with in_top_init = 0


In [30]:
ids_no_intop_qc_exp3 = qc_config1_exp3_results_df.loc[qc_config1_exp3_results_df['in_top_init'] == 0]['id'].to_list()
print("Query concatenation exp3 (Cat1 + Cat2 + Cat3 + Query): " + str(len(ids_no_intop_qc_exp3)) + " questions with in_top_init = 0")

Query concatenation exp3 (Cat1 + Cat2 + Cat3 + Query): 36 questions with in_top_init = 0


In [31]:
ids_no_intop_qc_exp4 = qc_config1_exp4_results_df.loc[qc_config1_exp4_results_df['in_top_init'] == 0]['id'].to_list()
print("Query concatenation exp4 (Query + Cat1 + Cat2 + Cat3): " + str(len(ids_no_intop_qc_exp4)) + " questions with in_top_init = 0")

Query concatenation exp4 (Query + Cat1 + Cat2 + Cat3): 36 questions with in_top_init = 0


In [25]:
ids_no_intop_qc_exp8 = qc_config1_exp8_results_df.loc[qc_config1_exp8_results_df['in_top_init'] == 0]['id'].to_list()
print("Query concatenation exp4 (Query + Cat3): " + str(len(ids_no_intop_qc_exp8)) + " questions with in_top_init = 0")

Query concatenation exp4 (Query + Cat3): 42 questions with in_top_init = 0


In [36]:
def analyze_list_differences(list1: list, list2: list, list1_name: str, list2_name):
    set1 = set(list1)
    set2 = set(list2)

    common_items = set1.intersection(set2)
    print(f'# of items in common between {list1_name} and {list2_name}: {len(common_items)}')

    unique_items_in_list1 = set1 - set2
    print(f'Items in {list1_name} but NOT in {list2_name}: {unique_items_in_list1}')

    unique_items_in_list2 = set2 - set1
    print(f'Items in {list2_name} but NOT in {list1_name}: {unique_items_in_list2}')
    print('')

    return unique_items_in_list1, unique_items_in_list2

def get_df_rows_given_ids(id_list: list[str], df: pd.DataFrame, id_col: str = 'id'):
    return df.loc[df[id_col].isin(id_list)][['id', 'source', 'bcss_question', 'answer', 'article_number', 'filename', 'BW_TXNMY_GRP_1', 'BW_TXNMY_GRP_2', 'BW_TXNMY_GRP_3', 'init_chunks', 'in_top_init']]

def get_df_rows_given_ids_prev_base(id_list: list[str], df: pd.DataFrame, id_col: str = 'id'):
    return df.loc[df[id_col].isin(id_list)][['id', 'source', 'bcss_question', 'answer', 'article_number', 'filename', 'BW_TXNMY_GRP_1', 'BW_TXNMY_GRP_2', 'BW_TXNMY_GRP_3']]

In [32]:
_prior_baseline = 'prior_baseline_no_in_top'
_qc_config1_baseline = 'qc_baseline_no_in_top'
_qc_config1_exp3 = 'qc_exp3_no_in_top'
_qc_config1_exp4 = 'qc_exp4_no_in_top'
_qc_config1_exp8 = 'qc_exp8_no_in_top'

uniq_prev_base_not_in_curr_base, uniq_curr_base_not_in_prev_base = analyze_list_differences(ids_no_intop_prior_baseline, ids_no_intop_qc_baseline, _prior_baseline, _qc_config1_baseline)
uniq_curr_base_not_in_3, uniq_3_not_in_curr_base = analyze_list_differences(ids_no_intop_qc_baseline, ids_no_intop_qc_exp3, _qc_config1_baseline, _qc_config1_exp3)
uniq_curr_base_not_in_4, uniq_4_not_in_curr_base = analyze_list_differences(ids_no_intop_qc_baseline, ids_no_intop_qc_exp4, _qc_config1_baseline, _qc_config1_exp4)
uniq_3_not_in_4, uniq_4_not_in_3 = analyze_list_differences(ids_no_intop_qc_exp3, ids_no_intop_qc_exp4, _qc_config1_exp3, _qc_config1_exp4)
uniq_curr_base_not_in_8, uniq_8_not_in_curr_base = analyze_list_differences(ids_no_intop_qc_baseline, ids_no_intop_qc_exp8, _qc_config1_baseline, _qc_config1_exp8)


# of items in common between prior_baseline_no_in_top and qc_baseline_no_in_top: 67
Items in prior_baseline_no_in_top but NOT in qc_baseline_no_in_top: {50026, 10059, 30000, 20116, 50004, 10008, 30013, 50014}
Items in qc_baseline_no_in_top but NOT in prior_baseline_no_in_top: {10026, 20125}

# of items in common between qc_baseline_no_in_top and qc_exp3_no_in_top: 29
Items in qc_baseline_no_in_top but NOT in qc_exp3_no_in_top: {20231, 10119, 50058, 50060, 10125, 50065, 50066, 10132, 10133, 20119, 20125, 50079, 50083, 10150, 50088, 10026, 20010, 20012, 10030, 10032, 20016, 20150, 20023, 20153, 20027, 20029, 10181, 10054, 10182, 30150, 10060, 20172, 20178, 10067, 20053, 20054, 50011, 20196, 50020, 10100}
Items in qc_exp3_no_in_top but NOT in qc_baseline_no_in_top: {50049, 50018, 50052, 10220, 30000, 20189, 10206}

# of items in common between qc_baseline_no_in_top and qc_exp4_no_in_top: 30
Items in qc_baseline_no_in_top but NOT in qc_exp4_no_in_top: {20231, 10119, 50058, 10125, 50065, 50

##### 2 questions not retrieved from groundtruth source in config 1 baseline that were in the 75 question baseline

In [70]:
print('Not retrieved from groundtruth source in Config 1 baseline:')
subset_df_incorrect_curr_base_vs_prev_base = get_df_rows_given_ids(uniq_curr_base_not_in_prev_base, qc_config1_exp4_results_df)
display(subset_df_incorrect_curr_base_vs_prev_base)

print('Note that these questions were not included in the prev. baseline of the 75 questions that had an in_top_init of 0:')
subset_df_prev_base_vs_incorrect_curr_base = get_df_rows_given_ids_prev_base(uniq_curr_base_not_in_prev_base, groundtruth_qs_70_no_intop_df)
display(subset_df_prev_base_vs_incorrect_curr_base)

Not retrieved from groundtruth source in Config 1 baseline:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
16,10026,FAN Search Tool (FaST) - BCSS.html,What can I use FaST for?,Use FaST to view FAN profile details including...,7553.0,2154.json,Systems,Informational Tools,Contract Information,[('Accessing FaST | Searching for FANs | Searc...,2
257,20125,Fraud & Social Engineering Call Handling Proce...,How to cancel an unauthorized online order,Cancel an order from Common Repository\n\n\nUs...,21385.0,1661.json,Bill & Account,Account Handling,Fraud,[('Advise caller that they are not authorized ...,7


Note that these questions were not included in the prev. baseline of the 75 questions that had an in_top_init of 0:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3


In [72]:
def print_subset_df_init_chunks_by_index(subset_df: pd.DataFrame, index: int):
    df_list = eval(subset_df['init_chunks'].to_list()[index])
    for (chunk, source) in df_list:
        chunk = str.replace(chunk, "\t", "")
        chunk = str.replace(chunk, "\n", "")
        print(f'{source}: {chunk}\n')

# print(subset_df_0['init_chunks'].to_list()[0]))

##### 7 questions not retrieved from groundtruth source in exp3 (Cat1+Cat2+Cat3+Query) that were in the config 1 baseline

In [62]:
print('Not retrieved from groundtruth source in Exp3:')
subset_df_incorrect_3_vs_curr_base = get_df_rows_given_ids(uniq_3_not_in_curr_base, qc_config1_exp3_results_df)
display(subset_df_incorrect_3_vs_curr_base)

print('Those IDs in Config 1 baseline:')
subset_df_curr_base_vs_incorrect_3 = get_df_rows_given_ids(uniq_3_not_in_curr_base, qc_config1_baseline_results_df)
display(subset_df_curr_base_vs_incorrect_3)

Not retrieved from groundtruth source in Exp3:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
147,10206,Billing Workflow Scenarios - Account Maintenan...,How to change a customer's bill cycle?,"Only telecom managers, day-to-day, and decisio...",22356.0,34.json,Operations,Center Operations,Call Handling,[('Change Bill Cycle\n\nTo change the Bill Cyc...,0
155,10220,Place of Primary Use (PPU).html,How do I explain the PPU address requirements ...,To comply with the Mobile Telecommunications S...,6570.0,3531.json,Bill & Account,Billing & Payments,Charges & Credits,[('New Account\n\n\t\t\t\t\t\tBilling for the ...,0
299,20189,Handle Request to Port in Number Recently Port...,What if a customer wants to port back in to AT...,Resume 60 Days or More after Cancellation\nAdv...,24266.0,3403.json,Bill & Account,Orders & Changes,Porting,"[(""What happens if the customer cannot port an...",0
334,30000,Unlimited Your Way for Business - BCSS.html,Do any business wireless plans include a featu...,AT&T Business Fast Track is available with our...,21757.0,2006.json,Services,Wireless Plans,Business,[('PLANS\n\nPRICING\n\nDEVICE PAIRINGS\n\nAT&T...,0
388,50018,eChat Transfer Process,how to handle echat customers,Perform a transfer if you receive an incorrect...,12781.0,2569.json,Operations,Center Operations,Call Handling,"[(""Verified Customer (Customer Successfully Co...",0
418,50049,Telegence Payment-Related Codes - BCSS,What if the code is not listed?,If you are unable to determine the reason for ...,23743.0,1173.json,Bill & Account,Account Handling,Credits & Adjustments,[('Monthly Data Charge\n\nBCS uses this pre-in...,0
421,50052,LNP (Number Transfer) Port-Out - BCSS,what are the requirements for porting number t...,Port-Out Validation Requirements\n\nWhen AT&T ...,13853.0,313.json,Bill & Account,Orders & Changes,Porting,"[(""Includes port-in number changes; anytime a ...",0


Those IDs in Config 1 baseline:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
147,10206,Billing Workflow Scenarios - Account Maintenan...,How to change a customer's bill cycle?,"Only telecom managers, day-to-day, and decisio...",22356.0,34.json,Operations,Center Operations,Call Handling,"[(""Requirements & Restrictions\nBill Availabil...",2
155,10220,Place of Primary Use (PPU).html,How do I explain the PPU address requirements ...,To comply with the Mobile Telecommunications S...,6570.0,3531.json,Bill & Account,Billing & Payments,Charges & Credits,"[(""The federal law requires that state and loc...",2
299,20189,Handle Request to Port in Number Recently Port...,What if a customer wants to port back in to AT...,Resume 60 Days or More after Cancellation\nAdv...,24266.0,3403.json,Bill & Account,Orders & Changes,Porting,"[(""The Port Activation Center (PAC) - Winback ...",1
334,30000,Unlimited Your Way for Business - BCSS.html,Do any business wireless plans include a featu...,AT&T Business Fast Track is available with our...,21757.0,2006.json,Services,Wireless Plans,Business,"[(""Phone line discount:\n-$35/mo. account cred...",1
388,50018,eChat Transfer Process,how to handle echat customers,Perform a transfer if you receive an incorrect...,12781.0,2569.json,Operations,Center Operations,Call Handling,"[(""Express in a clear, simple, and professiona...",1
418,50049,Telegence Payment-Related Codes - BCSS,What if the code is not listed?,If you are unable to determine the reason for ...,23743.0,1173.json,Bill & Account,Account Handling,Credits & Adjustments,[('Customers sometimes encounter payment error...,1
421,50052,LNP (Number Transfer) Port-Out - BCSS,what are the requirements for porting number t...,Port-Out Validation Requirements\n\nWhen AT&T ...,13853.0,313.json,Bill & Account,Orders & Changes,Porting,"[(""Jump to: Guidelines | Port-Out Procedure |\...",1


##### 6 questions not retrieved from groundtruth source in config1 baseline exp 4 (Query+Cat1+Cat2+Cat3) that were in the config 1 baseline

In [63]:
print('Not retrieved from groundtruth source in Exp4:')
subset_df_incorrect_4_vs_curr_base = get_df_rows_given_ids(uniq_4_not_in_curr_base, qc_config1_exp4_results_df)
display(subset_df_incorrect_4_vs_curr_base)

print('Those IDs in Config 1 baseline:')
subset_df_curr_base_vs_incorrect_4 = get_df_rows_given_ids(uniq_4_not_in_curr_base, qc_config1_baseline_results_df)
display(subset_df_curr_base_vs_incorrect_4)

Not retrieved from groundtruth source in Exp4:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
78,10099,Device Stolen In Transit (Cannot Find a Delive...,how do i report a stolen package for my cell p...,File a Stolen Device > Device Unblocking Reque...,24653.0,54.json,Equipment,Changes,Lost or Stolen Equipment,[('Email Passwords\xa0\n\t\t\t\t\t\t\t\t\t\n\t...,0
147,10206,Billing Workflow Scenarios - Account Maintenan...,How to change a customer's bill cycle?,"Only telecom managers, day-to-day, and decisio...",22356.0,34.json,Operations,Center Operations,Call Handling,[('Click Validation.\n\nVerify USPS Validation...,0
334,30000,Unlimited Your Way for Business - BCSS.html,Do any business wireless plans include a featu...,AT&T Business Fast Track is available with our...,21757.0,2006.json,Services,Wireless Plans,Business,"[(""Phone line discount:\n-$35/mo. account cred...",0
388,50018,eChat Transfer Process,how to handle echat customers,Perform a transfer if you receive an incorrect...,12781.0,2569.json,Operations,Center Operations,Call Handling,"[(""Scripts are required on all openings and cl...",0
418,50049,Telegence Payment-Related Codes - BCSS,What if the code is not listed?,If you are unable to determine the reason for ...,23743.0,1173.json,Bill & Account,Account Handling,Credits & Adjustments,[('Non-Supported Requests |\n\nApproval & Subm...,0
421,50052,LNP (Number Transfer) Port-Out - BCSS,what are the requirements for porting number t...,Port-Out Validation Requirements\n\nWhen AT&T ...,13853.0,313.json,Bill & Account,Orders & Changes,Porting,"[(""Includes port-in number changes; anytime a ...",0


Those IDs in Config 1 baseline:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
78,10099,Device Stolen In Transit (Cannot Find a Delive...,how do i report a stolen package for my cell p...,File a Stolen Device > Device Unblocking Reque...,24653.0,54.json,Equipment,Changes,Lost or Stolen Equipment,"[(""Stolen Package\n\nIf a customer calls/visit...",1
147,10206,Billing Workflow Scenarios - Account Maintenan...,How to change a customer's bill cycle?,"Only telecom managers, day-to-day, and decisio...",22356.0,34.json,Operations,Center Operations,Call Handling,"[(""Requirements & Restrictions\nBill Availabil...",2
334,30000,Unlimited Your Way for Business - BCSS.html,Do any business wireless plans include a featu...,AT&T Business Fast Track is available with our...,21757.0,2006.json,Services,Wireless Plans,Business,"[(""Phone line discount:\n-$35/mo. account cred...",1
388,50018,eChat Transfer Process,how to handle echat customers,Perform a transfer if you receive an incorrect...,12781.0,2569.json,Operations,Center Operations,Call Handling,"[(""Express in a clear, simple, and professiona...",1
418,50049,Telegence Payment-Related Codes - BCSS,What if the code is not listed?,If you are unable to determine the reason for ...,23743.0,1173.json,Bill & Account,Account Handling,Credits & Adjustments,[('Customers sometimes encounter payment error...,1
421,50052,LNP (Number Transfer) Port-Out - BCSS,what are the requirements for porting number t...,Port-Out Validation Requirements\n\nWhen AT&T ...,13853.0,313.json,Bill & Account,Orders & Changes,Porting,"[(""Jump to: Guidelines | Port-Out Procedure |\...",1



##### 4 questions not retrieved from groundtruth source in exp3 (Cat1+Cat2+Cat3+Query) that were in exp4 (Query+Cat1+Cat2+Cat3)

In [59]:
print('Not retrieved from groundtruth source in Exp3:')
subset_df_incorrect_3_vs_4 = get_df_rows_given_ids(uniq_3_not_in_4, qc_config1_exp3_results_df)
display(subset_df_incorrect_3_vs_4)

print('Those IDs in Exp4:')
subset_df_4_vs_incorrect_3 = get_df_rows_given_ids(uniq_3_not_in_4, qc_config1_exp4_results_df)
display(subset_df_4_vs_incorrect_3)

Not retrieved from groundtruth source in Exp3:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
155,10220,Place of Primary Use (PPU).html,How do I explain the PPU address requirements ...,To comply with the Mobile Telecommunications S...,6570.0,3531.json,Bill & Account,Billing & Payments,Charges & Credits,[('New Account\n\n\t\t\t\t\t\tBilling for the ...,0
286,20174,AT&T Internet for Business Call Handling - BCS...,What if a customer has equipment questions reg...,Existing customers > U-verse Business Tech Sup...,3817.0,3266.json,Operations,Center Operations,Call Handling,[('What types of data speeds can customers exp...,0
299,20189,Handle Request to Port in Number Recently Port...,What if a customer wants to port back in to AT...,Resume 60 Days or More after Cancellation\nAdv...,24266.0,3403.json,Bill & Account,Orders & Changes,Porting,"[(""What happens if the customer cannot port an...",0
318,20215,My Promise - BCSS.html,Can I offer an adjustment due to previous poor...,Credit Adjustment Exception Notes: Only offer ...,11997.0,705.json,Operations,Sales Operations,Career,"[(""Adjustments may be input through the 10th o...",0


Those IDs in Exp4:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
155,10220,Place of Primary Use (PPU).html,How do I explain the PPU address requirements ...,To comply with the Mobile Telecommunications S...,6570.0,3531.json,Bill & Account,Billing & Payments,Charges & Credits,"[(""The federal law requires that state and loc...",2
286,20174,AT&T Internet for Business Call Handling - BCS...,What if a customer has equipment questions reg...,Existing customers > U-verse Business Tech Sup...,3817.0,3266.json,Operations,Center Operations,Call Handling,[('What types of data speeds can customers exp...,1
299,20189,Handle Request to Port in Number Recently Port...,What if a customer wants to port back in to AT...,Resume 60 Days or More after Cancellation\nAdv...,24266.0,3403.json,Bill & Account,Orders & Changes,Porting,[('Customer call-in process\n\nLNP activations...,1
318,20215,My Promise - BCSS.html,Can I offer an adjustment due to previous poor...,Credit Adjustment Exception Notes: Only offer ...,11997.0,705.json,Operations,Sales Operations,Career,[('Functions\n\nEmpowerment\n\nYou are empower...,1


##### 4 questions not retrieved from groundtruth source in exp4 (Query+Cat1+Cat2+Cat3) that were in exp3 (Cat1+Cat2+Cat3+Query)

In [58]:
print('Not retrieved from groundtruth source in Exp4:')
subset_df_incorrect_4_vs_3 = get_df_rows_given_ids(uniq_4_not_in_3, qc_config1_exp4_results_df)
display(subset_df_incorrect_4_vs_3)

print('Those IDs in Exp3:')
subset_df_3_vs_incorrect_4 = get_df_rows_given_ids(uniq_4_not_in_3, qc_config1_exp3_results_df)
display(subset_df_3_vs_incorrect_4)

Not retrieved from groundtruth source in Exp4:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
78,10099,Device Stolen In Transit (Cannot Find a Delive...,how do i report a stolen package for my cell p...,File a Stolen Device > Device Unblocking Reque...,24653.0,54.json,Equipment,Changes,Lost or Stolen Equipment,[('Email Passwords\xa0\n\t\t\t\t\t\t\t\t\t\n\t...,0
133,10182,Correspondence - BCSS.html,What if a customer wants to file a complaint w...,If the customer requests to file a complaint w...,12038.0,1893.json,Operations,Center Operations,Customer Communications,"[(""Area Manager > Director > Regional Vice Pre...",0
429,50060,Clarify Credits and Adjustments Policy and Sub...,how to process a refund for equipment?,Equipment / Accessory Charge - Return\nCredit ...,22815.0,97.json,Bill & Account,Account Handling,Credits & Adjustments,[('If the customer is unable to complete the r...,0
450,50083,Clarify Credits and Adjustments Policy and Sub...,how to process a refund on a closed account?,"Using the customer's closed account, submit a ...",22815.0,97.json,Bill & Account,Account Handling,Credits & Adjustments,"[(""The TM issues the appropriate adjustments t...",0


Those IDs in Exp3:


Unnamed: 0,id,source,bcss_question,answer,article_number,filename,BW_TXNMY_GRP_1,BW_TXNMY_GRP_2,BW_TXNMY_GRP_3,init_chunks,in_top_init
78,10099,Device Stolen In Transit (Cannot Find a Delive...,how do i report a stolen package for my cell p...,File a Stolen Device > Device Unblocking Reque...,24653.0,54.json,Equipment,Changes,Lost or Stolen Equipment,[('If\xa0\xa0\n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\...,2
133,10182,Correspondence - BCSS.html,What if a customer wants to file a complaint w...,If the customer requests to file a complaint w...,12038.0,1893.json,Operations,Center Operations,Customer Communications,"[(""Area Manager > Director > Regional Vice Pre...",1
429,50060,Clarify Credits and Adjustments Policy and Sub...,how to process a refund for equipment?,Equipment / Accessory Charge - Return\nCredit ...,22815.0,97.json,Bill & Account,Account Handling,Credits & Adjustments,[('To request a prepaid MasterCard:\n\nUse Cla...,1
450,50083,Clarify Credits and Adjustments Policy and Sub...,how to process a refund on a closed account?,"Using the customer's closed account, submit a ...",22815.0,97.json,Bill & Account,Account Handling,Credits & Adjustments,"[(""First time refund request - Cancel and refu...",1


In [75]:
# print(subset_df_incorrect_4_vs_3['init_chunks'].to_list()[0])
print('Init chunks for question 0 in exp 4 (does not reference correct source)')
print_subset_df_init_chunks_by_index(subset_df_incorrect_4_vs_3, 1)

Init chunks for question 0 in exp 4 (does not reference correct source)
000009924: Area Manager > Director > Regional Vice President (RVP) > Office of the President (OOP)Additional Escalation ScenariosPrior to offering any of the following information, always attempt to use probing and problem solving skills to assist the customer and follow the Chain of Command above. Provide the information below if they are adamant.If the CustomerThenRequests to file a complaint against AT&T with the Federal Communications Commission (FCC)Direct the customer to go to the site for additional information.Wants to write a letter to your manager, director, or president of the company concerning a negative experience or unresolved situationProvide the following written correspondence address to BCS customers:PO Box 1809Paramus, NJ 07653Wants to write a letter to your manager, director, or president of the company concerning a positive experience.Offer to transfer the customer to your team manager's voice

In [77]:
print('Init chunks for question 1 in exp 3 (references correct source)')
print_subset_df_init_chunks_by_index(subset_df_3_vs_incorrect_4, 1)

Init chunks for question 1 in exp 3 (references correct source)
000009924: Area Manager > Director > Regional Vice President (RVP) > Office of the President (OOP)Additional Escalation ScenariosPrior to offering any of the following information, always attempt to use probing and problem solving skills to assist the customer and follow the Chain of Command above. Provide the information below if they are adamant.If the CustomerThenRequests to file a complaint against AT&T with the Federal Communications Commission (FCC)Direct the customer to go to the site for additional information.Wants to write a letter to your manager, director, or president of the company concerning a negative experience or unresolved situationProvide the following written correspondence address to BCS customers:PO Box 1809Paramus, NJ 07653Wants to write a letter to your manager, director, or president of the company concerning a positive experience.Offer to transfer the customer to your team manager's voicemail.Pro

## Initial Chunk Analysis

The following cells try to answer the following question(s):

1. Are we seeing similar chunks returned from the search step in semantic search for the different experiments?
   If we do, we can assume that the reranking step is where most of the differences in chunks are coming from.

To perform this analysis, we will perform semantic search with the max k-value (50) such that we remove the use of the reranker.
The same could have been accomplished with just using hybrid search and setting both k-values to 50.

### Analysis Setup

In [9]:
load_dotenv('./.env')

api_key = os.environ.get("AZURE_COGNITIVE_SEARCH_API_KEY")
if not api_key:
    raise ValueError("AZURE_COGNITIVE_SEARCH_API_KEY not set.")


SEARCH_UL_TEMPLATE = "https://searchcast-nprd-33435-southcentralus.search.windows.net//indexes('{index_name}')/docs/search.post.search?api-version=2023-10-01-Preview"
DOMAIN_SERVICES_URL_GET_CHUNKS = "https://askapi.dev.att.com/automation/domain-services/get_embedding_chunks"


default_body = {
    "queryType": "semantic",
    "semanticConfiguration": "<semantic-config>",
    "search": "<search>",
    "queryLanguage": "en-us",
    "select": "content,metadata",
    "top": "<top-k>",
    "vectorQueries": [
        {
            "kind": "vector",
            "k": "<top-k>",
            "fields": "content_vector",
            "vector": []
        }
    ]
}

def get_embeddings(questions: list[str]):
    question_map = {f"{i}": { "id": f"{i}", "text": question } for i, question in enumerate(questions)}
    question_map.items
    body = {
        "domain": "user_pb5253",
        "config_version": "2023-11-01",
        "data": [{"id": k, "text": v["text"]} for k, v in question_map.items()]
    }

    res = requests.post(
        url=DOMAIN_SERVICES_URL_GET_CHUNKS,
        json=body
    )
    res.raise_for_status()
    res_json = res.json()
    for result in res_json:
        question_map[result["id"]]["embedding"] = result["embedding"]
    return list(question_map.values())

def hybrid_search(
    index_name: str,
    semantic_configuration_name: str,
    text: str,
    embedding: list[float],
    top: int = 10
):
    headers = {
        'api-key': api_key,
        'Content-Type': 'application/json'
    }

    body = default_body.copy()
    body['search'] = text
    body['top'] = top
    body['semanticConfiguration'] = semantic_configuration_name
    body['vectorQueries'][0]['vector'] = embedding
    body['vectorQueries'][0]['k'] = top
    url = SEARCH_UL_TEMPLATE.format(index_name=index_name)

    res = requests.post(
        url=url,
        json=body,
        headers=headers
    )
    return res.json()


In [10]:
# saving the embeddings
def get_question_embeddings(questions: list[str], output_file_name: str):
    base_path = "./data/var"
    os.makedirs(base_path, exist_ok=True)
    embedding_output_file_path = f"{base_path}/{output_file_name}"
    if not os.path.exists(embedding_output_file_path):
        question_result = get_embeddings(questions)
        with open(embedding_output_file_path, "w") as f:
            json.dump(question_result, f)
    else:
        with open(embedding_output_file_path, "r") as f:
            question_result = json.load(f)
    return question_result

article_numbers = qc_config1_baseline_results_df['article_number'].values
article_numbers = [f'{int(elem):09}' for elem in article_numbers]
question_ids = qc_config1_baseline_results_df['id'].values

query_category1_category2_category3_questions = (
    qc_config1_baseline_results_df['bcss_question'] + ' ' +
    qc_config1_baseline_results_df['BW_TXNMY_GRP_1'].astype(str) + ' ' +
    qc_config1_baseline_results_df['BW_TXNMY_GRP_2'].astype(str) + ' ' +
    qc_config1_baseline_results_df['BW_TXNMY_GRP_3'].astype(str)
).values
query_category1_category2_category3_questions_file_name = 'query_category1_category2_category3_questions.json'
query_category1_category2_category3_embeddings = get_question_embeddings(
    query_category1_category2_category3_questions,
    query_category1_category2_category3_questions_file_name
)

category1_category2_category3_query_questions = (
    qc_config1_baseline_results_df['BW_TXNMY_GRP_1'].astype(str) + ' ' +
    qc_config1_baseline_results_df['BW_TXNMY_GRP_2'].astype(str) + ' ' +
    qc_config1_baseline_results_df['BW_TXNMY_GRP_3'].astype(str) + ' ' +
    qc_config1_baseline_results_df['bcss_question']
).values
category1_category2_category3_query_questions_file_name = 'category1_category2_category3_query_questions.json'
category1_category2_category3_query_embeddings = get_question_embeddings(
    category1_category2_category3_query_questions,
    category1_category2_category3_query_questions_file_name
)

query_category3_questions = (
    qc_config1_baseline_results_df['bcss_question'] + ' ' +
    qc_config1_baseline_results_df['BW_TXNMY_GRP_3'].astype(str)
).values
query_category3_questions_file_name = 'query_category3_questions.json'
query_category3_embeddings = get_question_embeddings(
    query_category3_questions,
    query_category3_questions_file_name
)

category3_query_questions = (
    qc_config1_baseline_results_df['BW_TXNMY_GRP_3'].astype(str) + ' ' +
    qc_config1_baseline_results_df['bcss_question']
).values
category3_query_questions_file_name = 'category3_query_questions.json'
category3_query_embeddings = get_question_embeddings(
    category3_query_questions,
    category3_query_questions_file_name
)

query_only_questions = qc_config1_baseline_results_df['bcss_question'].values
query_only_questions_file_name = 'query_only_questions.json'
query_only_embeddings = get_question_embeddings(
    query_only_questions,
    query_only_questions_file_name
)

In [12]:
def perform_search(
    index_name: str,
    semantic_configuration_name: str,
    questions_and_embeddings: list[dict],
    top: int=50):
    results = []
    for question_and_embedding in tqdm(questions_and_embeddings):
        question = question_and_embedding['text']
        embedding = question_and_embedding['embedding']
        semantic_with_metadata_data = hybrid_search(
            index_name,
            semantic_configuration_name,
            question,
            embedding,
            top
        )
        results.append((question, semantic_with_metadata_data))
    return results


BASELINE = "c-care-gpt35-dev-2023-10-25"
BASELINE_SEMANTIC_CONFIG = "semantic_configuration"
CONFIG_1 = "c-user-pb5253-no-search-non-required-dev-2023-11-20"
CONFIG_1_SEMANTIC_CONFIG = "semantic_config"
BEST_INDEX = "c-user-mn5253-qcconcat-1-dev-2023-11-28"
BEST_INDEX_SEMANTIC_CONFIG = "semantic_config"

baseline_results = perform_search(
    BASELINE,
    BASELINE_SEMANTIC_CONFIG,
    query_only_embeddings,
    50
)
no_search_non_required_results = perform_search(
    CONFIG_1,
    CONFIG_1_SEMANTIC_CONFIG,
    query_only_embeddings,
    50
)
query_category1_category2_category3_results = perform_search(
    BEST_INDEX,
    BEST_INDEX_SEMANTIC_CONFIG,
    query_category1_category2_category3_embeddings,
    50
)
category1_category2_category3_query_results = perform_search(
    BEST_INDEX,
    BEST_INDEX_SEMANTIC_CONFIG,
    category1_category2_category3_query_embeddings,
    50
)
query_category3_results = perform_search(
    BEST_INDEX,
    BEST_INDEX_SEMANTIC_CONFIG,
    query_category3_embeddings,
    50
)
category3_query_results = perform_search(
    BEST_INDEX,
    BEST_INDEX_SEMANTIC_CONFIG,
    category3_query_embeddings,
    50
)

100%|██████████| 455/455 [02:38<00:00,  2.87it/s]
100%|██████████| 455/455 [02:50<00:00,  2.66it/s]
100%|██████████| 455/455 [02:40<00:00,  2.84it/s]
100%|██████████| 455/455 [02:40<00:00,  2.83it/s]
100%|██████████| 455/455 [02:40<00:00,  2.83it/s]
100%|██████████| 455/455 [02:40<00:00,  2.83it/s]


In [17]:
def get_content_from_semantic_search_results(semantic_search_result: dict):
    values: list[dict] = semantic_search_result.get("value")
    content = [value['content'] for value in values]
    return content

def filter_by_top_k(result: dict, top: int) -> dict:
    result_copy = result.copy()
    result_copy['value'] = result_copy['value'][:top]
    return result_copy

def compare_question_results(
    sample_1: list[tuple[str, dict]],
    sample_2: list[tuple[str, dict]],
):
    question_matching_count_map = {}
    for elem_1, elem_2 in tqdm(zip(sample_1, sample_2)):
        elem_1_text, elem_1_results = elem_1
        elem_2_text, elem_2_results = elem_2

        elem_1_content = get_content_from_semantic_search_results(elem_1_results)
        elem_2_content = get_content_from_semantic_search_results(elem_2_results)

        matching_content_count = 0
        for elem in elem_1_content:
            if elem in elem_2_content:
                matching_content_count += 1

        question_matching_count_map[elem_1_text] = matching_content_count
    return {
        "results": question_matching_count_map
    }

In [18]:
def generate_metrics(
    result_1: list,
    result_2: list,
    title: str):
    comparison_result = compare_question_results(result_1, result_2)
    comparison_result_arr = list(comparison_result["results"].values())
    q75, q25 = np.percentile(comparison_result_arr, [75 ,25])
    mean_content_similarity = np.mean(comparison_result_arr)
    median_content_similarity = np.median(comparison_result_arr)
    min_content_similarity = min(comparison_result_arr)
    max_content_similarity = max(comparison_result_arr)
    variance_content_similarity = np.std(comparison_result_arr)

    print(f"""
    {title}
    Results of Chunk Similarity
    Mean: {mean_content_similarity};
    Percent Overlap: {mean_content_similarity/50*100};
    Median: {median_content_similarity};
    75th Percentile: {q75};
    IQR: {q75 - q25};
    Min: {min_content_similarity};
    Max: {max_content_similarity};
    STD: {variance_content_similarity};
    """)

### Analysis Results

In [19]:
# NOTE: This is expected to have 100% overlap. The reason as to why it might not be is due to the addition of the categories.
# The categories are not in the metadata for the baseline experiment but do exist in the no_search_non_required_results experiment.
# Due to this, we will analyze the results of no_search_non_required with the other experiments as initial search should be the same as the baseline (if baseline had the same metadata).

generate_metrics(
    baseline_results,
    no_search_non_required_results,
    "Baseline VS Query Only"
)

455it [00:00, 8125.17it/s]


    Baseline VS Query Only
    Results of Chunk Similarity
    Mean: 46.55604395604396;
    Percent Overlap: 93.11208791208792;
    Median: 47.0;
    75th Percentile: 48.0;
    IQR: 2.0;
    Min: 37;
    Max: 50;
    STD: 2.0830666737525796;
    





In [34]:
generate_metrics(
    no_search_non_required_results,
    query_category1_category2_category3_results,
    "Baseline VS Query + Category 1 + 2 + 3"
)

455it [00:00, 9553.89it/s]


    Baseline VS Query + Category 1 + 2 + 3
    Results of Chunk Similarity
    Mean: 27.21978021978022;
    Percent Overlap: 54.43956043956044;
    Median: 27.0;
    75th Percentile: 34.0;
    IQR: 13.0;
    Min: 1;
    Max: 46;
    STD: 8.90808029023039;
    





In [35]:
generate_metrics(
    no_search_non_required_results,
    category1_category2_category3_query_results,
    "Baseline VS Category 1 + 2 + 3 + Query"
)

455it [00:00, 13012.02it/s]


    Baseline VS Category 1 + 2 + 3 + Query
    Results of Chunk Similarity
    Mean: 25.254945054945054;
    Percent Overlap: 50.50989010989011;
    Median: 25.0;
    75th Percentile: 32.0;
    IQR: 13.0;
    Min: 2;
    Max: 44;
    STD: 8.787656974806831;
    





In [37]:
generate_metrics(
    no_search_non_required_results,
    query_category3_results,
    "Baseline VS Query + Category 3"
)

455it [00:00, 12739.11it/s]


    Baseline VS Query + Category 3
    Results of Chunk Similarity
    Mean: 34.637362637362635;
    Percent Overlap: 69.27472527472527;
    Median: 36.0;
    75th Percentile: 41.0;
    IQR: 11.5;
    Min: 2;
    Max: 49;
    STD: 8.052187585522967;
    





In [39]:
generate_metrics(
    no_search_non_required_results,
    category3_query_results,
    "Baseline VS Category 3 + Query"
)

455it [00:00, 12944.07it/s]


    Baseline VS Category 3 + Query
    Results of Chunk Similarity
    Mean: 33.38901098901099;
    Percent Overlap: 66.77802197802198;
    Median: 35.0;
    75th Percentile: 40.0;
    IQR: 12.0;
    Min: 2;
    Max: 48;
    STD: 8.192484502186169;
    





In [40]:
generate_metrics(
    category1_category2_category3_query_results,
    query_category1_category2_category3_results,
    "Category 1 + 2 + 3 + Query VS Query + Category 1 + 2 + 3"
)

455it [00:00, 13788.08it/s]


    Category 1 + 2 + 3 + Query VS Query + Category 1 + 2 + 3
    Results of Chunk Similarity
    Mean: 45.72087912087912;
    Percent Overlap: 91.44175824175824;
    Median: 46.0;
    75th Percentile: 47.0;
    IQR: 2.0;
    Min: 34;
    Max: 50;
    STD: 2.2333888154042625;
    





In [41]:
generate_metrics(
    query_category3_results,
    category3_query_results,
    "Query + Category 3 VS Category 3 + Query"
)

455it [00:00, 12245.40it/s]


    Query + Category 3 VS Category 3 + Query
    Results of Chunk Similarity
    Mean: 47.072527472527476;
    Percent Overlap: 94.14505494505495;
    Median: 47.0;
    75th Percentile: 48.0;
    IQR: 2.0;
    Min: 36;
    Max: 50;
    STD: 1.9287339088125925;
    





### Analysis of In-Top = 0 Post-Rerank

We have observed that some of the experiments performed have a higher hit ratio (returning chunks that have matching sources).
There are a few things we would like a analyze:

1. The distribution of the `in-top-init` for the pre-reranking results of the experiments for the 75 questions.
   This should inform us where the baseline experiment is not performing well and if the better performing experiments are seeing much more chunks with the correct source in the pre-reranked chunks.
1. The distribution of the `in-top-init` for the pre-reranking results where the post-reranked results are 0.
   This should tell us if the majority of the results are being pruned by the reranker or if the initial search is not capturing the assumed sources.

In [27]:
def get_metadata_from_search_results(semantic_search_result: dict):
    values: list[dict] = semantic_search_result.get("value")
    metadata = [json.loads(value['metadata']) for value in values]
    return metadata

def get_percent_of_chunk_with_correct_source(question_ids: list[str], results: list[dict], expected_article_numbers: list[str]):
    question_matching_count_map = {}
    for question_id, (_, search_results), expected_article_number in zip(question_ids, results, expected_article_numbers):
        metadata = get_metadata_from_search_results(search_results)
        search_article_numbers = [elem['ArticleNumber'] for elem in metadata]
        
        seen_article_chunks = 0
        for search_article_number in search_article_numbers:
            if search_article_number == expected_article_number:
                seen_article_chunks += 1
        
        question_matching_count_map[question_id] = seen_article_chunks
    return question_matching_count_map

ids_no_intop_prior_baseline_arr = ids_no_intop_prior_baseline.values.tolist()
        

In [33]:
import plotly.graph_objects as go

baseline_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, baseline_results, article_numbers)
baseline_results_in_top = { k: v for k, v in baseline_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

no_search_non_required_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, no_search_non_required_results, article_numbers)
no_search_non_required_results_in_top = { k: v for k, v in no_search_non_required_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

query_category1_category2_category3_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, query_category1_category2_category3_results, article_numbers)
query_category1_category2_category3_results_in_top = { k: v for k, v in query_category1_category2_category3_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

query_category3_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, query_category3_results, article_numbers)
query_category3_results_in_top = { k: v for k, v in query_category3_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

category1_category2_category3_query_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, category1_category2_category3_query_results, article_numbers)
category1_category2_category3_query_results_in_top = { k: v for k, v in category1_category2_category3_query_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

category3_query_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, category3_query_results, article_numbers)
category3_query_results_in_top = { k: v for k, v in category3_query_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

trace1 = go.Histogram(
    x=list(baseline_results_in_top.values()),
    opacity=0.50,
    name='Baseline',
    histnorm='percent'
)

trace2 = go.Histogram(
    x=list(no_search_non_required_results_in_top.values()),
    opacity=0.50,
    name='Query Only',
    histnorm='percent'
)

trace3 = go.Histogram(
    x=list(query_category1_category2_category3_results_in_top.values()),
    opacity=0.50,
    name='Query + Category 1, 2, 3',
    histnorm='percent'
)

trace4 = go.Histogram(
    x=list(query_category3_results_in_top.values()),
    opacity=0.50,
    name='Query + Category 3',
    histnorm='percent'
)

trace5 = go.Histogram(
    x=list(category1_category2_category3_query_results_in_top.values()),
    opacity=0.50,
    name='Category 1, 2, 3 + Query',
    histnorm='percent'
)

trace6 = go.Histogram(
    x=list(category3_query_results_in_top.values()),
    opacity=0.50,
    name='Category 3 + Query',
    histnorm='percent'
)

data = [trace1, trace2, trace3, trace4, trace5, trace6]
layout = go.Layout(barmode='group', title="Initial Search Results (k=50) for Baseline Questions where In Top is 0 Post Rerank")
fig = go.Figure(data=data, layout=layout)

fig.show()

In [49]:
# in top init 0 breakdown

import plotly.graph_objects as go

baseline_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, baseline_results, article_numbers)
baseline_results_in_top = { k: v for k, v in baseline_results_in_top.items() if k in ids_no_intop_prior_baseline_arr }

no_search_non_required_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, no_search_non_required_results, article_numbers)
no_search_non_required_results_in_top = { k: v for k, v in no_search_non_required_results_in_top.items() if k in ids_no_intop_qc_baseline }

query_category1_category2_category3_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, query_category1_category2_category3_results, article_numbers)
query_category1_category2_category3_results_in_top = { k: v for k, v in query_category1_category2_category3_results_in_top.items() if k in ids_no_intop_qc_exp4 }

category1_category2_category3_query_results_in_top = get_percent_of_chunk_with_correct_source(question_ids, category1_category2_category3_query_results, article_numbers)
category1_category2_category3_query_results_in_top = { k: v for k, v in category1_category2_category3_query_results_in_top.items() if k in ids_no_intop_qc_exp3 }

trace1 = go.Histogram(
    x=list(baseline_results_in_top.values()),
    opacity=0.50,
    name='Baseline',
    histnorm='percent'
)

trace2 = go.Histogram(
    x=list(no_search_non_required_results_in_top.values()),
    opacity=0.50,
    name='Query Only',
    histnorm='percent'
)

trace3 = go.Histogram(
    x=list(query_category1_category2_category3_results_in_top.values()),
    opacity=0.50,
    name='Query + Category 1, 2, 3',
    histnorm='percent'
)

trace4 = go.Histogram(
    x=list(category1_category2_category3_query_results_in_top.values()),
    opacity=0.50,
    name='Category 1, 2, 3 + Query',
    histnorm='percent'
)

data = [trace1, trace2, trace3, trace4]
layout = go.Layout(barmode='group', title="Reranker In-top-init=0 Breakdown w/ Init Chunks")
fig = go.Figure(data=data, layout=layout)

fig.show()

### Extra

#### Graphing Init RougeL Recall vs In Top Init = 0

In [59]:
baseline_init_rougeL_recall = qc_config1_baseline_results_df.loc[qc_config1_baseline_results_df['in_top_init'] == 0]['init_rougeL_recall'].to_list()
cat_1_2_3_query_init_rougeL_recall = qc_config1_exp3_results_df.loc[qc_config1_exp3_results_df['in_top_init'] == 0]['init_rougeL_recall'].to_list()
query_cat_1_2_3_init_rougeL_recall = qc_config1_exp4_results_df.loc[qc_config1_exp4_results_df['in_top_init'] == 0]['init_rougeL_recall'].to_list()


trace1 = go.Histogram(
    x=baseline_init_rougeL_recall,
    opacity=0.50,
    name='Baseline',
    histnorm='percent'
)

trace2 = go.Histogram(
    x=cat_1_2_3_query_init_rougeL_recall,
    opacity=0.50,
    name='Category 1, 2, 3 + Query',
    histnorm='percent'
)

trace3 = go.Histogram(
    x=query_cat_1_2_3_init_rougeL_recall,
    opacity=0.50,
    name='Query + Category 1, 2, 3',
    histnorm='percent'
)

data = [trace1, trace2, trace3]
layout = go.Layout(barmode='group', title="In top init = 0 rougeL recall")
fig = go.Figure(data=data, layout=layout)

fig.show()

#### Graphing Init RougeL Recall vs In Top Init != 0

In [63]:
baseline_init_rougeL_recall = qc_config1_baseline_results_df.loc[qc_config1_baseline_results_df['in_top_init'] != 0]['init_rougeL_recall'].to_list()
cat_1_2_3_query_init_rougeL_recall = qc_config1_exp3_results_df.loc[qc_config1_exp3_results_df['in_top_init'] != 0]['init_rougeL_recall'].to_list()
query_cat_1_2_3_init_rougeL_recall = qc_config1_exp4_results_df.loc[qc_config1_exp4_results_df['in_top_init'] != 0]['init_rougeL_recall'].to_list()


trace1 = go.Histogram(
    x=baseline_init_rougeL_recall,
    opacity=0.50,
    name='Baseline',
    histnorm='percent'
)

trace2 = go.Histogram(
    x=cat_1_2_3_query_init_rougeL_recall,
    opacity=0.50,
    name='Category 1, 2, 3 + Query',
    histnorm='percent'
)

trace3 = go.Histogram(
    x=query_cat_1_2_3_init_rougeL_recall,
    opacity=0.50,
    name='Query + Category 1, 2, 3',
    histnorm='percent'
)

data = [trace1, trace2, trace3]
layout = go.Layout(barmode='group', title="In top init <> 0 rougeL recall")
fig = go.Figure(data=data, layout=layout)

fig.show()

### Analyzing chat results for highest-performing experiments

In [2]:
exp4_with_chat_filepath = './data/exp4-with-chat-result.csv'

exp4_with_chat_df = pd.read_csv(exp4_with_chat_filepath)

In [3]:
exp4_with_chat_df.head()

Unnamed: 0.1,Unnamed: 0,id,Total Views,source,bcss_question,answer,Title,article_number,filename,UrlName,...,source_missing,run_errors,init_chunks,generated_answer,reranker_chunks,in_top_init,init_max_rouge1,init_max_rougeLsum,init_rougeL_recall,init_rougeL_precision
0,0,10000,1582624,Unlimited Your Way for Business - BCSS.html,What international benefits are included in th...,All wireless plans for phones include unlimite...,Unlimited Your Way for Business - BCSS,21757.0,2006.json,Unlimited-Your-Way-for-Business-BCSS,...,False,,[('What if the customer is traveling to countr...,The international benefits included in the AT&...,"[(""This is a retired rate plan.\n\nAt the end ...",1,0.294737,0.196491,0.5,0.25
1,1,10003,491034,Account Verification Process - BCSS.html,Can I call the AOP/TCM if the verfied end-user...,If a verified end-user caller claims to have m...,Account Verification Process - BCSS,16479.0,630.json,account-verification-process----bcss-draft-con...,...,False,,[('Provide the CTN and ask the caller if you a...,"Yes, if a verified end-user claims to have mor...","[(""It is preferred that an AOP/TCM bridging an...",1,0.427984,0.304527,1.0,0.186528
2,2,10005,431915,PIN Maintenance - BCSS.html,How many digits can a PIN be?,"For End-Users, the PINs must be between 4-8 di...",PIN Maintenance - BCSS,7152.0,1703.json,pin-maintenance,...,False,,[('Pop up for displaying that both SMS and ema...,A PIN can be between 4-8 digits for Bill & Acc...,"[(""PIN Benefits\n\nThe PIN is:\n\tSecure and n...",10,0.247678,0.19195,0.95,0.166667
3,3,10006,430184,Mobile Hotspot.html,What is a mobile hotspot?,Mobile Hotspots provide instant access to the ...,Mobile Hotspot,7260.0,1865.json,mobile-hotspot,...,False,,"[(""Mobile Hotspots provide instant access to t...",A mobile hotspot is a device or feature that p...,"[(""Mobile Hotspots provide instant access to t...",5,0.211838,0.143302,1.0,0.108696
4,4,10008,402489,International Day Pass for Business - BCSS.html,What countries are eligible for the Internatio...,See att.com/globalcountries.,International Day Pass for Business - BCSS,4733.0,1063.json,international-day-pass-for-business,...,False,,"[(""International Day Pass FAQ\n\nWhat countrie...",The International Day Pass is available in ove...,[('Eligibility\n\nIDP can be added to all Mobi...,5,0.462121,0.363636,1.0,0.021505


In [10]:
# Quick validation that the experiment is set up consistently + we are sending the same query to /similarity-search and /chat
# We expect the chunks retrieved for each question to be the same
exp4_simsearch_chunks = exp4_with_chat_df['init_chunks']
exp4_chat_chunks = exp4_with_chat_df['reranker_chunks']

In [21]:
for i, (simsearch_chunk_result, chat_chunk_results) in enumerate(zip(exp4_simsearch_chunks, exp4_chat_chunks)):
    simsearch_chunks_set = set([chunk for (chunk, _) in eval(exp4_simsearch_chunks[i])])
    chat_chunks_set = set([chunk for (chunk, _) in eval(exp4_chat_chunks[i])])
    if not simsearch_chunks_set == chat_chunks_set:
        print(f'Similarity search and chat endpoints returned different results for question {i} :(')

In [39]:
def get_df_rows_given_ids_all_cols(id_list: list[str], df: pd.DataFrame, id_col: str = 'id'):
    return df.loc[df[id_col].isin(id_list)]


In [50]:
# ids_no_intop_prior_baseline
exp4_with_chat_results_df = pd.read_csv('./data/exp4-with-chat-and-gpt-scores.csv')

exp4_with_chat_results_filtered_75 = get_df_rows_given_ids_all_cols(ids_no_intop_prior_baseline, exp4_with_chat_results_df)

In [51]:
len(exp4_with_chat_results_filtered_75)

75

In [52]:
exp4_with_chat_results_filtered_75.to_csv('./data/exp4-with-chat-results-filtered-75.csv')