In [55]:
import pandas as pd
import json
from sklearn.metrics import accuracy_score
from tabulate import tabulate

In [62]:
def load_json_data(json_path):
    with open(json_path, 'r') as file:
        data = json.load(file)
    return data

def extract_true_vals(data):
    """
    Extract the true values (customer_name, email, phone) from the JSON data.
    Returns list of dicts that are true values from the conversation
    """
    true_vals = []
    for i in data:
        scenario = i.get('scenario', {})
        personal_info = scenario.get('personal', {})
        true_vals.append({
            'customer_name': personal_info.get('customer_name', '').lower(),
            'email': personal_info.get('email', '').lower(),
            'phone': personal_info.get('phone', '').lower()
        })
    return true_vals

def convert_to_comparable_format(vals):
    """
    Convert a list of dicts to a list of tuples for comparison.
    """
    return [(val['customer_name'], val['email'], val['phone']) for val in vals]

def compare_and_display_results(y_true, y_pred, show_all=True, output_format='df'):
    """
    Compare true and predicted values and display them either as df or json.
    If show_all is True, shows all comparisons; otherwise, shows only mismatches.
    """
    comparisons = []
    for i, (true, pred) in enumerate(zip(y_true, y_pred)):
        comparison = {
            'Index': i + 1,
            'True Customer Name': true[0],
            'Predicted Customer Name': pred[0],
            'True Email': true[1],
            'Predicted Email': pred[1],
            'True Phone': true[2],
            'Predicted Phone': pred[2],
            'Match': true == pred
        }
        comparisons.append(comparison)

    df_comparisons = pd.DataFrame(comparisons)

    if show_all == False : df_comparisons = df_comparisons[~df_comparisons['Match']]

    # df or json
    if output_format == 'df':
        return tabulate(df_comparisons, headers='keys', tablefmt='grid')
    elif output_format == 'json':
        return df_comparisons.to_json(orient='records', indent=2)

def test_llm_predictions(json_path, llm_predictions, show_all=True):
    """
    Run all methods.
    Arg llm_predictions is a list of dicts.
    Returns float accuracy score of LLM using sklearn's accuracy_score for each conversation.
    """
    data = load_json_data(json_path)
    true_vals = extract_true_vals(data)

    # Convert true values and predictions to comparable formats
    y_true = convert_to_comparable_format(true_vals)
    y_pred = convert_to_comparable_format(llm_predictions)

    # Calc accuracy for each conversation
    conversation_accuracy = [1 if true == pred else 0 for true, pred in zip(y_true, y_pred)]

    # If all elems are 1, all preds are correct. use accuracy_score to calc acc_score
    acc_score = accuracy_score([1] * len(conversation_accuracy), conversation_accuracy)

    # Display results
    results = compare_and_display_results(y_true, y_pred, show_all, output_format='json') # If you want json/df, change here

    return acc_score, results

In [65]:
# Path to main json file within your specific Google Drive, please mount GDrive
json_file_path = '/content/drive/MyDrive/abcd_sample.json'

# LLM predictions (used sample, for testing reasons)
# Paste your results here?
llm_predictions = [
    {'customer_name': 'crystal minh', 'email': 'cminh730@email.com', 'phone': '(977) 625-2661'},
    {'customer_name': 'alessandro phoenix', 'email': 'incorrectemail@gmail.com', 'phone': '(727) 760-7806'},
    {'customer_name': 'joyce wu', 'email': '', 'phone': '(859) 787-9085'},
]

# Test LLM predictions against the ground truth with a detailed view of mismatches
acc_score, results = test_llm_predictions(json_file_path, llm_predictions, show_all=False) # If you want to see each comparison, change show_all to true

print(f"\nLLM Prediction Accuracy: {acc_score}%")
print(results)


LLM Prediction Accuracy: 0.6666666666666666%
[
  {
    "Index":2,
    "True Customer Name":"alessandro phoenix",
    "Predicted Customer Name":"alessandro phoenix",
    "True Email":"aphoenix939@email.com",
    "Predicted Email":"incorrectemail@gmail.com",
    "True Phone":"(727) 760-7806",
    "Predicted Phone":"(727) 760-7806",
    "Match":false
  }
]
