In [None]:
import json
from sklearn.metrics import accuracy_score

In [None]:
def load_json_data(json_path):
    """
    Read a JSON file and return its parsed content.
    Arg json_path is the path of the JSON file to read.
    Returns whatever json.load produces for the file (list, dict, ...).
    """
    # Decode explicitly as UTF-8: without an encoding argument open() falls
    # back to the platform's locale encoding, which can fail on or garble
    # non-ASCII text when that locale is not UTF-8.
    with open(json_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)

def extract_true_vals(data):
    """
    Extract the true values (customer_name, email, phone) from the JSON data.
    Arg data is an iterable of dicts; each one is read at
    ['scenario']['personal'].
    Returns list of dicts (one per entry of data) holding the lower-cased
    customer_name, email and phone. A field that is missing or null becomes ''.
    """
    def _normalise(value):
        # A JSON null arrives as None and has no .lower(); treat it like a
        # missing field. Non-string values are stringified before lowering.
        return '' if value is None else str(value).lower()

    true_vals = []
    for conversation in data:
        # "or {}" also covers keys that are present but explicitly null,
        # which a .get(key, {}) default would not.
        scenario = conversation.get('scenario') or {}
        personal_info = scenario.get('personal') or {}
        true_vals.append({
            'customer_name': _normalise(personal_info.get('customer_name')),
            'email': _normalise(personal_info.get('email')),
            'phone': _normalise(personal_info.get('phone'))
        })
    return true_vals

def convert_to_comparable_format(vals):
    """
    Turn each dict of vals into a (customer_name, email, phone) tuple.
    Returns the tuples as a list, in the same order as vals, so that two
    such lists can be compared element by element.
    """
    rows = []
    for record in vals:
        row = (record['customer_name'], record['email'], record['phone'])
        rows.append(row)
    return rows

def test_llm_predictions(json_path, llm_predictions):
    """
    Run all methods.
    Arg json_path is the path of the ground-truth JSON file.
    Arg llm_predictions is a list of dicts with the keys customer_name,
    email and phone, in the same order as the conversations in the file.
    Returns float accuracy score of LLM (as a percentage) using sklearn's
    accuracy_score; a conversation counts as correct only when all three
    fields match.

    Note: the ground truth is lower-cased by extract_true_vals while the
    predictions are compared as given, so predictions must already be
    lower-cased to match.
    """
    import warnings

    data = load_json_data(json_path)
    true_vals = extract_true_vals(data)

    # Convert true values and predictions to comparable formats
    y_true = convert_to_comparable_format(true_vals)
    y_pred = convert_to_comparable_format(llm_predictions)

    # zip() stops at the shorter list, so a count mismatch would otherwise
    # silently leave conversations out of the score. Keep scoring the
    # overlapping pairs, but tell the caller about it.
    if len(y_true) != len(y_pred):
        warnings.warn(
            f"Got {len(y_pred)} predictions for {len(y_true)} ground-truth "
            f"conversations; only the first {min(len(y_true), len(y_pred))} "
            "pairs are scored.",
            stacklevel=2,
        )

    # Calculate accuracy for each conversation (entire tuple must match)
    conversation_accuracy = [int(true == pred) for true, pred in zip(y_true, y_pred)]

    # Nothing to compare: report 0 rather than handing sklearn zero samples.
    if not conversation_accuracy:
        return 0.0

    # Scoring the 0/1 flags against an all-ones target gives the fraction of
    # conversations that matched exactly.
    acc_score = accuracy_score([1] * len(conversation_accuracy), conversation_accuracy)

    return acc_score * 100

In [None]:
# Location of the ground-truth JSON file. The path points into Google Drive,
# so the drive must be mounted before this cell is run.
json_file_path = '/content/drive/MyDrive/abcd_sample.json'

# Sample LLM predictions used to try out the scorer; replace them with your
# own results.
llm_predictions = [
    {
        'customer_name': 'crystal minh',
        'email': 'cminh730@email.com',
        'phone': '(977) 625-2661',
    },
    {
        'customer_name': 'alessandro phoenix',
        'email': 'enacjmac@gmail.com',  # incorrect email example
        'phone': '(727) 760-7806',
    },
    {
        'customer_name': 'joyce wu',
        'email': '',
        'phone': '(859) 787-9085',
    },
]

# Score the predictions against the ground truth and report the result.
acc_score = test_llm_predictions(json_file_path, llm_predictions)
print(f"LLM Prediction Accuracy: {acc_score}%")

LLM Prediction Accuracy: 66.66666666666666%
