In [1]:
import numpy as np
import pandas as pd
import os
import csv
import re
from dotenv import load_dotenv
import json
from openai import OpenAI

In [2]:
# Reference data: the CSV's "In-network Provider" column is the ground-truth
# list that LLM answers are scored against.
df = pd.read_csv('/home/cptaswadu/RESCUE-n8n/insurance/Providers_Network_update.csv')

# Drop missing cells, trim surrounding whitespace, and materialise a plain list.
# Duplicates are not removed here.
real_list = (
    df["In-network Provider"]
    .dropna()
    .str.strip()
    .tolist()
)

In [3]:
# Work from the project directory so relative paths (e.g. the default
# "results" output folder) resolve there, then load secrets from its .env file.
os.chdir('/home/cptaswadu/RESCUE-n8n/insurance')
load_dotenv(dotenv_path='/home/cptaswadu/RESCUE-n8n/insurance/.env')

# Both keys are None when the variable is absent from the environment.
openai_api_key = os.environ.get("OPEN_AI_API_KEY")
# NOTE(review): not referenced by any other cell visible in this notebook.
perplexity_api_key = os.environ.get("PERPLEXITY_API_KEY")

# Shared OpenAI client used by evaluate_llm_providers.
client = OpenAI(api_key=openai_api_key)


In [4]:
def normalize_provider(name):
    """Apply the fixed string rewrites used before comparing provider names.

    Steps, in order:
      1. Trim surrounding whitespace.
      2. Replace "Kansas City" with "Kansas".
      3. Remove a trailing "(FFS)" marker (and the whitespace before it).
      4. Replace "Blue Shield of" with "BS".

    Args:
        name: Provider name as a string.

    Returns:
        The rewritten provider name.
    """
    ffs_suffix = "(FFS)"

    # str.replace is a no-op when the substring is absent, so no guard is needed.
    name = name.strip().replace("Kansas City", "Kansas")

    # Cut only the trailing marker. Using str.replace here would also delete
    # any "(FFS)" appearing earlier in the name and leave a double space behind.
    if name.endswith(ffs_suffix):
        name = name[: -len(ffs_suffix)].strip()

    return name.replace("Blue Shield of", "BS")

In [5]:
def _request_provider_response(messages, max_retries=3):
    """Query the model through the module-level `client`, retrying on any exception.

    Returns the stripped response text, or None when every attempt raised.
    """
    for attempt in range(1, max_retries + 1):
        try:
            print(f"üîÅ Attempt {attempt}...")
            response = client.responses.create(
                model="gpt-4o",
                tools=[{"type": "web_search_preview"}],
                input=messages
            )
            return response.output_text.strip()
        except Exception as e:
            # Deliberate best-effort: log the failure and try again.
            print(f"‚ùå Attempt {attempt} failed: {e}")
    return None


def _strip_code_fence(text):
    """Remove a leading ``` / ```json fence and a trailing ``` fence, if present."""
    text = re.sub(r"^```(?:json)?\s*", "", text)
    return re.sub(r"\s*```$", "", text)


def _repair_truncated_json(text):
    """Best-effort completion of a cut-off {"Providers": [...]} payload."""
    if text.endswith(","):
        return text.rstrip(",") + "]}"
    if text.endswith("["):
        return text + "]}"
    if "Providers" in text and "source_url" not in text:
        return text + ', "source_url": ""}'
    return text


def _parse_provider_json(text):
    """Parse `text` into a dict, trying it as-is before the repaired form.

    Parsing the untouched text first matters: a complete, valid object that
    merely omits "source_url" would otherwise be corrupted by the repair step.
    Returns None when neither form decodes to a JSON object.
    """
    for candidate in (text, _repair_truncated_json(text)):
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def evaluate_llm_providers(message_list, real_list, prompt_name=None, experiment_id=None, save_dir="results"):
    """Ask the model for a provider list and score it against `real_list`.

    The model is expected to answer with a JSON object containing a
    "Providers" list. Each returned name is passed through
    `normalize_provider`, and precision/recall are computed on the *sets* of
    names (exact string match).

    Args:
        message_list: Chat messages passed as `input` to the Responses API.
        real_list: Reference provider names.
        prompt_name: Label used in the summary and in output file names.
        experiment_id: Run identifier used in the summary and file names.
        save_dir: Directory for CSV output (created if missing).

    Returns:
        On success, a summary dict with counts plus "Precision (%)" and
        "Recall (%)". On failure, a dict with "error" and both metrics set
        to 0.

    Side effects:
        Prints progress. Writes "<prompt_name>_experiment<id>.csv" when both
        `prompt_name` and `experiment_id` are given, and always writes a
        "..._failed.csv" with the raw output when the response cannot be
        parsed.
    """
    response_text = _request_provider_response(message_list)
    if not response_text:
        return {
            "error": "All attempts failed.",
            "Precision (%)": 0,
            "Recall (%)": 0
        }

    response_text = _strip_code_fence(response_text)
    result = _parse_provider_json(response_text)

    if result is None:
        print("‚ùå JSON decoding failed after all retries.")
        print(response_text[:500])

        # Keep the unparseable output on disk so it can be inspected later.
        os.makedirs(save_dir, exist_ok=True)
        filename = f"{prompt_name}_experiment{experiment_id}_failed.csv"
        path = os.path.join(save_dir, filename)
        pd.DataFrame([{"error": "invalid JSON", "raw_output": response_text}]).to_csv(path, index=False)
        print(f"‚ö†Ô∏è Raw output saved to '{path}'")

        return {
            "error": "invalid JSON",
            "Precision (%)": 0,
            "Recall (%)": 0
        }

    chatgpt_provider_list = result.get("Providers", [])
    if not isinstance(chatgpt_provider_list, list):
        chatgpt_provider_list = []
    # Skip non-string entries (null, numbers, nested objects) instead of
    # crashing inside normalize_provider.
    normalized_chatgpt_list = [
        normalize_provider(name)
        for name in chatgpt_provider_list
        if isinstance(name, str)
    ]

    real_set = set(real_list)
    chatgpt_set = set(normalized_chatgpt_list)
    common = real_set & chatgpt_set
    missing = real_set - chatgpt_set
    extra = chatgpt_set - real_set

    precision = len(common) / len(chatgpt_set) * 100 if chatgpt_set else 0
    recall = len(common) / len(real_set) * 100 if real_set else 0

    comparison_summary = {
        "prompt_name": prompt_name,
        "experiment_id": experiment_id,
        # Raw list length; every other count below is set-based, so this can
        # exceed common + missing when real_list contains duplicates.
        "real_list_count": len(real_list),
        "chatgpt_list_count": len(chatgpt_set),
        "common_count": len(common),
        "missing_from_chatgpt_count": len(missing),
        "extra_in_chatgpt_count": len(extra),
        "Precision (%)": round(precision, 2),
        "Recall (%)": round(recall, 2)
    }

    if prompt_name and experiment_id is not None:
        os.makedirs(save_dir, exist_ok=True)
        filename = f"{prompt_name}_experiment{experiment_id}.csv"
        path = os.path.join(save_dir, filename)
        pd.DataFrame([comparison_summary]).to_csv(path, index=False)
        print(f"üìÅ Result saved to '{path}'")

    return comparison_summary


In [6]:
def prompt_gene_providers_baseline():
    """Build the baseline prompt: a strict-JSON system message plus the provider question.

    Returns:
        A two-element list of {"role", "content"} dicts (system, then user).
    """
    system_text = (
        "You are a helpful assistant. Respond only in strict JSON format "
        "with no explanation or extra commentary."
    )
    # Sentences are joined with single spaces to form one user message.
    user_sentences = [
        "List all the medical insurance providers that are currently in-network with GeneDx.",
        "Format your response as:",
        '{"Providers": [list of provider names], "source_url": "link to the official source"}.',
        "Only use information from official GeneDx or trusted affiliate websites.",
    ]
    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": " ".join(user_sentences)},
    ]


def prompt_gene_providers_counted_311():
    """Build the prompt variant that states the expected provider count (311) in the question.

    Returns:
        A two-element list of {"role", "content"} dicts (system, then user).
    """
    system_text = " ".join((
        "You are an artificial intelligence assistant and you need to",
        "engage in a helpful, detailed, polite conversation with a user.",
    ))
    user_text = " ".join((
        "List all the 311 medical insurance providers that are currently in-network with GeneDx.",
        "Output the result strictly in JSON format using the following structure:",
        '{"Providers": [list of provider names], "source_url": "link to the official source"}.',
        "Only include links from the official GeneDx website or affiliated trusted sources.",
        "Do not include any introduction, explanation, or extra commentary ‚Äî only return the JSON object.",
    ))

    system_message = {"role": "system", "content": system_text}
    user_message = {"role": "user", "content": user_text}
    return [system_message, user_message]

def prompt_gene_providers_with_explicit_source():
    """Build the prompt variant that names the GeneDx insurance-network page as the primary source.

    Returns:
        A two-element list of {"role", "content"} dicts (system, then user).
    """
    source_page = "https://www.genedx.com/commercial-insurance-in-network-contracts/"

    system_text = (
        "You are an artificial intelligence assistant and you need to "
        "engage in a helpful, detailed, polite conversation with a user."
    )
    user_text = (
        "List all the medical insurance providers that are currently in-network with GeneDx. "
        "You may use the official GeneDx insurance network page at "
        + source_page
        + " as the primary source of information. "
        "Output the result strictly in JSON format using the following structure: "
        '{"Providers": [list of provider names], "source_url": "link to the official source"}. '
        "Only include links from the official GeneDx website or affiliated trusted sources. "
        "Do not include any introduction, explanation, or extra commentary ‚Äî only return the JSON object."
    )

    return [
        dict(role="system", content=system_text),
        dict(role="user", content=user_text),
    ]


# Registry of prompt builders: short prompt name -> zero-argument function
# returning the message list for that prompt.
prompt_bank = dict(
    baseline=prompt_gene_providers_baseline,
    counted_311=prompt_gene_providers_counted_311,
    explicit_source=prompt_gene_providers_with_explicit_source,
)

# Experiments

In [7]:
# Run 1 of the "baseline" prompt; the returned summary dict is the cell output.
evaluate_llm_providers(
    prompt_bank["baseline"](),
    real_list,
    prompt_name="baseline",
    experiment_id=1,
)

üîÅ Attempt 1...
üìÅ Result saved to 'results/baseline_experiment1.csv'


{'prompt_name': 'baseline',
 'experiment_id': 1,
 'real_list_count': 311,
 'chatgpt_list_count': 165,
 'common_count': 53,
 'missing_from_chatgpt_count': 257,
 'extra_in_chatgpt_count': 112,
 'Precision (%)': 32.12,
 'Recall (%)': 17.1}

In [8]:
# Run 1 of the "counted_311" prompt; the returned summary dict is the cell output.
evaluate_llm_providers(
    prompt_bank["counted_311"](),
    real_list,
    prompt_name="counted_311",
    experiment_id=1,
)

üîÅ Attempt 1...
üìÅ Result saved to 'results/counted_311_experiment1.csv'


{'prompt_name': 'counted_311',
 'experiment_id': 1,
 'real_list_count': 311,
 'chatgpt_list_count': 165,
 'common_count': 53,
 'missing_from_chatgpt_count': 257,
 'extra_in_chatgpt_count': 112,
 'Precision (%)': 32.12,
 'Recall (%)': 17.1}

In [9]:
# Run 1 of the "explicit_source" prompt; the returned summary dict is the cell output.
evaluate_llm_providers(
    prompt_bank["explicit_source"](),
    real_list,
    prompt_name="explicit_source",
    experiment_id=1,
)

üîÅ Attempt 1...
üìÅ Result saved to 'results/explicit_source_experiment1.csv'


{'prompt_name': 'explicit_source',
 'experiment_id': 1,
 'real_list_count': 311,
 'chatgpt_list_count': 169,
 'common_count': 150,
 'missing_from_chatgpt_count': 160,
 'extra_in_chatgpt_count': 19,
 'Precision (%)': 88.76,
 'Recall (%)': 48.39}