In [15]:
import numpy as np
import pandas as pd
import os
import csv
import re
from dotenv import load_dotenv
import json
from openai import OpenAI
from urllib.parse import urlparse
import hashlib
import pdfkit
import requests
from playwright.sync_api import sync_playwright
from openai import OpenAI
import json
from bs4 import BeautifulSoup
from urllib.parse import quote_plus

In [2]:
# Project root; the .env file with the API keys is read from directly inside it.
path = "/home/cptaswadu/RESCUE-n8n/insurance"
load_dotenv(dotenv_path=os.path.join(path, ".env"))

# Credentials are looked up in the process environment; a missing key yields None.
openai_api_key = os.environ.get("OPEN_AI_API_KEY")
perplexity_api_key = os.environ.get("PERPLEXITY_API_KEY")

# Shared OpenAI client used by the lookup / summary / QnA helpers in this notebook.
client = OpenAI(api_key=openai_api_key)

In [16]:
def generate_search_query_from_patient_info(patient_info_text):
    """Build a web-search query string from a free-text patient description.

    The query is assembled from up to four pieces pulled out of the text,
    followed by the fixed suffix ``"coverage policy"``:

    * insurer  -- first match among UnitedHealthcare / Aetna / Cigna / Blue Cross
    * plan     -- text between "covered by" / "enrolled in" and "and resides" / "and lives"
    * location -- letters and spaces following "resides in" / "lives in"
    * test     -- "Whole Exome Sequencing", "BRCA1/BRCA2", or the generic "Genetic Testing"

    Pieces that cannot be extracted are omitted, so the query never contains
    the literal text "None" or runs of consecutive spaces.

    Args:
        patient_info_text: Free-text patient description (str).

    Returns:
        str: Space-separated search query.
    """
    # Ordered (needle, canonical name) pairs; the first needle found in the text wins.
    provider_aliases = (
        ("United Healthcare", "UnitedHealthcare"),
        ("UnitedHealthcare", "UnitedHealthcare"),
        ("Aetna", "Aetna"),
        ("Cigna", "Cigna"),
        ("Blue Cross", "Blue Cross"),
    )
    insurance_provider = next(
        (name for needle, name in provider_aliases if needle in patient_info_text),
        "",
    )

    # Accept both phrasings used in the case descriptions:
    # "covered by <plan> and resides in ..." and "enrolled in <plan> and lives in ...".
    plan_match = re.search(
        r"(?:covered by|enrolled in)\s+(.*?)\s+and\s+(?:resides|lives)\b",
        patient_info_text,
    )
    plan = plan_match.group(1).strip() if plan_match else ""

    # The character class stops at the first non-letter (e.g. the sentence's final period).
    location_match = re.search(r"(?:resides|lives) in ([A-Za-z ]+)", patient_info_text)
    location = location_match.group(1).strip() if location_match else ""

    # \bWES\b avoids false hits inside longer words; the spelled-out
    # name is matched case-insensitively.
    mentions_wes = (
        re.search(r"whole exome sequencing", patient_info_text, re.IGNORECASE)
        or re.search(r"\bWES\b", patient_info_text)
    )
    if mentions_wes:
        test = "Whole Exome Sequencing"
    elif "BRCA1" in patient_info_text or "BRCA2" in patient_info_text:
        test = "BRCA1/BRCA2"
    else:
        test = "Genetic Testing"

    # Drop empty pieces so missing fields do not leave stray spaces in the query.
    query_parts = (insurance_provider, plan, location, test, "coverage policy")
    search_query = " ".join(part for part in query_parts if part)
    return search_query

In [29]:
def find_policy_url_with_web_search(patient_info_text):
    """Ask the model (with web search enabled) for the most relevant policy URL.

    A search query derived from the patient text is included in the prompt for
    reference. The model is instructed to answer with a bare URL, but replies
    may wrap the link in Markdown or add surrounding text, so the first
    http(s) URL found anywhere in the reply is extracted instead of requiring
    the reply to start with "http".

    Args:
        patient_info_text: Free-text patient description (str).

    Returns:
        str: The extracted policy URL, or the sentinel ``"No policy found."``
        when the model says so or the reply contains no URL.
    """
    search_query = generate_search_query_from_patient_info(patient_info_text)

    prompt = f"""
You are a clinical insurance assistant.

Search for the most relevant and up-to-date official insurance policy document URL that describes genetic testing coverage for this patient, considering the patient's clinical situation and insurance plan.

PATIENT INFORMATION:
{patient_info_text}

Search Query (for reference):
{search_query}

Instructions:
- Prioritize policies that are specific to the patient’s clinical scenario and insurance plan, not generic ones.
- If available, return the URL for the most relevant coverage policy page that discusses medical necessity, clinical guidelines, prior authorization, and coverage conditions for the requested genetic test in this patient's context.
- Return ONLY the policy document URL. Do not include any explanation or extra text.

If no appropriate policy is found, respond with "No policy found."
"""

    response = client.responses.create(
        model="gpt-4o",
        input=[
            {"role": "system", "content": "You are a clinical insurance assistant."},
            {"role": "user", "content": prompt}
        ],
        tools=[{"type": "web_search_preview"}]
    )

    result_text = (response.output_text or "").strip()

    # An explicit "no policy" answer wins even if the reply also mentions some link.
    if "No policy found" in result_text:
        return "No policy found."

    # Take the first URL; the excluded characters end the match at whitespace,
    # quotes, or the closing bracket/parenthesis of a Markdown link.
    url_match = re.search(r"https?://[^\s<>\"'\]\)]+", result_text)
    if url_match is None:
        return "No policy found."

    # Remove sentence punctuation that may directly follow the URL.
    return url_match.group(0).rstrip(".,;:")


In [30]:
def extract_policy_summary(patient_info_text, policy_url):
    """Summarize the coverage rules at ``policy_url`` that matter for this patient.

    When ``policy_url`` is the "No policy found." sentinel no request is made.
    Otherwise the model (with web search enabled) is asked to read the URL and
    return only the criteria relevant to the patient.

    Args:
        patient_info_text: Free-text patient description (str).
        policy_url: Policy document URL, or the sentinel "No policy found.".

    Returns:
        str: The summary text, or a fixed fallback notice when there is no
        URL, the model reports no relevant criteria, or the reply is shorter
        than 50 characters.
    """
    fallback_notice = "(No relevant policy document. Use patient information for all decisions.)"

    # Nothing to read: skip the API call entirely.
    if policy_url == "No policy found.":
        return fallback_notice

    prompt = f"""
You are a clinical insurance assistant.

You will be provided:
1. Patient clinical information (very important context).
2. Insurance policy document URL (to read and extract relevant coverage rules).

PATIENT INFORMATION:
{patient_info_text}

Read the following insurance policy document from this URL:

{policy_url}

Extract ONLY the coverage criteria and important policy rules that are relevant to THIS patient’s situation. 
Focus especially on factors such as:
- Age requirements
- Clinical guidelines
- Medical necessity
- Prior authorization criteria 
- Any other relevant rules related to genetic testing coverage for this patient.

Do NOT include website menus, navigation elements, disclaimers, or unrelated information.

Summarize only the meaningful policy content that is helpful for deciding test coverage for this specific patient based on their clinical information.

If the page does not load or no relevant rules are found, respond with: "No relevant coverage criteria found."

Return the summarized policy text below.
"""

    conversation = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt},
    ]
    reply = client.responses.create(
        model="gpt-4o",
        input=conversation,
        tools=[{"type": "web_search_preview"}],
    )

    summary = reply.output_text.strip()

    # Usable only if it is long enough and is not the model's "nothing found" answer.
    is_usable = len(summary) >= 50 and "No relevant coverage criteria" not in summary
    return summary if is_usable else fallback_notice

In [34]:
# Question set location, built from the project root `path` defined in the
# configuration cell so the absolute directory is written in one place only.
questions_file_path = os.path.join(path, "dataset", "Insurance_Genetic_Testing_QA.json")

# Read explicitly as UTF-8 so parsing does not depend on the platform's default encoding.
with open(questions_file_path, "r", encoding="utf-8") as f:
    questions_data = json.load(f)

# The file is a JSON object whose "questions" entry holds the list of question dicts.
questions_list = questions_data["questions"]

def format_questions(questions_list):
    """Render question dicts as a single prompt-ready text block.

    Each question becomes "<id>. <question>" followed by either a free-text
    note (when its options are exactly ``["Free text answer"]``) or its
    options plus any non-empty Yes/No follow-up option lists. Questions are
    separated by a blank line.

    Args:
        questions_list: Iterable of dicts with "id", "question", "options" and
            optionally "additional_if_yes" / "additional_if_no".

    Returns:
        str: The formatted questions ("" for an empty input).
    """
    def render(entry):
        pieces = [f"{entry['id']}. {entry['question']}"]

        # Free-text questions carry no option list or follow-ups.
        if entry.get("options") == ["Free text answer"]:
            pieces.append("\n(Free text answer allowed.)")
            return "".join(pieces)

        pieces.append(f"\nOptions: {entry['options']}")

        # Follow-up lists are shown only when present and non-empty.
        if entry.get("additional_if_yes"):
            pieces.append(f"\nIf you answer 'Yes', ALSO select from: {entry['additional_if_yes']}")
        if entry.get("additional_if_no"):
            pieces.append(f"\nIf you answer 'No', ALSO select from: {entry['additional_if_no']}")

        return "".join(pieces)

    return "\n\n".join(render(entry) for entry in questions_list)

In [31]:
def run_qna(patient_info_text, policy_url, policy_summary, questions_list):
    """Ask the model to answer the coverage questions for one patient.

    The prompt combines the patient text, the summarized policy criteria
    (labelled with the URL they came from) and the questions rendered by
    ``format_questions``. The reply is printed between banner lines and
    returned unparsed.

    Args:
        patient_info_text: Free-text patient description (str).
        policy_url: URL shown in the prompt as the source of the summary.
        policy_summary: Summarized policy criteria text.
        questions_list: Question dicts passed to ``format_questions``.

    Returns:
        str: The model's reply with surrounding whitespace stripped.
    """
    questions_formatted = format_questions(questions_list)

    prompt = f"""
You are a clinical insurance assistant specializing in genetic testing coverage policies.

You will be given:
1. Patient clinical information (very important for decision making)
2. Summarized policy coverage criteria text (use this when available)

Instructions:
- If policy criteria clearly apply, follow them.
- If policy criteria do NOT exist or are vague, rely on the patient's clinical information and your prior knowledge and reasoning.
- For each question:
    - Answer "Yes" or "No" based on the policy criteria and patient information.
    - If the question is a Free text question, provide a free text answer.
    - Strictly choose the answer from the options provided.
    - If options are provided, choose ONLY from those options.
    - If the question says "(Free text answer allowed)", you may write your answer freely.
    - If the question says "If Yes, ALSO select from ..." and you answered "Yes", you MUST also select from those follow-up options.
    - If the question says "If No, ALSO select from ..." and you answered "No", you MUST also select from those follow-up options.
- Output answers in JSON format ONLY, with no explanation.
- At the end, include the policy_url as "policy_url".

==== PATIENT INFORMATION ====
{patient_info_text}

==== SUMMARIZED POLICY COVERAGE CRITERIA (from URL: {policy_url}) ====
{policy_summary}

==== QUESTIONS ====
{questions_formatted}

Output your answers in JSON format only and include the policy_url at the end.
"""

    chat_messages = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0,
    )

    answer_text = completion.choices[0].message.content.strip()

    # One print call with a newline separator emits the same three banner lines.
    print("\n===== QnA Result =====", answer_text, "======================\n", sep="\n")

    return answer_text

In [33]:
# Example patient cases fed through the pipeline. Each entry has:
#   "id"           -- case identifier, used as the key in `policy_records`
#   "patient_info" -- free-text clinical and insurance description passed to the
#                     policy lookup, summary and QnA helpers
case_ex = [
    {
        "id": "Case1",
        "patient_info": "An 8-year-old boy with neurodevelopmental delay and seizures. A prior chromosomal microarray test was negative. Whole exome sequencing (WES) has been requested by the genetic counselor to investigate potential underlying genetic causes that may guide diagnosis and future treatment decisions. There is also a family history of neurodevelopmental disorders, as his older brother was diagnosed with autism spectrum disorder. The patient is covered by United Healthcare Choice Plus through a family plan and resides in New Jersey."
    },
    {
        "id": "Case2",
        "patient_info": "An 8-year-old boy with mild learning difficulties and no significant neurological symptoms. There is no family history of genetic conditions, and his prior chromosomal microarray test was negative. Whole exome sequencing (WES) has been requested by his primary care provider (PCP) to explore potential genetic factors as part of general health screening and educational planning. The patient is covered by United Healthcare Choice Plus through a family plan and resides in New Jersey."
    },
    {
        "id": "Case3",
        "patient_info": "A 35-year-old woman with a strong family history of breast and ovarian cancer. Her mother was diagnosed with breast cancer at age 42, and her maternal aunt had ovarian cancer in her 50s. The patient herself has no history of cancer but has dense breast tissue and is considered at increased risk. The genetic counselor has recommended BRCA1/BRCA2 testing to assess her hereditary cancer risk and guide risk-reducing management decisions, including potential prophylactic options. The patient is covered by Aetna Open Access Managed Choice Plan and resides in California."
    },
    {
        "id": "Case4",
        "patient_info": "A 28-year-old woman with no family history of breast or ovarian cancer. The patient requested BRCA1/BRCA2 genetic testing after reading about genetic risks online. There were no prior specialist consultations or referrals, and no other clinical risk factors have been identified. The test was ordered directly by her primary care physician at the patient's request. The patient is enrolled in Aetna Open Access Managed Choice Plan and lives in Texas."
    }
]

In [35]:
# Run the pipeline (policy lookup -> policy summary -> QnA) for every example
# case and collect the outputs per case id.
policy_records = {}

for case in case_ex:
    case_id = case["id"]
    patient_info = case["patient_info"]

    print(f"\n=== Running for {case_id} ===")

    policy_url = find_policy_url_with_web_search(patient_info)
    print("Policy URL:", policy_url)

    policy_summary = extract_policy_summary(patient_info, policy_url)
    print("Policy Summary Extracted:", policy_summary)

    # The lookup results are stored before the QnA call, so the record already
    # exists if run_qna raises.
    policy_records[case_id] = dict(policy_url=policy_url, policy_summary=policy_summary)

    qna_result = run_qna(patient_info, policy_url, policy_summary, questions_list)
    policy_records[case_id].update(qna_result=qna_result)


=== Running for Case1 ===
Policy URL: No policy found.
Policy Summary Extracted: (No relevant policy document. Use patient information for all decisions.)

===== QnA Result =====
```json
{
  "Q0": "Whole exome sequencing (WES)",
  "Q1": "Not specified",
  "Q2": "Not specified",
  "Q3": "Yes",
  "Q4": "No",
  "Q5": "Yes",
  "Q6": "No",
  "Q7": "Yes",
  "Q8": "No",
  "Q9": "Not specified",
  "Q10": "No",
  "Q10_followup": "Diagnostic",
  "Q11": "Not specified",
  "Q12": "No",
  "Q13": "Not specified",
  "Q14": "Not listed",
  "Q15": "No",
  "Q16": "Yes",
  "Q17": "Submit the claim with documentation of medical necessity, including the patient's clinical presentation, family history, and previous genetic testing results. Ensure that the test was ordered by a qualified specialist and include any genetic counseling notes if available.",
  "policy_url": "No policy found."
}
```


=== Running for Case2 ===
Policy URL: No policy found.
Policy Summary Extracted: (No relevant policy document. U

In [23]:
# Display the per-case records (policy URL, policy summary, QnA result)
# collected by the pipeline loop.
policy_records

{'Case1': {'policy_url': 'https://www.uhcprovider.com/content/dam/provider/docs/public/policies/comm-medical-drug/whole-exome-and-whole-genome-sequencing.pdf',
  'policy_summary': "Based on UnitedHealthcare's policy effective May 1, 2025, Whole Exome Sequencing (WES) is considered medically necessary for diagnosing or evaluating a genetic disorder when the results are expected to directly influence medical management and clinical outcomes, provided all the following criteria are met:\n\n1. **Clinical Presentation**: The patient's clinical presentation is nonspecific and does not fit a well-defined syndrome for which a specific or targeted gene test is available. If a specific genetic syndrome is suspected, a single gene or targeted gene panel should be performed prior to determining if WES is necessary.\n\n2. **Ordering Physician**: WES must be ordered by a medical geneticist, neonatologist, neurologist, immunologist, or developmental pediatrician.\n\n3. **Clinical History and Features

In [None]:
# Parse each case's raw QnA response (a JSON document, usually wrapped in a
# Markdown code fence) into a dict keyed by case id.
qna_results_only = {}

for case_id, record in policy_records.items():
    raw_qna = record["qna_result"].strip()

    # Unwrap a fenced block whether it is tagged (```json) or bare (```);
    # text without a fence is parsed as-is.
    fenced_block = re.search(r"```[A-Za-z]*\s*(.*?)\s*```", raw_qna, flags=re.DOTALL)
    if fenced_block:
        raw_qna = fenced_block.group(1)

    # json.loads raises JSONDecodeError if the response is not valid JSON.
    qna_json = json.loads(raw_qna)

    qna_results_only[case_id] = qna_json

# Check the results
print(json.dumps(qna_results_only, indent=2))

{
  "Case1": {
    "Q0": "Whole Exome Sequencing (WES)",
    "Q1": "Yes",
    "Q2": "Not specified",
    "Q3": "Yes",
    "Q4": "Yes",
    "Q4_followup": [
      "None of the above"
    ],
    "Q5": "Yes",
    "Q6": "No",
    "Q7": "Yes",
    "Q8": "No",
    "Q9": "Yes",
    "Q9_followup": [
      "Yes"
    ],
    "Q10": "No",
    "Q10_followup": [
      "Diagnostic"
    ],
    "Q11": "Not specified",
    "Q12": "No",
    "Q13": "Yes",
    "Q14": "Not listed",
    "Q15": "No",
    "Q16": "Yes",
    "Q17": "The policy requires prior authorization for genetic and molecular testing performed in outpatient settings. Ensure that the test is ordered by an approved specialist and that all necessary documentation, including clinical history and prior test results, is submitted with the authorization request.",
    "policy_url": "https://www.uhcprovider.com/content/dam/provider/docs/public/policies/comm-medical-drug/whole-exome-and-whole-genome-sequencing.pdf"
  },
  "Case2": {
    "Q0": "Whole

In [None]:
# Reference (gold) answers, keyed by case id and then by question id.
# Only Case1 is defined here. Main answers are strings; "*_followup" entries
# are lists of selected options.
# NOTE(review): presumably passed as `gold_result` to evaluate_qna_result --
# the call is not shown in this notebook excerpt; confirm.
ground_truth = {
    "Case1": {
        "Q0": "Whole Exome Sequencing (WES)",
        "Q1": "Yes",
        "Q2": "Yes",
        "Q3": "Yes",
        "Q4": "Yes",
        "Q4_followup": [
      "ACMG"
    ],
        "Q5": "Yes",
        "Q6": "No",
        "Q7": "Yes",
        "Q8": "No",
        "Q9": "Yes",
        "Q9_followup": [
      "Yes"
    ],
        "Q10": "No",
        "Q10_followup": [
      "Diagnostic"
    ],
        "Q11": "Yes",
        "Q12": "No",
        "Q13": "Yes",
        "Q14": "Yes",
        "Q14_followup": [
      "81415", "81416"
    ],
        "Q15": "No",
        "Q16": "Yes"
  }
    
}

In [None]:
def extract_json_from_qna_result(qna_result_text):
    """
    Remove an optional Markdown code fence and parse the remaining text as JSON.

    Handles a tagged fence (```json ... ```), a bare fence (``` ... ```), a
    fenced block surrounded by extra prose, and plain unfenced JSON.

    Args:
        qna_result_text: Raw model response (str).

    Returns:
        The parsed JSON value (a dict for the QnA responses), or None when the
        input is not a string or does not contain valid JSON. A message is
        printed in the failure case.
    """
    if not isinstance(qna_result_text, str):
        print("JSON parsing error: expected str, got", type(qna_result_text).__name__)
        return None

    # Prefer the contents of the first fenced block; otherwise use the whole text.
    fenced_block = re.search(r"```[A-Za-z]*\s*(.*?)\s*```", qna_result_text, flags=re.DOTALL)
    cleaned_text = fenced_block.group(1) if fenced_block else qna_result_text.strip()

    try:
        return json.loads(cleaned_text)
    except json.JSONDecodeError as e:
        print("JSON parsing error:", e)
        return None
    

def evaluate_qna_result(predicted_result, gold_result):
    """
    Compare predicted and gold results for Q0 to Q16.

    Every gold key of the form "Q<n>" or "Q<n>_<suffix>" (for example
    "Q4_followup") with n <= 16 is evaluated; all other keys (such as "Q17"
    or "policy_url") are skipped.

    Answers are compared after normalization: strings are stripped, list
    answers are compared ignoring order, and a bare string is treated as
    equal to a one-element list holding the same value.

    Args:
        predicted_result: Dict of predicted answers, or None (treated as empty).
        gold_result: Dict of reference answers.

    Returns:
        dict: question id -> "Correct" or
        "Incorrect (Pred: <pred> / Gold: <gold>)".
    """
    def _as_answer_list(value):
        # Normalize any answer shape to a sorted list of stripped strings.
        if value is None:
            return []
        if isinstance(value, str):
            stripped = value.strip()
            return [stripped] if stripped else []
        if isinstance(value, (list, tuple, set)):
            return sorted(str(item).strip() for item in value)
        return [str(value).strip()]

    def _for_display(value):
        # Keep the original message format for strings; show other types as-is.
        return value.strip() if isinstance(value, str) else value

    # A failed JSON parse yields None upstream; score it as "no answers given".
    predicted_result = predicted_result or {}

    evaluation = {}
    for qid in gold_result:
        # Parse the numeric part explicitly so suffixed keys do not break int().
        key_match = re.fullmatch(r"Q(\d+)(?:_\w+)?", str(qid))
        if key_match is None or int(key_match.group(1)) > 16:
            continue

        pred_answer = predicted_result.get(qid, "")
        gold_answer = gold_result.get(qid, "")

        if _as_answer_list(pred_answer) == _as_answer_list(gold_answer):
            evaluation[qid] = "Correct"
        else:
            evaluation[qid] = (
                f"Incorrect (Pred: {_for_display(pred_answer)} / Gold: {_for_display(gold_answer)})"
            )

    return evaluation

In [73]:
# Re-run the pipeline (policy lookup -> policy summary -> QnA) for every
# example case, rebuilding `policy_records` from scratch.
policy_records = {}

for case in case_ex:
    case_id = case["id"]
    patient_info = case["patient_info"]

    print(f"\n=== Running for {case_id} ===")

    policy_url = find_policy_url_with_web_search(patient_info)
    print("Policy URL:", policy_url)

    # extract_policy_summary takes (patient_info_text, policy_url); passing only
    # the URL raises TypeError against the definition in this notebook.
    policy_summary = extract_policy_summary(patient_info, policy_url)
    print("Policy Summary Extracted:", policy_summary)

    policy_records[case_id] = {
        "policy_url": policy_url,
        "policy_summary": policy_summary
    }

    qna_result = run_qna(patient_info, policy_url, policy_summary, questions_list)
    policy_records[case_id]["qna_result"] = qna_result


=== Running for Case1 ===
Policy URL: https://www.uhcprovider.com/en/resource-library/news/2024/changes-genetic-molecular-testing-coverage-pa-requirements.html
Policy Summary Extracted: Starting January 1, 2025, UnitedHealthcare will discontinue coverage and remove prior authorization requirements for multi-panel pharmacogenetic tests across select commercial and all Exchange plans. This change affects the following UnitedHealthcare benefit plans:

- UnitedHealthcare of the Mid-Atlantic, Inc.
- UnitedHealthcare Plan of the River Valley, Inc.
- UnitedHealthcare Insurance Company of the River Valley
- Oxford Health Insurance, Inc.
- Harvard Pilgrim Health Care
- All Savers Insurance Company
- All Savers Life Insurance Company of California
- UnitedHealthcare Exchange plans
- Neighborhood Health Partnership, Inc.

As a result, multi-panel genetic tests for behavioral health diagnoses will no longer be covered, and prior authorization requests for the following procedure codes will no lon

In [71]:
# Display the raw QnA response string currently bound to `qna_result`. The
# pipeline loop reassigns it on every iteration, so it holds the last case run.
# NOTE(review): relies on kernel state left by an earlier loop execution.
qna_result

'```json\n{\n  "Q0": "BRCA1/BRCA2 genetic testing",\n  "Q1": "Not specified",\n  "Q2": "No",\n  "Q3": "No",\n  "Q4": "No",\n  "Q5": "Not required",\n  "Q6": "No",\n  "Q7": "No",\n  "Q8": "No",\n  "Q9": "No",\n  "Q10": "Yes",\n  "Q11": "Not specified",\n  "Q12": "No",\n  "Q13": "Not specified",\n  "Q14": "Not listed",\n  "Q15": "No",\n  "Q16": "No",\n  "Q17": "Submit the claim with documentation of the test order by the primary care physician, and any additional information that may support medical necessity, if applicable.",\n  "policy_url": "No policy found."\n}\n```'