In [1]:
import numpy as np
import pandas as pd
import os
import csv
import re
from dotenv import load_dotenv
import json
from openai import OpenAI
from urllib.parse import urlparse
import hashlib
import pdfkit
import requests
from playwright.sync_api import sync_playwright
from openai import OpenAI
import json
from bs4 import BeautifulSoup
from urllib.parse import quote_plus
from itertools import product
import traceback

In [2]:
path = '/home/cptaswadu/RESCUE-n8n/insurance'
load_dotenv(dotenv_path=os.path.join(path, ".env"))
openai_api_key = os.getenv("OPEN_AI_API_KEY")
perplexity_api_key = os.getenv("PERPLEXITY_API_KEY")
client = OpenAI(api_key=openai_api_key)

In [None]:
def find_policy_url_with_web_search(patient_info_text, model="openai", openai_client=None, perplexity_api_key=None, max_retries=3):
    prompt = f"""
You are a clinical insurance assistant.
Your task is to search for the most relevant and up-to-date official insurance policy document URL that addresses **genetic testing coverage** for the following patient. 
Take into account both the patient’s clinical condition and insurance plan.
Please focus on the insurance provider and the plan, genetic test, and the state where the patient resides based on the patient information.

PATIENT INFORMATION:
{patient_info_text}

Instructions:
- Retrieve a **direct URL** to the official insurance policy that best fits this specific clinical scenario and insurance plan.
- Emphasize documents that discuss **medical necessity**, **clinical guidelines**, **prior authorization requirements**, and **coverage criteria**.
- If no relevant policy is found, respond with: **"No policy found."**
- Output **only the URL**. Do not include explanations, summaries, or any additional content.
"""

    messages = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt}
    ]

    def call_openai():
        response = openai_client.responses.create(
            model="gpt-4o",
            input=messages,
            tools=[{"type": "web_search_preview"}]
        )
        return response.output_text.strip()

    def call_perplexity():
        headers = {
            "Authorization": f"Bearer {perplexity_api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": "sonar-pro",
            "messages": messages
        }
        url = "https://api.perplexity.ai/chat/completions"
        res = requests.post(url, headers=headers, json=data)
        if res.status_code == 200:
            return res.json()["choices"][0]["message"]["content"].strip()
        else:
            raise Exception(f"Perplexity error: {res.status_code} - {res.text}")

    for attempt in range(1, max_retries + 1):
        try:
            print(f"🔁 Attempt {attempt} ({model})...")
            result_text = call_perplexity() if model == "perplexity" else call_openai()
            if not result_text.startswith("http"):
                return "No policy found."
            return result_text
        except Exception as e:
            print(f"❌ Attempt {attempt} failed: {e}")

    return "No policy found."

In [4]:
def extract_policy_summary(patient_info_text, policy_url, model="openai", openai_client=None, perplexity_api_key=None, max_retries=3):
    if policy_url == "No policy found.":
        return "(No relevant policy document. Use patient information for all decisions.)"

    prompt = f"""
You are a clinical insurance assistant.

You will be provided:
1. Patient clinical information (very important context).
2. Insurance policy document URL (to read and extract relevant coverage rules).

PATIENT INFORMATION:
{patient_info_text}

Read the following insurance policy document from this URL:

{policy_url}

Extract ONLY the coverage criteria and important policy rules that are relevant to THIS patient’s situation. 
Focus especially on factors such as:
- Age requirements
- Clinical guidelines
- Medical necessity
- Prior authorization criteria 
- Any other relevant rules related to genetic testing coverage for this patient.

Do NOT include website menus, navigation elements, disclaimers, or unrelated information.

Summarize only the meaningful policy content that is helpful for deciding test coverage for this specific patient based on their clinical information.

If the page does not load or no relevant rules are found, respond with: "No relevant coverage criteria found."

Return the summarized policy text below.
"""

    messages = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt}
    ]

    def call_openai():
        response = openai_client.responses.create(
            model="gpt-4o",
            input=messages,
            tools=[{"type": "web_search_preview"}]
        )
        return response.output_text.strip()

    def call_perplexity():
        headers = {
            "Authorization": f"Bearer {perplexity_api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": "sonar-pro",
            "messages": messages
        }
        url = "https://api.perplexity.ai/chat/completions"
        res = requests.post(url, headers=headers, json=data)
        if res.status_code == 200:
            return res.json()["choices"][0]["message"]["content"].strip()
        else:
            raise Exception(f"Perplexity error: {res.status_code} - {res.text}")

    for attempt in range(1, max_retries + 1):
        try:
            print(f"🔁 Attempt {attempt} ({model})...")
            result_text = call_perplexity() if model == "perplexity" else call_openai()

            if "No relevant coverage criteria" in result_text or len(result_text) < 50:
                return "(No relevant policy document. Use patient information for all decisions.)"
            return result_text

        except Exception as e:
            print(f"❌ Attempt {attempt} failed: {e}")

    return "(No relevant policy document. Use patient information for all decisions.)"

In [5]:
def clean_json_response(response_text):
    original = response_text.strip()

    # Step 0: Check for hallucinated greeting (Perplexity fallback)
    if "how can I assist you" in original.lower() or "insurance-related questions" in original.lower():
        raise ValueError("Perplexity returned generic assistant response instead of JSON.")

    # Step 1: Try direct parsing
    try:
        return json.loads(original)
    except json.JSONDecodeError:
        pass

    # Step 2: Remove code block wrappers
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", original, flags=re.IGNORECASE).strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Step 3: Try to extract the first {...} JSON-like block
    match = re.search(r"(\{[\s\S]*?\})", original)
    if match:
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            pass

    raise ValueError("No valid JSON found in the response.")

In [6]:
def find_and_extract_policy_summary(patient_info_text, model="openai", openai_client=None, perplexity_api_key=None):
    policy_url = find_policy_url_with_web_search(
        patient_info_text=patient_info_text,
        model=model,
        openai_client=openai_client,
        perplexity_api_key=perplexity_api_key
    )

    policy_summary = extract_policy_summary(
        patient_info_text=patient_info_text,
        policy_url=policy_url,
        model=model,
        openai_client=openai_client,
        perplexity_api_key=perplexity_api_key
    )

    return policy_url, policy_summary


In [None]:
questions_file_path = "/home/cptaswadu/RESCUE-n8n/insurance/dataset/Insurance_Genetic_Testing_QA.json"

with open(questions_file_path, "r") as f:
    questions_data = json.load(f)

questions_list = questions_data["questions"]

def format_question_block(q, indent=0):
    indent_str = "  " * indent
    question_line = f"{indent_str}- {q['question']}"
    
    if q.get("options") == ["Free text answer"]:
        question_line += " (Free text answer allowed.)"
    else:
        question_line += f" (Options: {', '.join(q['options'])})"
        
    # Recursively handle nested follow-ups
    if "additional_if_yes" in q:
        question_line += f"\n{indent_str}  If 'Yes':"
        for subq in q["additional_if_yes"]:
            question_line += "\n" + format_question_block(subq, indent + 2)
    
    if "additional_if_no" in q:
        question_line += f"\n{indent_str}  If 'No':"
        for subq in q["additional_if_no"]:
            question_line += "\n" + format_question_block(subq, indent + 2)

    return question_line

def format_questions(questions_list):
    return "\n\n".join([
        f"{q['id']}. {format_question_block(q)}"
        for q in questions_list
    ])


In [8]:
def run_qna(case_id, patient_info_text, policy_url, policy_summary, questions_list,
            qna_model="openai", openai_client=None, perplexity_api_key=None,
            search_model="openai", summary_model="openai"):

    questions_formatted = format_questions(questions_list)

    prompt = f"""
You are a clinical insurance assistant specializing in genetic testing coverage policies.

You will be given:
1. Patient clinical information (very important for decision making)
2. Summarized policy coverage criteria text (use this when available)

Instructions:
- If policy criteria clearly apply, follow them.
- If policy criteria do NOT exist or are vague, rely on the patient's clinical information and your prior knowledge and reasoning.
- For each question:
    - Answer "Yes" or "No" based on the policy criteria and patient information.
    - If the question is a Free text question, provide a free text answer.
    - Strictly choose the answer from the options provided.
    - If options are provided, choose ONLY from those options.
    - If the question says "(Free text answer allowed)", you may write your answer freely.
    - If the question says "If Yes, ALSO select from ..." and you answered "Yes", you MUST also select from those follow-up options.
    - If the question says "If No, ALSO select from ..." and you answered "No", you MUST also select from those follow-up options.
- Output answers in JSON format ONLY, with no explanation.
- At the end, include the policy_url as "policy_url".

==== PATIENT INFORMATION ====
{patient_info_text}

==== SUMMARIZED POLICY COVERAGE CRITERIA (from URL: {policy_url}) ====
{policy_summary}

==== QUESTIONS ====
{questions_formatted}

Output your answers in JSON format only and include the policy_url at the end.
"""

    messages = [
        {"role": "system", "content": "You are a clinical insurance assistant."},
        {"role": "user", "content": prompt}
    ]

    def call_openai():
        response = openai_client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0
        )
        return response.choices[0].message.content.strip()

    def call_perplexity():
        headers = {
            "Authorization": f"Bearer {perplexity_api_key}",
            "Content-Type": "application/json"
        }
        data = {
            "model": "sonar-pro",
            "messages": messages,
            "temperature": 0
        }
        url = "https://api.perplexity.ai/chat/completions"
        res = requests.post(url, headers=headers, json=data)
        if res.status_code == 200:
            return res.json()["choices"][0]["message"]["content"].strip()
        else:
            raise Exception(f"Perplexity error: {res.status_code} - {res.text}")

    try:
        print(f"🧠 Running QnA ({qna_model})...")
        result_content = call_perplexity() if qna_model == "perplexity" else call_openai()
        result_json = clean_json_response(result_content)

        final_result = {}
        for k, v in result_json.items():
            if k == "policy_url":
                continue
            if "_selection" in k or "_details" in k:
                base_key = k.replace("_selection", "").replace("_details", "")
                final_result[f"{base_key}_followup"] = [v] if isinstance(v, str) else v
            else:
                final_result[k] = v

        # 모델 조합에 따른 폴더 구조 반영
        result_dir = f"/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/{search_model}_{summary_model}_{qna_model}"
        os.makedirs(result_dir, exist_ok=True)
        filename = os.path.join(result_dir, f"{case_id}_qna_result.json")

        with open(filename, "w") as f:
            json.dump(final_result, f, indent=2)

        print(f"✅ QnA result saved to {filename}")

    except Exception as e:
        print("❗ JSON parsing error:", e)
        final_result = {
            "error": "JSON parsing failed",
            "raw_content": result_content
        }

    print("QnA Result JSON:", final_result)
    return final_result


In [9]:
case_ex = [
    {
        "id": "Case1",
        "patient_info": "An 8-year-old boy with neurodevelopmental delay and seizures. A prior chromosomal microarray test was negative. Whole exome sequencing (WES) has been requested by the genetic counselor to investigate potential underlying genetic causes that may guide diagnosis and future treatment decisions. There is also a family history of neurodevelopmental disorders, as his older brother was diagnosed with autism spectrum disorder. The patient is covered by United Healthcare Choice Plus through a family plan and resides in New Jersey."
    },
    {
        "id": "Case2",
        "patient_info": "An 8-year-old boy with mild learning difficulties and no significant neurological symptoms. There is no family history of genetic conditions, and his prior chromosomal microarray test was negative. Whole exome sequencing (WES) has been requested by his primary care provider (PCP) to explore potential genetic factors as part of general health screening and educational planning. The patient is covered by United Healthcare Choice Plus through a family plan and resides in New Jersey."
    },
    {
        "id": "Case3",
        "patient_info": "A 35-year-old woman with a strong family history of breast and ovarian cancer. Her mother was diagnosed with breast cancer at age 42, and her maternal aunt had ovarian cancer in her 50s. The patient herself has no history of cancer but has dense breast tissue and is considered at increased risk. The genetic counselor has recommended BRCA1/BRCA2 testing to assess her hereditary cancer risk and guide risk-reducing management decisions, including potential prophylactic options. The patient is covered by Aetna Open Access Managed Choice Plan and resides in California."
    },
    {
        "id": "Case4",
        "patient_info": "A 28-year-old woman with no family history of breast or ovarian cancer. The patient requested BRCA1/BRCA2 genetic testing after reading about genetic risks online. There were no prior specialist consultations or referrals, and no other clinical risk factors have been identified. The test was ordered directly by her primary care physician at the patient's request. The patient is enrolled in Aetna Open Access Managed Choice Plan and lives in Texas."
    }
]

In [10]:
def run_all_model_combinations(case_ex, questions_list, openai_client, perplexity_api_key):
    model_options = ["chatgpt", "perplexity"]
    combinations = list(product(model_options, repeat=3))  # (search, summary, qna)

    for search_model, summary_model, qna_model in combinations:
        print(f"\n🚀 Starting experiments for: {search_model}_{summary_model}_{qna_model}\n")

        for case in case_ex:
            case_id = case["id"]
            patient_info = case["patient_info"]

            print(f"\n=== Running for {case_id} ===")

            try:
                policy_url = find_policy_url_with_web_search(
                    patient_info_text=patient_info,
                    model=search_model,
                    openai_client=openai_client,
                    perplexity_api_key=perplexity_api_key
                )

                policy_summary = extract_policy_summary(
                    patient_info_text=patient_info,
                    policy_url=policy_url,
                    model=summary_model,
                    openai_client=openai_client,
                    perplexity_api_key=perplexity_api_key
                )

                qna_result = run_qna(
                    case_id=case_id,
                    patient_info_text=patient_info,
                    policy_url=policy_url,
                    policy_summary=policy_summary,
                    questions_list=questions_list,
                    qna_model=qna_model,
                    openai_client=openai_client,
                    perplexity_api_key=perplexity_api_key,
                    search_model=search_model,
                    summary_model=summary_model
                )

            except Exception as e:
                print(f"❌ Failed for {case_id} in {search_model}_{summary_model}_{qna_model}")
                traceback.print_exc()

run_all_model_combinations(
    case_ex=case_ex,
    questions_list=questions_list,
    openai_client=client,
    perplexity_api_key=perplexity_api_key
)


🚀 Starting experiments for: chatgpt_chatgpt_chatgpt


=== Running for Case1 ===
🔁 Attempt 1 (chatgpt)...
🔁 Attempt 1 (chatgpt)...
🧠 Running QnA (chatgpt)...
✅ QnA result saved to /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/chatgpt_chatgpt_chatgpt/Case1_qna_result.json
QnA Result JSON: {'Q0': 'Whole exome sequencing (WES)', 'Q1': 'Not specified', 'Q2': 'Not specified', 'Q3': 'Yes', 'Q4': 'No', 'Q5': 'Yes', 'Q6': 'No', 'Q7': 'Yes', 'Q8': 'No', 'Q9': 'Not specified', 'Q10': 'No', 'Q10_followup': 'Diagnostic', 'Q11': 'Not specified', 'Q12': 'No', 'Q13': 'Not specified', 'Q14': 'Not listed', 'Q15': 'No', 'Q16': 'Yes', 'Q17': "Submit the claim with documentation of medical necessity, including the patient's clinical presentation, family history, and prior genetic testing results. Ensure that the test was ordered by a qualified specialist and that genetic counseling was provided if required."}

=== Running for Case2 ===
🔁 Attempt 1 (chatgpt)...
🔁 Attempt 1 (chatgpt)...
🧠 

In [11]:
def merge_qna_jsons_to_csv(folder_path, output_csv_path):
    all_data = []

    for file in os.listdir(folder_path):
        if file.endswith("_qna_result.json"):
            case_id = file.replace("_qna_result.json", "")
            json_path = os.path.join(folder_path, file)

            with open(json_path, "r") as f:
                try:
                    result = json.load(f)
                    flat_result = {"case_id": case_id}

                    for k, v in result.items():
                        if isinstance(v, list):
                            flat_result[k] = "; ".join(map(str, v))
                        else:
                            flat_result[k] = v

                    all_data.append(flat_result)
                except Exception as e:
                    print(f"❗ Failed to parse {file}: {e}")

    if all_data:
        df = pd.DataFrame(all_data)
        df.to_csv(output_csv_path, index=False)
        print(f"✅ Merged CSV saved to: {output_csv_path}")
    else:
        print(f"⚠️ No valid QnA result files found in: {folder_path}")

def merge_all_combinations_to_csv(base_dir):
    for folder_name in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder_name)
        if os.path.isdir(folder_path):
            output_csv = os.path.join(base_dir, f"{folder_name}.csv")
            merge_qna_jsons_to_csv(folder_path, output_csv)


merge_all_combinations_to_csv(
    base_dir="/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End"
)

✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/perplexity_chatgpt_perplexity.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/perplexity_perplexity_chatgpt.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/chatgpt_perplexity_perplexity.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/chatgpt_chatgpt_perplexity.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/perplexity_chatgpt_chatgpt.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/chatgpt_chatgpt_chatgpt.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/chatgpt_perplexity_chatgpt.csv
✅ Merged CSV saved to: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/perplexity_perplexity_perplexity.csv


In [12]:
def load_converted_results_from_folder(folder_path):
    converted_results = {}
    for file in os.listdir(folder_path):
        if file.endswith("_qna_result.json"):
            case_id = file.replace("_qna_result.json", "")
            file_path = os.path.join(folder_path, file)
            with open(file_path, "r") as f:
                try:
                    data = json.load(f)
                except json.JSONDecodeError as e:
                    print(f"❗ JSON decode error in {file}: {e}")
                    continue

                clean = {}
                for k, v in data.items():
                    if k.endswith("_followup"):
                        clean[k] = v if isinstance(v, list) else [v]
                    else:
                        clean[k] = v

                converted_results[case_id] = clean
    return converted_results

In [None]:
ground_truth = {
  "Case1": {
    "Q0": "Whole Exome Sequencing (WES)",
    "Q1": "Yes",
    "Q2": "Yes",
    "Q3": "Yes",
    "Q4": "Yes",
    "Q4_followup": [
      {
        "answer": "Yes",
        "followup": ["ACMG"]
      }
    ],
    "Q5": "Yes",
    "Q6": "No",
    "Q7": "Yes",
    "Q8": "No",
    "Q9": "Yes",
    "Q9_followup": ["Yes"],
    "Q10": "No",
    "Q10_followup": ["Diagnostic"],
    "Q11": "Yes",
    "Q12": "No",
    "Q13": "Yes",
    "Q14": "Yes",
    "Q14_followup": ["81415", "81416"],
    "Q15": "No",
    "Q16": "Yes"
  },
  
    "Case2": {
        "Q0": "Whole Exome Sequencing (WES)",
        "Q1": "Yes",
        "Q2": "No",
        "Q3": "No",
        "Q4": "No",
        "Q5": "Yes",
        "Q6": "No",
        "Q7": "No",
        "Q8": "No",
        "Q9": "Yes",
        "Q9_followup": [
      "No"
    ],
        "Q10": "No",
        "Q10_followup": [
      "Other"
    ],
        "Q11": "Yes",
        "Q12": "No",
        "Q13": "Yes",
        "Q14": "Yes",
        "Q14_followup": [
      "81415", "81416"
    ],
        "Q15": "No",
        "Q16": "No"
  },
  
    "Case3": {
        "Q0": "BRCA1/BRCA2 genetic testing",
        "Q1": "Yes",
        "Q2": "Yes",
        "Q3": "Yes",
        "Q4": "Yes",
        "Q4_followup": [
      {
        "answer": "Yes",
        "followup": ["NCCN"]
      }
    ],
        "Q5": "Yes",
        "Q6": "No",
        "Q7": "No",
        "Q8": "No",
        "Q9": "Yes",
        "Q9_followup": [
      "Yes"
    ],
        "Q10": "No",
        "Q10_followup": [
      "Risk Assessment"
    ],
        "Q11": "Yes",
        "Q12": "No",
        "Q13": "Yes",
        "Q14": "Yes",
        "Q14_followup": [
      "81162"
    ],
        "Q15": "No",
        "Q16": "Yes"
  },
  
    "Case4": {
        "Q0": "BRCA1/BRCA2 genetic testing",
        "Q1": "Yes",
        "Q2": "Yes",
        "Q3": "No",
        "Q4": "No",
        "Q5": "No",
        "Q6": "No",
        "Q7": "No",
        "Q8": "No",
        "Q9": "Yes",
        "Q9_followup": [
      "No"
    ],
        "Q10": "No",
        "Q10_followup": [
      "Risk Assessment"
    ],
        "Q11": "No",
        "Q12": "No",
        "Q13": "Yes",
        "Q14": "Yes",
        "Q14_followup": [
      "81162"
    ],
        "Q15": "No",
        "Q16": "No"
  }
    
}

In [14]:
def evaluate_qna_result(case_id, predicted_result, gold_result, folder_path=None):
    records = []
    correct_count = 0
    total_count = 0

    for qid in gold_result:
        if not qid.startswith("Q") or qid == "policy_url" or qid == "Q17" or "_followup" in qid:
            continue

        pred_answer = predicted_result.get(qid, "")
        gold_answer = gold_result.get(qid, "")

        if isinstance(pred_answer, list):
            pred_answer = ", ".join(pred_answer)
        if isinstance(gold_answer, list):
            gold_answer = ", ".join(gold_answer)

        pred_answer = pred_answer.strip()
        gold_answer = gold_answer.strip()

        is_correct = pred_answer == gold_answer
        score = 1 if is_correct else 0

        records.append({
            "Case": case_id,
            "Question": qid,
            "Predicted": pred_answer,
            "Gold": gold_answer,
            "Score": score
        })

        total_count += 1
        correct_count += score

        followup_key = qid + "_followup"
        pred_followup = predicted_result.get(followup_key, None)
        gold_followup = gold_result.get(followup_key, None)

        if is_correct and gold_followup is not None:
            def normalize(ans):
                if ans is None:
                    return "None"
                if isinstance(ans, list):
                    return ", ".join(ans)
                return ans

            pred_followup_norm = normalize(pred_followup)
            gold_followup_norm = normalize(gold_followup)

            followup_score = 1 if pred_followup_norm == gold_followup_norm else 0

            records.append({
                "Case": case_id,
                "Question": followup_key,
                "Predicted": pred_followup_norm,
                "Gold": gold_followup_norm,
                "Score": followup_score
            })

            total_count += 1
            correct_count += followup_score

    accuracy = correct_count / total_count * 100 if total_count > 0 else 0

    records.append({
        "Case": case_id,
        "Question": "TOTAL",
        "Predicted": f"Correct: {correct_count}",
        "Gold": f"Incorrect: {total_count - correct_count}",
        "Score": f"Accuracy: {accuracy:.2f}%"
    })

    df = pd.DataFrame(records)

    # ✅ Save CSV if folder_path is given
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
        csv_path = os.path.join(folder_path, f"evaluation_{case_id}.csv")
        df.to_csv(csv_path, index=False)
        print(f"📄 Saved evaluation to {csv_path}")

    return df, accuracy

In [20]:
def evaluate_all_folders_with_summary(base_dir, gold_answers, summary_output_csv):
    eval_output_dir = os.path.join(base_dir, "Evaluation")
    os.makedirs(eval_output_dir, exist_ok=True)

    summary_records = []

    for folder_name in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        print(f"\n📊 Evaluating folder: {folder_name}")

        converted_results = load_converted_results_from_folder(folder_path)
        all_dfs = []
        accuracies = []

        for case_id, pred_result in converted_results.items():
            gold_result = gold_answers.get(case_id)
            if gold_result is None:
                continue

            df_case, acc = evaluate_qna_result(case_id, pred_result, gold_result)
            all_dfs.append(df_case)
            accuracies.append(acc)

        if all_dfs:
            merged_df = pd.concat(all_dfs, ignore_index=True)
            eval_csv_path = os.path.join(eval_output_dir, f"{folder_name}.csv")
            merged_df.to_csv(eval_csv_path, index=False)
            print(f"✅ Saved: {eval_csv_path}")

            
            summary_records.append({
                "Model_Combination": folder_name,
                "Mean_Accuracy": f"{sum(accuracies)/len(accuracies):.2f}%" if accuracies else "N/A"
            })

    
    if summary_records:
        summary_df = pd.DataFrame(summary_records)
        os.makedirs(os.path.dirname(summary_output_csv), exist_ok=True)
        summary_df.to_csv(summary_output_csv, index=False)
        print(f"\n✅ Summary saved to: {summary_output_csv}")
        print(summary_df)

In [21]:
evaluate_all_folders_with_summary(
    base_dir="/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End",
    gold_answers=ground_truth,
    summary_output_csv="/home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation/summary_accuracy.csv"
)



📊 Evaluating folder: perplexity_chatgpt_perplexity
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation/perplexity_chatgpt_perplexity.csv

📊 Evaluating folder: perplexity_perplexity_chatgpt
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation/perplexity_perplexity_chatgpt.csv

📊 Evaluating folder: Evaluation

📊 Evaluating folder: chatgpt_perplexity_perplexity
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation/chatgpt_perplexity_perplexity.csv

📊 Evaluating folder: chatgpt_chatgpt_perplexity
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation/chatgpt_chatgpt_perplexity.csv

📊 Evaluating folder: perplexity_chatgpt_chatgpt
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation/perplexity_chatgpt_chatgpt.csv

📊 Evaluating folder: chatgpt_chatgpt_chatgpt
✅ Saved: /home/cptaswadu/RESCUE-n8n/insurance/results/LLM_QnA/End-To-End/Evaluation