# Installation

In [1]:
!pip install langchain faiss-cpu sentence-transformers -U langchain-community torch langdetect

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-4.0.2-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting nvidia-c

# Import Libraries

In [3]:
import warnings
from transformers.utils import logging as hf_logging

# Suppress transformer and user warnings.
hf_logging.set_verbosity_error()
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning, module="huggingface_hub.file_download")


import pandas as pd
import random
import re

# For embeddings and vector search.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Transformer pipelines.
from transformers import pipeline

# For language detection.
from langdetect import detect, DetectorFactory
DetectorFactory.seed = 0  # Ensure consistent language detection

# For geolocation and distance calculation.
from geopy.geocoders import Nominatim
from geopy.distance import geodesic

# Data Ingestion

In [4]:
# Read the specialist roster, then build one embedding document and one
# metadata dict per provider row.
df = pd.read_csv("/content/Mock_Specialist_Dataset.csv")

texts = []
metadatas = []
for _, record in df.iterrows():
    identity_part = f"Specialist profile for {record['name']}. Specialty: {record['specialty']}. Location: {record['location']}. "
    logistics_part = f"Insurance accepted: {record['insurance_accepted']}. Availability next 3 days: {record['availability_next_3_days']}. "
    summary_part = f"Profile summary: {record['profile_summary']}"
    texts.append(identity_part + logistics_part + summary_part)
    metadatas.append(record.to_dict())

In [5]:
texts

['Specialist profile for Dr. Aarti Mehta. Specialty: Cardiologist. Location: New Haven, CT. Insurance accepted: BlueCross, Aetna. Availability next 3 days: 3. Profile summary: Board-certified cardiologist with 10 years of experience in treating arrhythmia and heart failure.',
 'Specialist profile for Dr. James Wright. Specialty: Pulmonologist. Location: Stamford, CT. Insurance accepted: Aetna, Cigna. Availability next 3 days: 0. Profile summary: Specialist in respiratory and lung disorders including COPD and asthma. 15+ years in pulmonary care.',
 'Specialist profile for Dr. Leena Kapoor. Specialty: Neurologist. Location: Hartford, CT. Insurance accepted: United, BlueCross. Availability next 3 days: 2. Profile summary: Neurodiagnostics expert focusing on dizziness, migraines, and stroke recovery.',
 'Specialist profile for Dr. Brian Choi. Specialty: Internal Medicine. Location: Bridgeport, CT. Insurance accepted: Medicare, Aetna. Availability next 3 days: 5. Profile summary: General pr

In [6]:
metadatas

[{'specialist_id': 1,
  'name': 'Dr. Aarti Mehta',
  'specialty': 'Cardiologist',
  'location': 'New Haven, CT',
  'insurance_accepted': 'BlueCross, Aetna',
  'availability_next_3_days': 3,
  'profile_summary': 'Board-certified cardiologist with 10 years of experience in treating arrhythmia and heart failure.'},
 {'specialist_id': 2,
  'name': 'Dr. James Wright',
  'specialty': 'Pulmonologist',
  'location': 'Stamford, CT',
  'insurance_accepted': 'Aetna, Cigna',
  'availability_next_3_days': 0,
  'profile_summary': 'Specialist in respiratory and lung disorders including COPD and asthma. 15+ years in pulmonary care.'},
 {'specialist_id': 3,
  'name': 'Dr. Leena Kapoor',
  'specialty': 'Neurologist',
  'location': 'Hartford, CT',
  'insurance_accepted': 'United, BlueCross',
  'availability_next_3_days': 2,
  'profile_summary': 'Neurodiagnostics expert focusing on dizziness, migraines, and stroke recovery.'},
 {'specialist_id': 4,
  'name': 'Dr. Brian Choi',
  'specialty': 'Internal Medi

# Vector Embedding

In [7]:
# Embed every profile document and index the vectors in a FAISS store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = FAISS.from_texts(texts=texts, embedding=embeddings, metadatas=metadatas)
print("Vector store created successfully.")

# Zero-shot intent classification: the patient query is scored against
# each of these specialty labels.
candidate_specialties = [
    "Cardiologist",
    "Pulmonologist",
    "Neurologist",
    "General Practitioner",
    "Orthopedic",
    "Dermatologist",
]
intent_classifier = pipeline(task="zero-shot-classification", model="facebook/bart-large-mnli")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Vector store created successfully.


# RAG Pipeline

In [10]:
# === Utilities ===

def is_valid_input(text: str) -> bool:
    """Return True when ``text`` looks like a usable English symptom description.

    The input must be a string with at least 8 characters after stripping,
    contain at least two alphabetic words of two or more letters, and be
    detected as English by ``langdetect``.

    Args:
        text: Free-text description supplied by the patient.

    Returns:
        True if every check passes; False otherwise, including when language
        detection raises.
    """
    if not isinstance(text, str) or len(text.strip()) < 8:
        return False
    words = re.findall(r'\b[a-zA-Z]{2,}\b', text.lower())
    if len(words) < 2:
        return False
    try:
        return detect(text) == 'en'
    except Exception:
        # Detection failures count as "not valid". Catching Exception instead
        # of using a bare except lets KeyboardInterrupt/SystemExit propagate.
        return False

def is_valid_location(location_str):
    """Return True when ``location_str`` can be resolved to coordinates.

    Args:
        location_str: Location text to pass to ``get_coordinates``.

    Returns:
        True if ``get_coordinates`` returns a value other than None; False if
        it returns None or raises.
    """
    try:
        return get_coordinates(location_str) is not None
    except Exception:
        # Narrowed from a bare except so interrupts are not swallowed.
        return False

def is_valid_insurance(insurance_input):
    """Check whether an insurance name appears anywhere in the provider dataset.

    Matching ignores case and surrounding whitespace, the same normalisation
    the insurance authorization step applies to the patient's insurance.

    Args:
        insurance_input: Insurance name supplied by the patient.

    Returns:
        True if the normalised name is listed in the ``insurance_accepted``
        column of the module-level ``df``; False otherwise, including for
        non-string or blank input.
    """
    if not isinstance(insurance_input, str):
        return False
    wanted = insurance_input.strip().lower()
    if not wanted:
        return False
    # dropna/str guard against empty cells, which would otherwise break
    # .split(); blank tokens are skipped so they never count as a plan.
    known_insurances = {
        name.strip().lower()
        for entry in df["insurance_accepted"].dropna()
        for name in str(entry).split(",")
        if name.strip()
    }
    return wanted in known_insurances

# === Classification ===

def log_specialty_confidence_scores(patient_query: str, labels: list) -> dict:
    """Rank candidate specialties for a patient query via zero-shot classification.

    Args:
        patient_query: Free-text symptom description.
        labels: Candidate specialty names passed to the classifier.

    Returns:
        On invalid input, a dict with an ``"error"`` message, empty
        ``raw_scores``/``top_2``, ``top_label`` of None and ``top_gap`` 0.0.
        Otherwise a dict with:
            raw_scores: list of (label, score) pairs as returned by the classifier.
            top_label: first label in the classifier result.
            top_2: the first two labels (one if only one label was supplied).
            top_gap: absolute difference between the first two scores, or the
                first score itself when there is no second score.
    """
    if not is_valid_input(patient_query):
        return {"error": "Invalid input: Please describe your symptoms in more detail.", "raw_scores": [], "top_label": None, "top_2": [], "top_gap": 0.0}
    hypothesis = "The patient might need a {}."
    result = intent_classifier(patient_query, candidate_labels=labels, hypothesis_template=hypothesis)
    ranked_labels = result["labels"]
    scores = result["scores"]
    # With a single candidate label there is no runner-up; indexing scores[1]
    # would raise IndexError, so the gap falls back to the top score.
    top_gap = abs(scores[0] - scores[1]) if len(scores) > 1 else scores[0]
    return {
        "raw_scores": list(zip(ranked_labels, scores)),
        "top_label": ranked_labels[0],
        "top_2": ranked_labels[:2],
        "top_gap": top_gap
    }

# === Location and Proximity ===

geolocator = Nominatim(user_agent="specialist_locator", timeout=5)

# Successful lookups keyed by the normalised location string. Scoring asks for
# the same patient/provider locations once per candidate, so caching avoids
# repeating identical geocoding requests. Failures are not cached, which keeps
# a transient error from sticking.
_coordinate_cache = {}

def get_coordinates(location_str: str):
    """Return ``(latitude, longitude)`` for a location string, or None.

    Args:
        location_str: Free-text location, e.g. a "City, ST" string.

    Returns:
        A (latitude, longitude) tuple when the geocoder finds a match; None
        when the input is blank or not a string, no match is found, or the
        lookup raises.
    """
    if not isinstance(location_str, str) or not location_str.strip():
        return None
    cache_key = location_str.strip().lower()
    if cache_key in _coordinate_cache:
        return _coordinate_cache[cache_key]
    try:
        location = geolocator.geocode(location_str)
    except Exception:
        # Best-effort lookup: any geocoder failure is reported as "unknown".
        return None
    if not location:
        return None
    coords = (location.latitude, location.longitude)
    _coordinate_cache[cache_key] = coords
    return coords

def calculate_proximity_score(provider_location: str, patient_location: str, max_threshold_km=100.0):
    """Score how close a provider is to the patient.

    The score is 1.0 at 0 km and decreases linearly to 0.0 at
    ``max_threshold_km``; anything farther also scores 0.0.

    Args:
        provider_location: Provider location text.
        patient_location: Patient location text.
        max_threshold_km: Distance at which the score reaches zero. Must be
            positive.

    Returns:
        ``(score, distance_km)``, both rounded to 2 decimals, or
        ``(0.0, None)`` when either location cannot be geocoded.

    Raises:
        ValueError: if ``max_threshold_km`` is not positive.
    """
    if max_threshold_km <= 0:
        raise ValueError("max_threshold_km must be positive")
    provider_coords = get_coordinates(provider_location)
    patient_coords = get_coordinates(patient_location)
    if not (provider_coords and patient_coords):
        return 0.0, None
    dist = geodesic(provider_coords, patient_coords).km
    # 0.0 (not 0) keeps the score a float on every path.
    score = max(0.0, 1 - (dist / max_threshold_km))
    return round(score, 2), round(dist, 2)

# === Scoring ===

def update_provider_metadata(metadata: dict):
    """Identity hook: hand back the very same metadata object that was passed in.

    Kept as the single place where provider metadata could be transformed
    before scoring.
    """
    unchanged = metadata
    return unchanged

def calculate_scores(candidate_metadata, valid_specialties, patient_insurance, patient_location):
    """Compute the insurance, availability, proximity and composite scores.

    Args:
        candidate_metadata: Provider metadata dict (reads
            ``insurance_accepted``, ``availability_next_3_days``, ``location``).
        valid_specialties: Accepted for interface compatibility; not used in
            the score.
        patient_insurance: Patient's insurance name; compared ignoring case
            and surrounding whitespace.
        patient_location: Patient location text used for the distance.

    Returns:
        ``(metadata, scores)`` where ``scores`` has ``insurance_score`` (0/1),
        ``availability_score``, ``proximity_score``, ``composite_score``
        (0.5 / 0.3 / 0.2 weighting, rounded to 2 decimals) and ``distance_km``
        (None when the distance is unknown).
    """
    updated_metadata = update_provider_metadata(candidate_metadata)

    accepted_raw = updated_metadata.get("insurance_accepted", "")
    # A missing CSV cell may arrive as a non-string (e.g. NaN); treat that as
    # "no insurance listed" instead of failing on .split().
    insurances = [x.strip().lower() for x in accepted_raw.split(',')] if isinstance(accepted_raw, str) else []
    # Strip as well as lowercase so this agrees with insurance_authorization_agent.
    insurance_score = 1 if patient_insurance.strip().lower() in insurances else 0

    availability = updated_metadata.get("availability_next_3_days", 0)
    # NOTE(review): a larger availability value yields a LOWER score here
    # (and 10 or more yields 0). If the field counts open slots this looks
    # inverted; confirm the intended semantics before changing the formula.
    availability_score = max(0, (10 - availability) / 10) if availability > 0 else 0

    proximity_score, distance = calculate_proximity_score(updated_metadata.get("location", ""), patient_location)
    composite = round(0.5 * insurance_score + 0.3 * availability_score + 0.2 * proximity_score, 2)
    return updated_metadata, {
        "insurance_score": insurance_score,
        "availability_score": availability_score,
        "proximity_score": proximity_score,
        "composite_score": composite,
        "distance_km": distance
    }

# === Explanation ===

# Summarization pipeline that generate_explanation calls for long inputs.
# NOTE(review): pad_token_id=50256 does not obviously correspond to this BART
# checkpoint's own pad token; confirm the override is intended.
explanation_generator = pipeline("summarization", model="facebook/bart-large-cnn", pad_token_id=50256)

def generate_explanation(details: dict, candidate_metadata: dict):
    """Build a short explanation of why a provider was recommended.

    The raw text (scores plus the provider's profile summary) is returned
    unchanged when it has fewer than 50 words. Longer text is summarised; the
    raw text is used as a fallback if summarisation fails or the summary looks
    truncated.

    Args:
        details: Score dict with ``insurance_score``, ``availability_score``
            and ``proximity_score``.
        candidate_metadata: Provider metadata containing ``profile_summary``.

    Returns:
        The explanation string.
    """
    input_text = (
        f"Insurance: {details['insurance_score']}, "
        f"Availability: {details['availability_score']:.2f}, "
        f"Proximity: {details['proximity_score']:.2f}. "
        f"Doctor's profile: {candidate_metadata['profile_summary']}"
    )
    word_count = len(input_text.split())
    if word_count < 50:
        return input_text
    try:
        max_len = min(64, int(word_count * 0.6))
        output = explanation_generator(input_text, max_length=max_len, min_length=20, truncation=True, do_sample=False)[0]
        summary = output.get("summary_text", input_text).strip()
    except Exception:
        # Summarisation is best-effort; fall back to the raw text.
        return input_text
    # Compare the last whole word: a suffix test for "in"/"and" would also
    # reject summaries that legitimately end in words like "pain" or "skin".
    summary_words = summary.split()
    last_word = summary_words[-1] if summary_words else ""
    if last_word in ("in", "and") or len(summary) < 30:
        return input_text
    if not summary.endswith((".", "!", "?")):
        summary += "."
    return summary


# === Agents ===

def referral_supervisor_agent(matches, approval_threshold=0.6):
    """Tag each match as approved or needing review, based on its composite score.

    Args:
        matches: List of candidate dicts, each with a ``composite_score``.
            The dicts are updated in place.
        approval_threshold: Minimum composite score for "Approved". Defaults
            to 0.6, the previously hard-coded value.

    Returns:
        The same ``matches`` list, each entry carrying ``supervisor_review``
        set to "Approved" or "Needs Review".
    """
    for candidate in matches:
        approved = candidate["composite_score"] >= approval_threshold
        candidate["supervisor_review"] = "Approved" if approved else "Needs Review"
    return matches

def insurance_authorization_agent(matches, patient_insurance):
    """Annotate each match with an insurance authorization status.

    Args:
        matches: List of candidate dicts, each with a ``metadata`` dict that
            may contain a comma-separated ``insurance_accepted`` string. The
            dicts are updated in place.
        patient_insurance: Patient's insurance name; compared ignoring case
            and surrounding whitespace.

    Returns:
        The same ``matches`` list, each entry carrying
        ``insurance_auth_status``: "Pre-Authorized" when the insurance is
        accepted, otherwise a message listing the accepted insurances.
    """
    # Loop-invariant: normalise the patient's insurance once.
    patient_ins = patient_insurance.strip().lower()
    for candidate in matches:
        accepted_raw = candidate["metadata"].get("insurance_accepted", "")
        # Non-string values (e.g. NaN from an empty cell) mean nothing is
        # listed; blank tokens are dropped so empty input can never match.
        if isinstance(accepted_raw, str):
            accepted_insurances = [ins.strip() for ins in accepted_raw.split(",") if ins.strip()]
        else:
            accepted_insurances = []
        if patient_ins in (ins.lower() for ins in accepted_insurances):
            candidate["insurance_auth_status"] = "Pre-Authorized"
        else:
            accepted_list = ", ".join(accepted_insurances)
            candidate["insurance_auth_status"] = (
                f"Your insurance '{patient_insurance}' is not accepted by this doctor. Please register with one of: {accepted_list}"
            )
    return matches


# === Matching ===

def get_ranked_specialists_with_reflection(patient_query, specialty_result, vector_store, patient_insurance, patient_location, confidence_threshold=0.6):
    """Find and rank specialists for a query, re-querying once if confidence is low.

    A first similarity search is scored. If every candidate's composite score
    is below ``confidence_threshold``, a rephrased query is searched too.
    Providers already scored are not scored again, and the final ranking
    covers the candidates from both searches, so the second pass can add
    options but never discard a better first-pass one.

    Args:
        patient_query: Free-text symptom description.
        specialty_result: Dict with ``top_label``, ``top_2`` and ``top_gap``.
        vector_store: Store exposing ``similarity_search(query, k=...)``.
        patient_insurance: Patient's insurance name.
        patient_location: Patient location text.
        confidence_threshold: Composite score below which the fallback
            re-query is triggered.

    Returns:
        ``(top_candidates, was_reflection_used)``: up to three candidate
        dicts sorted by composite score (descending), and whether the fallback
        query ran.
    """
    recommended_specialties = [specialty_result["top_label"]]
    # Slice instead of indexing: top_2 may hold fewer than two labels.
    runner_up = specialty_result["top_2"][1:2]
    if specialty_result["top_gap"] < 0.07 and runner_up:
        recommended_specialties.append(runner_up[0])

    # Initial query boost
    boosted_query = f"{patient_query}. Seeking help from a {recommended_specialties[0]}"
    search_results = vector_store.similarity_search(boosted_query, k=5)

    scored_by_provider = {}

    def rank_all(search_results):
        """Score providers not seen yet and return all scored candidates, best first."""
        for doc in search_results:
            provider_key = doc.metadata.get("specialist_id", doc.metadata.get("name", id(doc)))
            if provider_key in scored_by_provider:
                continue
            updated_metadata, scores = calculate_scores(doc.metadata, recommended_specialties, patient_insurance, patient_location)
            explanation = generate_explanation(scores, updated_metadata)
            scored_by_provider[provider_key] = {
                "name": updated_metadata.get("name", "Unknown"),
                "specialty": updated_metadata.get("specialty", "Unknown"),
                "composite_score": scores["composite_score"],
                "distance_km": scores["distance_km"],
                "explanation": explanation,
                "metadata": updated_metadata
            }
        return sorted(scored_by_provider.values(), key=lambda x: x["composite_score"], reverse=True)

    top_candidates = rank_all(search_results)
    was_reflection_used = False

    # Reflection loop trigger – fallback re-query if all scores are low
    if all(c["composite_score"] < confidence_threshold for c in top_candidates):
        print("Low confidence in top results. Triggering reflection loop with rephrased query...")
        refined_query = f"{patient_query}. I need an expert {recommended_specialties[0]} with availability and insurance match."
        refined_results = vector_store.similarity_search(refined_query, k=5)
        top_candidates = rank_all(refined_results)
        was_reflection_used = True

    return top_candidates[:3], was_reflection_used




# === Test Interface ===

def run_test_case(query, patient_insurance="BlueCross", patient_location="New Haven, CT"):
    """Run the recommender end to end for one query and print the results.

    Args:
        query: Free-text symptom description.
        patient_insurance: Patient's insurance name (at least 3 characters).
        patient_location: Patient location text (at least 3 characters).

    Returns:
        None. Results, or a message explaining why the case was rejected, are
        printed.
    """
    if not query or not query.strip():
        print("--> Please enter a symptom description.")
        return
    if len(patient_insurance.strip()) < 3:
        print("--> Please enter a valid insurance provider.")
        return
    if len(patient_location.strip()) < 3:
        print("--> Please enter a valid location.")
        return

    print(f"\nQuery: {query}")
    print(f"Location: {patient_location} | Insurance: {patient_insurance}")

    specialty_result = log_specialty_confidence_scores(query, candidate_specialties)
    if specialty_result.get("error"):
        print("-->", specialty_result["error"])
        return

    top_matches, reflection_used = get_ranked_specialists_with_reflection(query, specialty_result, vector_store, patient_insurance, patient_location)
    top_matches = referral_supervisor_agent(top_matches)
    top_matches = insurance_authorization_agent(top_matches, patient_insurance)

    print(f"\n---> Reflection Used: {'Yes' if reflection_used else 'No'}")
    for i, match in enumerate(top_matches, 1):
        # distance_km is None when a location could not be geocoded; applying
        # the ':.2f' format to None would raise TypeError.
        distance = match['distance_km']
        distance_text = f"{distance:.2f} km" if distance is not None else "unknown"
        print(f"\n{i}.{match['name']} ({match['specialty']})")
        print(f"  Score: {match['composite_score']}")
        print(f"  Distance: {distance_text}")
        print(f"  Explanation: {match['explanation']}")
        print(f"  Supervisor Review: {match['supervisor_review']}")
        print(f"  Insurance Status: {match['insurance_auth_status']}")

# Sample Test Cases

In [13]:
def _case_field(case, key, default):
    """Read a test-case field as stripped text, using ``default`` when missing or None."""
    value = case.get(key, default)
    return (default if value is None else str(value)).strip()


def batch_test_runner(test_cases):
    """Validate and run a batch of recommender test cases, printing the results.

    Args:
        test_cases: Iterable of dicts with optional ``query``, ``insurance``
            and ``location`` keys. Missing or None insurance/location fall
            back to "BlueCross" / "New Haven, CT"; a missing or None query is
            treated as empty.

    Returns:
        None. Cases with an invalid query or an unresolvable location are
        skipped with a message; an unknown insurance only produces a warning.
    """
    print("\nRunning Specialist Recommender on Batch Inputs\n")

    for idx, case in enumerate(test_cases, 1):
        query = _case_field(case, "query", "")
        insurance = _case_field(case, "insurance", "BlueCross")
        location = _case_field(case, "location", "New Haven, CT")

        if not is_valid_input(query):
            print(f"Test Case {idx}: Invalid symptom description.\n")
            continue

        if not is_valid_location(location):
            print(f"Test Case {idx}: Invalid location format.\n")
            continue

        if not is_valid_insurance(insurance):
            print(f"Test Case {idx}: Insurance not found in dataset. Proceeding anyway.\n")

        print(f"\nRunning Test Case {idx}")
        print(f"Query: {query}")
        print(f"Insurance: {insurance}")
        print(f"Location: {location}\n")

        run_test_case(query, patient_insurance=insurance, patient_location=location)

# Example usage
if __name__ == "__main__":
    # Each tuple is (symptom query, insurance, location).
    test_cases = [
        {"query": query, "insurance": insurance, "location": location}
        for query, insurance, location in [
            ("I’ve been experiencing chest tightness and shortness of breath during exercise.", "BlueCross", "New Haven, CT"),
            ("I often feel dizzy and have trouble concentrating during work hours.", "United", "Hartford, CT"),
            ("My heart races randomly, even while I’m resting or watching TV.", "Aetna", "Stamford, CT"),
            ("I'm dealing with a persistent cough and trouble breathing at night.", "Cigna", "Stamford, CT"),
            ("I feel chest flutters while jogging and get lightheaded afterward.", "BlueCross", "Norwalk, CT"),
            ("I've had migraines that come with vision issues and confusion lately.", "United", "Hartford, CT"),
            ("I've been struggling with high blood pressure and chest pressure.", "Aetna", "New Haven, CT"),
            ("I have fatigue, body chills, and recurring infections.", "Cigna", "Stamford, CT"),
            ("My glucose levels fluctuate and I feel tired most of the time.", "Medicare", "Bridgeport, CT"),
            ("I feel pressure in my chest that worsens with exertion and feel dizzy sometimes.", "BlueCross", "Norwalk, CT"),
        ]
    ]

    batch_test_runner(test_cases)


Running Specialist Recommender on Batch Inputs


Running Test Case 1
Query: I’ve been experiencing chest tightness and shortness of breath during exercise.
Insurance: BlueCross
Location: New Haven, CT


Query: I’ve been experiencing chest tightness and shortness of breath during exercise.
Location: New Haven, CT | Insurance: BlueCross

---> Reflection Used: No

1.Dr. Aarti Mehta (Cardiologist)
  Score: 0.91
  Distance: 0.00 km
  Explanation: Insurance: 1, Availability: 0.70, Proximity: 1.00. Doctor's profile: Board-certified cardiologist with 10 years of experience in treating arrhythmia and heart failure.
  Supervisor Review: Approved
  Insurance Status: Pre-Authorized

2.Dr. Sophia Martinez (Cardiologist)
  Score: 0.88
  Distance: 45.69 km
  Explanation: Insurance: 1, Availability: 0.90, Proximity: 0.54. Doctor's profile: Cardiologist specializing in exertional chest pain, athletic heart monitoring, and hypertension management.
  Supervisor Review: Approved
  Insurance Status: Pre-A

# Sample Test Cases with a 0.95 Threshold (Reflection Loop)

In [9]:
def _case_field(case, key, default):
    """Read a test-case field as stripped text, using ``default`` when missing or None."""
    value = case.get(key, default)
    return (default if value is None else str(value)).strip()


def batch_test_runner(test_cases):
    """Validate and run a batch of recommender test cases, printing the results.

    Args:
        test_cases: Iterable of dicts with optional ``query``, ``insurance``
            and ``location`` keys. Missing or None insurance/location fall
            back to "BlueCross" / "New Haven, CT"; a missing or None query is
            treated as empty.

    Returns:
        None. Cases with an invalid query or an unresolvable location are
        skipped with a message; an unknown insurance only produces a warning.
    """
    print("\nRunning Specialist Recommender on Batch Inputs\n")

    for idx, case in enumerate(test_cases, 1):
        query = _case_field(case, "query", "")
        insurance = _case_field(case, "insurance", "BlueCross")
        location = _case_field(case, "location", "New Haven, CT")

        if not is_valid_input(query):
            print(f"Test Case {idx}: Invalid symptom description.\n")
            continue

        if not is_valid_location(location):
            print(f"Test Case {idx}: Invalid location format.\n")
            continue

        if not is_valid_insurance(insurance):
            print(f"Test Case {idx}: Insurance not found in dataset. Proceeding anyway.\n")

        print(f"\nRunning Test Case {idx}")
        print(f"Query: {query}")
        print(f"Insurance: {insurance}")
        print(f"Location: {location}\n")

        run_test_case(query, patient_insurance=insurance, patient_location=location)

if __name__ == "__main__":
    # Deliberately vague queries; each tuple is (symptom query, insurance, location).
    test_cases = [
        {"query": query, "insurance": insurance, "location": location}
        for query, insurance, location in [
            ("I feel unwell sometimes, but it's hard to describe. Maybe it's just stress?", "BlueCross", "New Haven, CT"),
            ("My legs hurt when I walk, and I get tired easily but don’t know what’s wrong.", "Aetna", "Stamford, CT"),
            ("Occasionally I get a mild headache and I just feel off — nothing specific.", "United", "Hartford, CT"),
        ]
    ]
    batch_test_runner(test_cases)


Running Specialist Recommender on Batch Inputs


Running Test Case 1
Query: I feel unwell sometimes, but it's hard to describe. Maybe it's just stress?
Insurance: BlueCross
Location: New Haven, CT


Query: I feel unwell sometimes, but it's hard to describe. Maybe it's just stress?
Location: New Haven, CT | Insurance: BlueCross
Low confidence in top results. Triggering reflection loop with rephrased query...

---> Reflection Used: Yes

1.Dr. Aarti Mehta (Cardiologist)
  Score: 0.91
  Distance: 0.00 km
  Explanation: Insurance: 1, Availability: 0.70, Proximity: 1.00. Doctor's profile: Board-certified cardiologist with 10 years of experience in treating arrhythmia and heart failure.
  Supervisor Review: Approved
  Insurance Status: Pre-Authorized

2.Dr. Sophia Martinez (Cardiologist)
  Score: 0.88
  Distance: 45.69 km
  Explanation: Insurance: 1, Availability: 0.90, Proximity: 0.54. Doctor's profile: Cardiologist specializing in exertional chest pain, athletic heart monitoring, and hyper

# Invalid Test Cases

In [14]:
def _case_field(case, key, default):
    """Read a test-case field as stripped text, using ``default`` when missing or None."""
    value = case.get(key, default)
    return (default if value is None else str(value)).strip()


def batch_test_runner(test_cases):
    """Validate and run a batch of recommender test cases, printing the results.

    Args:
        test_cases: Iterable of dicts with optional ``query``, ``insurance``
            and ``location`` keys. Missing or None insurance/location fall
            back to "BlueCross" / "New Haven, CT"; a missing or None query is
            treated as empty.

    Returns:
        None. Cases with an invalid query or an unresolvable location are
        skipped with a message; an unknown insurance only produces a warning.
    """
    print("\nRunning Specialist Recommender on Batch Inputs\n")

    for idx, case in enumerate(test_cases, 1):
        query = _case_field(case, "query", "")
        insurance = _case_field(case, "insurance", "BlueCross")
        location = _case_field(case, "location", "New Haven, CT")

        if not is_valid_input(query):
            print(f"Test Case {idx}: Invalid symptom description.\n")
            continue

        if not is_valid_location(location):
            print(f"Test Case {idx}: Invalid location format.\n")
            continue

        if not is_valid_insurance(insurance):
            print(f"Test Case {idx}: Insurance not found in dataset. Proceeding anyway.\n")

        print(f"\nRunning Test Case {idx}")
        print(f"Query: {query}")
        print(f"Insurance: {insurance}")
        print(f"Location: {location}\n")

        run_test_case(query, patient_insurance=insurance, patient_location=location)

# Example usage
if __name__ == "__main__":
    # Negative-path inputs; each tuple is (symptom query, insurance, location).
    test_cases = [
        {"query": query, "insurance": insurance, "location": location}
        for query, insurance, location in [
            # Too short, invalid input
            ("Help", "BlueCross", "New Haven, CT"),
            # Insurance not in dataset
            ("I feel dizzy after running and experience heavy breathing.", "XYZHealth", "Hartford, CT"),
            # Invalid location
            ("My head feels foggy, and sometimes I lose focus.", "Aetna", "Atlantis"),
            # Non-English input
            ("これは日本語のテキストです。胸が痛いです。", "Cigna", "Stamford, CT"),
            # Junky, misspelled input
            ("gdjyfytfhmcfycmk", "Medicare", "Bridgeport, CT"),
        ]
    ]

    batch_test_runner(test_cases)


Running Specialist Recommender on Batch Inputs

Test Case 1: Invalid symptom description.

Test Case 2: Insurance not found in dataset. Proceeding anyway.


Running Test Case 2
Query: I feel dizzy after running and experience heavy breathing.
Insurance: XYZHealth
Location: Hartford, CT


Query: I feel dizzy after running and experience heavy breathing.
Location: Hartford, CT | Insurance: XYZHealth

---> Reflection Used: Yes

1.Dr. Leena Kapoor (Neurologist)
  Score: 0.44
  Distance: 0.00 km
  Explanation: Insurance: 0, Availability: 0.80, Proximity: 1.00. Doctor's profile: Neurodiagnostics expert focusing on dizziness, migraines, and stroke recovery.
  Supervisor Review: Needs Review
  Insurance Status: Your insurance 'XYZHealth' is not accepted by this doctor. Please register with one of: United, BlueCross

2.Dr. Aarti Mehta (Cardiologist)
  Score: 0.3
  Distance: 54.32 km
  Explanation: Insurance: 0, Availability: 0.70, Proximity: 0.46. Doctor's profile: Board-certified cardiologist

# Dynamic Real-Time API Availability (Demo)

In [11]:
# === Real-time Integration Placeholder ===

def fetch_real_time_availability(specialist_id: int):
    """
    Placeholder for real-time availability API integration.

    This function is intended to fetch live availability for a given specialist.
    In a production environment, this would call a RESTful API or access a live scheduling database.

    Example:
        import requests
        try:
            response = requests.get(f"https://api.hospital.com/availability/{specialist_id}")
            if response.status_code == 200:
                data = response.json()
                return data.get("available_slots", 0)
        except Exception as e:
            print(f"API Error for specialist {specialist_id}: {e}")
            return None

    For testing/demo purposes, this function simulates a value instead.

    Args:
        specialist_id: Identifier of the specialist to look up.

    Returns:
        A simulated slot count between 0 and 10 inclusive. The value is
        derived from ``specialist_id``, so repeated calls for the same
        specialist agree and notebook re-runs are reproducible. A real
        integration may return None to make callers fall back to the static
        availability.
    """

    # TODO: Integrate live availability feed

    # Simulated fallback mode: return None here to use static availability.

    # Simulated live mode (active): a private generator seeded with a string
    # built from the id gives a stable value per specialist without touching
    # the global random state. A string seed also works for ids that are not
    # plain Python ints.
    simulated_feed = random.Random(f"availability:{specialist_id}")
    return simulated_feed.randint(0, 10)  # simulate 0 to 10 available slots


In [14]:
# === Utilities ===

def is_valid_input(text: str) -> bool:
    """Return True when ``text`` looks like a usable English symptom description.

    The input must be a string with at least 8 characters after stripping,
    contain at least two alphabetic words of two or more letters, and be
    detected as English by ``langdetect``.

    Args:
        text: Free-text description supplied by the patient.

    Returns:
        True if every check passes; False otherwise, including when language
        detection raises.
    """
    if not isinstance(text, str) or len(text.strip()) < 8:
        return False
    words = re.findall(r'\b[a-zA-Z]{2,}\b', text.lower())
    if len(words) < 2:
        return False
    try:
        return detect(text) == 'en'
    except Exception:
        # Detection failures count as "not valid". Catching Exception instead
        # of using a bare except lets KeyboardInterrupt/SystemExit propagate.
        return False

def is_valid_location(location_str):
    """Return True when ``location_str`` can be resolved to coordinates.

    Args:
        location_str: Location text to pass to ``get_coordinates``.

    Returns:
        True if ``get_coordinates`` returns a value other than None; False if
        it returns None or raises.
    """
    try:
        return get_coordinates(location_str) is not None
    except Exception:
        # Narrowed from a bare except so interrupts are not swallowed.
        return False

def is_valid_insurance(insurance_input):
    """Check whether an insurance name appears anywhere in the provider dataset.

    Matching ignores case and surrounding whitespace, the same normalisation
    the insurance authorization step applies to the patient's insurance.

    Args:
        insurance_input: Insurance name supplied by the patient.

    Returns:
        True if the normalised name is listed in the ``insurance_accepted``
        column of the module-level ``df``; False otherwise, including for
        non-string or blank input.
    """
    if not isinstance(insurance_input, str):
        return False
    wanted = insurance_input.strip().lower()
    if not wanted:
        return False
    # dropna/str guard against empty cells, which would otherwise break
    # .split(); blank tokens are skipped so they never count as a plan.
    known_insurances = {
        name.strip().lower()
        for entry in df["insurance_accepted"].dropna()
        for name in str(entry).split(",")
        if name.strip()
    }
    return wanted in known_insurances


# === Classification ===

def log_specialty_confidence_scores(patient_query: str, labels: list) -> dict:
    """Rank candidate specialties for a patient query via zero-shot classification.

    Args:
        patient_query: Free-text symptom description.
        labels: Candidate specialty names passed to the classifier.

    Returns:
        On invalid input, a dict with an ``"error"`` message, empty
        ``raw_scores``/``top_2``, ``top_label`` of None and ``top_gap`` 0.0.
        Otherwise a dict with:
            raw_scores: list of (label, score) pairs as returned by the classifier.
            top_label: first label in the classifier result.
            top_2: the first two labels (one if only one label was supplied).
            top_gap: absolute difference between the first two scores, or the
                first score itself when there is no second score.
    """
    if not is_valid_input(patient_query):
        return {"error": "Invalid input: Please describe your symptoms in more detail.", "raw_scores": [], "top_label": None, "top_2": [], "top_gap": 0.0}
    hypothesis = "The patient might need a {}."
    result = intent_classifier(patient_query, candidate_labels=labels, hypothesis_template=hypothesis)
    ranked_labels = result["labels"]
    scores = result["scores"]
    # With a single candidate label there is no runner-up; indexing scores[1]
    # would raise IndexError, so the gap falls back to the top score.
    top_gap = abs(scores[0] - scores[1]) if len(scores) > 1 else scores[0]
    return {
        "raw_scores": list(zip(ranked_labels, scores)),
        "top_label": ranked_labels[0],
        "top_2": ranked_labels[:2],
        "top_gap": top_gap
    }


# === Location and Proximity ===

geolocator = Nominatim(user_agent="specialist_locator", timeout=5)

# Successful lookups keyed by the normalised location string. Scoring asks for
# the same patient/provider locations once per candidate, so caching avoids
# repeating identical geocoding requests. Failures are not cached, which keeps
# a transient error from sticking.
_coordinate_cache = {}

def get_coordinates(location_str: str):
    """Return ``(latitude, longitude)`` for a location string, or None.

    Args:
        location_str: Free-text location, e.g. a "City, ST" string.

    Returns:
        A (latitude, longitude) tuple when the geocoder finds a match; None
        when the input is blank or not a string, no match is found, or the
        lookup raises.
    """
    if not isinstance(location_str, str) or not location_str.strip():
        return None
    cache_key = location_str.strip().lower()
    if cache_key in _coordinate_cache:
        return _coordinate_cache[cache_key]
    try:
        location = geolocator.geocode(location_str)
    except Exception:
        # Best-effort lookup: any geocoder failure is reported as "unknown".
        return None
    if not location:
        return None
    coords = (location.latitude, location.longitude)
    _coordinate_cache[cache_key] = coords
    return coords

def calculate_proximity_score(provider_location: str, patient_location: str, max_threshold_km=100.0):
    """Score how close a provider is to the patient.

    The score is 1.0 at 0 km and decreases linearly to 0.0 at
    ``max_threshold_km``; anything farther also scores 0.0.

    Args:
        provider_location: Provider location text.
        patient_location: Patient location text.
        max_threshold_km: Distance at which the score reaches zero. Must be
            positive.

    Returns:
        ``(score, distance_km)``, both rounded to 2 decimals, or
        ``(0.0, None)`` when either location cannot be geocoded.

    Raises:
        ValueError: if ``max_threshold_km`` is not positive.
    """
    if max_threshold_km <= 0:
        raise ValueError("max_threshold_km must be positive")
    provider_coords = get_coordinates(provider_location)
    patient_coords = get_coordinates(patient_location)
    if not (provider_coords and patient_coords):
        return 0.0, None
    dist = geodesic(provider_coords, patient_coords).km
    # 0.0 (not 0) keeps the score a float on every path.
    score = max(0.0, 1 - (dist / max_threshold_km))
    return round(score, 2), round(dist, 2)


# === Scoring ===

def update_provider_metadata(metadata: dict):
    """Identity hook: hand back the very same metadata object that was passed in.

    Kept as the single place where provider metadata could be transformed
    before scoring.
    """
    unchanged = metadata
    return unchanged

def calculate_scores(candidate_metadata, valid_specialties, patient_insurance, patient_location):
    """Compute the insurance, availability, proximity and composite scores.

    Availability comes from ``fetch_real_time_availability`` when it returns
    a value, otherwise from the provider's static
    ``availability_next_3_days`` field.

    Args:
        candidate_metadata: Provider metadata dict (reads ``specialist_id``,
            ``insurance_accepted``, ``availability_next_3_days``, ``location``).
        valid_specialties: Accepted for interface compatibility; not used in
            the score.
        patient_insurance: Patient's insurance name; compared ignoring case
            and surrounding whitespace.
        patient_location: Patient location text used for the distance.

    Returns:
        ``(metadata, scores)`` where ``scores`` has ``insurance_score`` (0/1),
        ``availability_score``, ``proximity_score``, ``composite_score``
        (0.5 / 0.3 / 0.2 weighting, rounded to 2 decimals) and ``distance_km``
        (None when the distance is unknown).
    """
    updated_metadata = update_provider_metadata(candidate_metadata)

    accepted_raw = updated_metadata.get("insurance_accepted", "")
    # A missing CSV cell may arrive as a non-string (e.g. NaN); treat that as
    # "no insurance listed" instead of failing on .split().
    insurances = [x.strip().lower() for x in accepted_raw.split(',')] if isinstance(accepted_raw, str) else []
    # Strip as well as lowercase so this agrees with insurance_authorization_agent.
    insurance_score = 1 if patient_insurance.strip().lower() in insurances else 0

    # Use real-time availability if available, else the static dataset value.
    specialist_id = updated_metadata.get("specialist_id", -1)
    live_availability = fetch_real_time_availability(specialist_id)
    availability = live_availability if live_availability is not None else updated_metadata.get("availability_next_3_days", 0)

    # NOTE(review): a larger availability value yields a LOWER score here
    # (and 10 or more yields 0). If the value counts open slots this looks
    # inverted; confirm the intended semantics before changing the formula.
    availability_score = max(0, (10 - availability) / 10) if availability > 0 else 0
    proximity_score, distance = calculate_proximity_score(updated_metadata.get("location", ""), patient_location)

    composite = round(0.5 * insurance_score + 0.3 * availability_score + 0.2 * proximity_score, 2)
    return updated_metadata, {
        "insurance_score": insurance_score,
        "availability_score": availability_score,
        "proximity_score": proximity_score,
        "composite_score": composite,
        "distance_km": distance
    }


# === Explanation ===

# Summarization pipeline that generate_explanation calls for long inputs.
# NOTE(review): pad_token_id=50256 does not obviously correspond to this BART
# checkpoint's own pad token; confirm the override is intended.
explanation_generator = pipeline("summarization", model="facebook/bart-large-cnn", pad_token_id=50256)

def generate_explanation(details: dict, candidate_metadata: dict):
    """Build a short explanation of why a provider was recommended.

    The raw text (scores plus the provider's profile summary) is returned
    unchanged when it has fewer than 50 words. Longer text is summarised; the
    raw text is used as a fallback if summarisation fails or the summary looks
    truncated.

    Args:
        details: Score dict with ``insurance_score``, ``availability_score``
            and ``proximity_score``.
        candidate_metadata: Provider metadata containing ``profile_summary``.

    Returns:
        The explanation string.
    """
    input_text = (
        f"Insurance: {details['insurance_score']}, "
        f"Availability: {details['availability_score']:.2f}, "
        f"Proximity: {details['proximity_score']:.2f}. "
        f"Doctor's profile: {candidate_metadata['profile_summary']}"
    )
    word_count = len(input_text.split())
    if word_count < 50:
        return input_text
    try:
        max_len = min(64, int(word_count * 0.6))
        output = explanation_generator(input_text, max_length=max_len, min_length=20, truncation=True, do_sample=False)[0]
        summary = output.get("summary_text", input_text).strip()
    except Exception:
        # Summarisation is best-effort; fall back to the raw text.
        return input_text
    # Compare the last whole word: a suffix test for "in"/"and" would also
    # reject summaries that legitimately end in words like "pain" or "skin".
    summary_words = summary.split()
    last_word = summary_words[-1] if summary_words else ""
    if last_word in ("in", "and") or len(summary) < 30:
        return input_text
    if not summary.endswith((".", "!", "?")):
        summary += "."
    return summary


# === Agents ===

def referral_supervisor_agent(matches, approval_threshold=0.6):
    """Tag each match as approved or needing review, based on its composite score.

    Args:
        matches: List of candidate dicts, each with a ``composite_score``.
            The dicts are updated in place.
        approval_threshold: Minimum composite score for "Approved". Defaults
            to 0.6, the previously hard-coded value.

    Returns:
        The same ``matches`` list, each entry carrying ``supervisor_review``
        set to "Approved" or "Needs Review".
    """
    for candidate in matches:
        approved = candidate["composite_score"] >= approval_threshold
        candidate["supervisor_review"] = "Approved" if approved else "Needs Review"
    return matches

def insurance_authorization_agent(matches, patient_insurance):
    """Annotate each match with an insurance authorization status.

    Args:
        matches: List of candidate dicts, each with a ``metadata`` dict that
            may contain a comma-separated ``insurance_accepted`` string. The
            dicts are updated in place.
        patient_insurance: Patient's insurance name; compared ignoring case
            and surrounding whitespace.

    Returns:
        The same ``matches`` list, each entry carrying
        ``insurance_auth_status``: "Pre-Authorized" when the insurance is
        accepted, otherwise a message listing the accepted insurances.
    """
    # Loop-invariant: normalise the patient's insurance once.
    patient_ins = patient_insurance.strip().lower()
    for candidate in matches:
        accepted_raw = candidate["metadata"].get("insurance_accepted", "")
        # Non-string values (e.g. NaN from an empty cell) mean nothing is
        # listed; blank tokens are dropped so empty input can never match.
        if isinstance(accepted_raw, str):
            accepted_insurances = [ins.strip() for ins in accepted_raw.split(",") if ins.strip()]
        else:
            accepted_insurances = []
        if patient_ins in (ins.lower() for ins in accepted_insurances):
            candidate["insurance_auth_status"] = "Pre-Authorized"
        else:
            accepted_list = ", ".join(accepted_insurances)
            candidate["insurance_auth_status"] = (
                f"Your insurance '{patient_insurance}' is not accepted by this doctor. Please register with one of: {accepted_list}"
            )
    return matches


# === Matching with Reflection ===

def get_ranked_specialists_with_reflection(patient_query, specialty_result, vector_store, patient_insurance, patient_location, confidence_threshold=0.6):
    """Retrieve, score and rank specialists, re-querying once if confidence is low.

    The top predicted specialty (plus the runner-up when the confidence gap is
    below 0.07) is used for scoring. Five documents are fetched from the
    vector store, scored with ``calculate_scores`` and sorted by composite
    score. If every candidate scores under ``confidence_threshold``, a
    rephrased query is issued and its results replace the first ranking.

    Returns:
        A tuple ``(top_three_candidates, was_reflection_used)``.
    """
    primary_specialty = specialty_result["top_label"]
    recommended_specialties = [primary_specialty]
    if specialty_result["top_gap"] < 0.07:
        # Classifier is nearly undecided: also accept the second-best specialty.
        recommended_specialties.append(specialty_result["top_2"][1])

    def _to_candidate(doc):
        # Score one retrieved document and package it for display.
        metadata, scores = calculate_scores(doc.metadata, recommended_specialties, patient_insurance, patient_location)
        explanation = generate_explanation(scores, metadata)
        return {
            "name": metadata.get("name", "Unknown"),
            "specialty": metadata.get("specialty", "Unknown"),
            "composite_score": scores["composite_score"],
            "distance_km": scores["distance_km"],
            "explanation": explanation,
            "metadata": metadata,
        }

    def _rank(documents):
        # Best composite score first; ties keep retrieval order.
        ranked = [_to_candidate(doc) for doc in documents]
        ranked.sort(key=lambda entry: entry["composite_score"], reverse=True)
        return ranked

    # First pass: nudge the query towards the primary specialty.
    first_pass_query = f"{patient_query}. Seeking help from a {primary_specialty}"
    ranked_candidates = _rank(vector_store.similarity_search(first_pass_query, k=5))

    every_score_low = all(
        entry["composite_score"] < confidence_threshold for entry in ranked_candidates
    )
    if not every_score_low:
        return ranked_candidates[:3], False

    # Reflection: nothing was convincing, so retry with a rephrased query.
    print("Low confidence in top results. Triggering reflection loop with rephrased query...")
    second_pass_query = f"{patient_query}. I need an expert {primary_specialty} with availability and insurance match."
    ranked_candidates = _rank(vector_store.similarity_search(second_pass_query, k=5))
    return ranked_candidates[:3], True


# === Test Interface ===

def run_test_case(query, patient_insurance="BlueCross", patient_location="New Haven, CT"):
    """Run one query through the full recommendation pipeline and print the result.

    Inputs are sanity-checked first (non-blank query; insurance and location
    at least three characters after stripping). A failed check prints a
    prompt and returns early. Otherwise the specialty is classified,
    specialists are ranked, the supervisor and insurance agents annotate the
    matches, and a report is printed. Always returns None.
    """
    # --- Input guards (checked lazily, in this order) ---
    if not query or not query.strip():
        print("--> Please enter a symptom description.")
        return
    if len(patient_insurance.strip()) < 3:
        print("--> Please enter a valid insurance provider.")
        return
    if len(patient_location.strip()) < 3:
        print("--> Please enter a valid location.")
        return

    print(f"\nQuery: {query}")
    print(f"Location: {patient_location} | Insurance: {patient_insurance}")

    # --- Specialty classification ---
    specialty_result = log_specialty_confidence_scores(query, candidate_specialties)
    classification_error = specialty_result.get("error")
    if classification_error:
        print("-->", classification_error)
        return

    # --- Retrieval, ranking and agent annotations ---
    ranked, reflection_used = get_ranked_specialists_with_reflection(
        query, specialty_result, vector_store, patient_insurance, patient_location
    )
    reviewed = referral_supervisor_agent(ranked)
    top_matches = insurance_authorization_agent(reviewed, patient_insurance)

    # --- Report ---
    reflection_label = "Yes" if reflection_used else "No"
    print(f"\n---> Reflection Used: {reflection_label}")
    for rank, match in enumerate(top_matches, start=1):
        print(f"\n{rank}.{match['name']} ({match['specialty']})")
        print(f"  Score: {match['composite_score']}")
        print(f"  Distance: {match['distance_km']:.2f} km")
        print(f"  Explanation: {match['explanation']}")
        print(f"  Supervisor Review: {match['supervisor_review']}")
        print(f"  Insurance Status: {match['insurance_auth_status']}")

In [15]:
def batch_test_runner(test_cases):
    """Run the specialist recommender over a list of test-case dicts.

    Each case may supply "query", "insurance" and "location"; insurance and
    location default to "BlueCross" and "New Haven, CT". A case whose query
    fails ``is_valid_input`` or whose location fails ``is_valid_location`` is
    reported and skipped. An insurance that fails ``is_valid_insurance`` only
    prints a notice and the case still runs. Results are printed; nothing is
    returned.
    """
    print("\nRunning Specialist Recommender on Batch Inputs\n")

    # (field name, fallback when the case omits it)
    field_defaults = (
        ("query", ""),
        ("insurance", "BlueCross"),
        ("location", "New Haven, CT"),
    )

    for case_no, case in enumerate(test_cases, start=1):
        query, insurance, location = (
            case.get(field, fallback).strip() for field, fallback in field_defaults
        )

        if not is_valid_input(query):
            print(f"Test Case {case_no}: Invalid symptom description.\n")
        elif not is_valid_location(location):
            print(f"Test Case {case_no}: Invalid location format.\n")
        else:
            if not is_valid_insurance(insurance):
                # Unknown insurers are tolerated; just let the reader know.
                print(f"Test Case {case_no}: Insurance not found in dataset. Proceeding anyway.\n")

            header_lines = [
                f"\nRunning Test Case {case_no}",
                f"Query: {query}",
                f"Insurance: {insurance}",
                f"Location: {location}\n",
            ]
            print("\n".join(header_lines))

            run_test_case(query, patient_insurance=insurance, patient_location=location)

# Example usage: push three sample patient scenarios through the batch runner.
if __name__ == "__main__":
    # Each tuple is (symptom description, insurance provider, patient location).
    test_cases = [
        {"query": symptoms, "insurance": insurer, "location": city}
        for symptoms, insurer, city in (
            (
                "I’ve been experiencing chest tightness and shortness of breath during exercise.",
                "BlueCross",
                "New Haven, CT",
            ),
            (
                "I often feel dizzy and have trouble concentrating during work hours.",
                "United",
                "Hartford, CT",
            ),
            (
                "I want to check my whole body because sometimes I feel very weak.",
                "Aetna",
                "Stamford, CT",
            ),
        )
    ]

    batch_test_runner(test_cases)


Running Specialist Recommender on Batch Inputs


Running Test Case 1
Query: I’ve been experiencing chest tightness and shortness of breath during exercise.
Insurance: BlueCross
Location: New Haven, CT


Query: I’ve been experiencing chest tightness and shortness of breath during exercise.
Location: New Haven, CT | Insurance: BlueCross

---> Reflection Used: No

1.Dr. Aarti Mehta (Cardiologist)
  Score: 0.91
  Distance: 0.00 km
  Explanation: Insurance: 1, Availability: 0.70, Proximity: 1.00. Doctor's profile: Board-certified cardiologist with 10 years of experience in treating arrhythmia and heart failure.
  Supervisor Review: Approved
  Insurance Status: Pre-Authorized

2.Dr. Sophia Martinez (Cardiologist)
  Score: 0.73
  Distance: 45.69 km
  Explanation: Insurance: 1, Availability: 0.40, Proximity: 0.54. Doctor's profile: Cardiologist specializing in exertional chest pain, athletic heart monitoring, and hypertension management.
  Supervisor Review: Approved
  Insurance Status: Pre-A