# tiktoken - A fast tokenizer library by OpenAI for counting tokens used in language models like GPT.

In [None]:
pip install tiktoken

In [2]:
import tiktoken

# Load the "cl100k_base" encoding by name.
enc = tiktoken.get_encoding("cl100k_base")  # used by GPT-4, GPT-3.5
# Encode a single word; the recorded output shows the result is a list of integer token ids.
tokens = enc.encode("Excavation")

# Print the ids and how many tokens the word was split into.
print("Tokens:", tokens)
print("Token count:", len(tokens))

Tokens: [42821, 402, 367]
Token count: 3


# Loading the Downloaded Model and Importing the Sample Data

In [None]:
pip install GPT4All

In [4]:
import pandas as pd
from datetime import datetime
from gpt4all import GPT4All

# Load the sample provider file; the relative path means it must sit in the working directory.
# `df` is read as a module-level global by check_eligibility() further down in this notebook.
df = pd.read_csv("sample_ahca_pml.csv")
print("Data loaded. Number of rows:", len(df))
# Last expression of the cell, so the first rows are rendered as a table.
df.head()

Data loaded. Number of rows: 3


Unnamed: 0,provider_name,npi,taxonomy,eligibility_start,eligibility_end
0,Dr. Smith,1234567890,123456ZZ1,01/01/2020,12/31/2025
1,Dr. Jones,2345678901,987654ZZ2,06/15/2021,06/14/2024
2,Dr. Lee,3456789012,123456ZZ1,01/01/2023,12/31/2026


# Hybrid Workflow: Automating Eligibility Checks and Explaining Results via LLM

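Using an LLM directly to reason over 200,000 rows of structured data is highly inefficient and, in most cases, completely impractical. The hybrid workflow below therefore performs the eligibility lookup in ordinary code and uses the LLM only to explain the result.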

In [5]:

def check_eligibility(provider_name, taxonomy, dos_str):
    """Check whether a provider/taxonomy pair is eligible on a date of service.

    Selects the rows of the module-level ``df`` whose ``provider_name`` and
    ``taxonomy`` match the arguments (both compared case-insensitively) and
    tests whether ``dos_str`` falls inside any of those rows' eligibility
    windows, bounds included.

    Args:
        provider_name: Provider name to look up.
        taxonomy: Taxonomy code to look up.
        dos_str: Date of service in ``mm/dd/yyyy`` form.

    Returns:
        A ``(eligible, message)`` tuple. Any exception raised along the way
        is reported as ``(False, "Error: <details>")`` instead of propagating.
    """
    date_format = "%m/%d/%Y"
    try:
        service_date = datetime.strptime(dos_str, date_format)

        same_provider = df['provider_name'].str.lower() == provider_name.lower()
        same_taxonomy = df['taxonomy'].str.upper() == taxonomy.upper()
        candidates = df[same_provider & same_taxonomy]

        # The first window that contains the service date settles the answer.
        for _, candidate in candidates.iterrows():
            window_start = datetime.strptime(candidate['eligibility_start'], date_format)
            window_end = datetime.strptime(candidate['eligibility_end'], date_format)
            if window_start <= service_date <= window_end:
                return True, f"Yes, {provider_name} can submit claims with taxonomy {taxonomy} on {dos_str}."

        return False, f"{provider_name} is not eligible for taxonomy {taxonomy} on {dos_str}."
    except Exception as err:
        return False, f"Error: {str(err)}"


In [6]:
from gpt4all import GPT4All

# Load the local model. `model_name` is the .gguf file name and `model_path` the directory
# it is looked up in ("." here); adjust both to match where your model file lives.
gpt_model = GPT4All(model_name="mistral-7b-openorca.Q4_K_M.gguf", model_path=".")

def explain_with_local_llm(base_msg):
    """Ask the local model to restate an eligibility message in plain language.

    Prints the prompt and the raw (repr) model reply for inspection.

    Args:
        base_msg: The statement to be explained.

    Returns:
        The model reply with surrounding whitespace removed.
    """
    instructions = (
        "You are a helpful assistant. Your job is to explain the following statement "
        "in simpler terms for someone without medical billing knowledge:"
    )
    prompt = f"{instructions}\n\n{base_msg}\n\nExplain this clearly and concisely."
    print(f"\nPrompt:\n{prompt}\n")

    generation_options = {"max_tokens": 200, "temp": 0.7, "top_k": 40}
    raw_reply = gpt_model.generate(prompt, **generation_options)
    print("\nRaw LLM Response:", repr(raw_reply))
    return raw_reply.strip()


Failed to load libllamamodel-mainline-cuda-avxonly.so: dlopen: libcudart.so.11.0: cannot open shared object file: No such file or directory
Failed to load libllamamodel-mainline-cuda.so: dlopen: libcudart.so.11.0: cannot open shared object file: No such file or directory


In [7]:
# Sanity check that the model object exists; this prints only the default object repr.
print(gpt_model)

<gpt4all.gpt4all.GPT4All object at 0x7fd6daf800d0>


In [8]:

# Inputs for the demo lookup; the date of service uses the mm/dd/yyyy format check_eligibility() parses.
provider_name = "Dr. Smith"
taxonomy = "123456ZZ1"
dos_str = "10/01/2025"

# Deterministic step: the eligibility decision comes from the table lookup, not from the LLM.
eligible, base_msg = check_eligibility(provider_name, taxonomy, dos_str)
print("Result:", base_msg)

# Generative step: the LLM only rephrases the already-computed message.
llm_response = explain_with_local_llm(base_msg)
print("\nLocal GPT4All Response:")
print(llm_response)


Result: Yes, Dr. Smith can submit claims with taxonomy 123456ZZ1 on 10/01/2025.

Prompt:
You are a helpful assistant. Your job is to explain the following statement in simpler terms for someone without medical billing knowledge:

Yes, Dr. Smith can submit claims with taxonomy 123456ZZ1 on 10/01/2025.

Explain this clearly and concisely.


Raw LLM Response: '\n\nDr. Smith is a healthcare professional who has the ability to send in medical billing information for their patients. The statement you provided talks about a specific code, called "taxonomy," which is like an address that helps identify certain services or procedures performed on a patient. In this case, the taxonomy number is 123456ZZ1.\n\nNow, Dr. Smith can use this taxonomy to submit claims for their patients starting from October 1st, 2025 (10/01/2025). This means that they will be able to send in the necessary information about a patient\'s treatment and receive payment or reimbursement for it.'

Local GPT4All Response:
Dr

# Fully LLM-Driven Eligibility Assessment and Explanation

##### Limitations of an LLM-Only Approach: Impractical and Non-Scalable

In [9]:
from gpt4all import GPT4All

# Step 1: Load the model (rebinds the notebook-global `gpt_model`)
gpt_model = GPT4All(
    model_name="mistral-7b-openorca.Q4_K_M.gguf",
    model_path=".",           # current directory
    n_threads=4,
    verbose=True
)

# Step 2: Eligibility table written out by hand as plain text for the prompt.
# NOTE: this is a hard-coded string, not derived from `df`; keep it in sync with the CSV manually.
eligibility_data = """
Here is the provider eligibility data:

Dr. Smith, NPI 1234567890, Taxonomy 123456ZZ1, Eligible from 01/01/2020 to 12/31/2025
Dr. Jones, NPI 2345678901, Taxonomy 987654ZZ2, Eligible from 06/15/2021 to 06/14/2024
Dr. Lee, NPI 3456789012, Taxonomy 123456ZZ1, Eligible from 01/01/2023 to 12/31/2026
"""

# Step 3: Function to ask question using the LLM
def ask_llm_directly(data_text, question):
    """Send the eligibility text plus a question straight to the local model.

    Prints the full prompt before generating.

    Args:
        data_text: Eligibility data rendered as plain text.
        question: The user's question about that data.

    Returns:
        The model reply with surrounding whitespace removed.
    """
    instruction = "Based on the above information, answer this question clearly:"
    full_prompt = f"{data_text}\n\n{instruction}\n{question}"
    print("\nPrompt sent to LLM:\n", full_prompt)

    generation_options = {"max_tokens": 200, "temp": 0.7, "top_k": 40}
    answer = gpt_model.generate(full_prompt, **generation_options)
    return answer.strip()

Found model file at 'mistral-7b-openorca.Q4_K_M.gguf'


In [10]:
# Step 4: Example question. The date lies inside Dr. Smith's 01/01/2020 to 12/31/2025 window,
# so the correct answer is "yes".
question = "Is Dr. Smith eligible to submit claims with taxonomy 123456ZZ1 on 10/01/2025?"

In [11]:
# Step 5: Run and print. The whole table plus the question goes into a single prompt.
response = ask_llm_directly(eligibility_data, question)
print("\nLLM Response:")
print(response)


Prompt sent to LLM:
 
Here is the provider eligibility data:

Dr. Smith, NPI 1234567890, Taxonomy 123456ZZ1, Eligible from 01/01/2020 to 12/31/2025
Dr. Jones, NPI 2345678901, Taxonomy 987654ZZ2, Eligible from 06/15/2021 to 06/14/2024
Dr. Lee, NPI 3456789012, Taxonomy 123456ZZ1, Eligible from 01/01/2023 to 12/31/2026


Based on the above information, answer this question clearly:
Is Dr. Smith eligible to submit claims with taxonomy 123456ZZ1 on 10/01/2025?

LLM Response:
Yes, Dr. Smith is eligible to submit claims with taxonomy 123456ZZ1 on 10/01/2025 because his eligibility period includes this date (from 01/01/2020 to 12/31/2025).


In [12]:
# Query by NPI only, with a date before Dr. Lee's 01/01/2023 start; the correct answer is "no".
question = "Was the NPI 3456789012 eligible on 1/1/2010?"

In [13]:
# Re-run the direct-LLM path with the question set in the previous cell.
response = ask_llm_directly(eligibility_data, question)
print("\nLLM Response:")
print(response)


Prompt sent to LLM:
 
Here is the provider eligibility data:

Dr. Smith, NPI 1234567890, Taxonomy 123456ZZ1, Eligible from 01/01/2020 to 12/31/2025
Dr. Jones, NPI 2345678901, Taxonomy 987654ZZ2, Eligible from 06/15/2021 to 06/14/2024
Dr. Lee, NPI 3456789012, Taxonomy 123456ZZ1, Eligible from 01/01/2023 to 12/31/2026


Based on the above information, answer this question clearly:
Was the NPI 3456789012 eligible on 1/1/2010?

LLM Response:
No, Dr. Lee's NPI (3456789012) was not eligible on January 1st, 2010 as their eligibility period starts from January 1st, 2023.


In [14]:
# No date is given in this question, so only the NPI/taxonomy pairing can be checked.
question = "Was the NPI 3456789012 allowed to submit 123456ZZ1 taxonomy code?"

response = ask_llm_directly(eligibility_data, question)
print("\nLLM Response:")
print(response)


Prompt sent to LLM:
 
Here is the provider eligibility data:

Dr. Smith, NPI 1234567890, Taxonomy 123456ZZ1, Eligible from 01/01/2020 to 12/31/2025
Dr. Jones, NPI 2345678901, Taxonomy 987654ZZ2, Eligible from 06/15/2021 to 06/14/2024
Dr. Lee, NPI 3456789012, Taxonomy 123456ZZ1, Eligible from 01/01/2023 to 12/31/2026


Based on the above information, answer this question clearly:
Was the NPI 3456789012 allowed to submit 123456ZZ1 taxonomy code?

LLM Response:
Yes, Dr. Lee with NPI 3456789012 was allowed to submit the 123456ZZ1 taxonomy code as per the given eligibility data. The eligible period for this taxonomy is from 01/01/2023 to 12/31/2026, and Dr. Lee's NPI matches with that of the allowed provider.


In [15]:
# 01/01/2025 lies inside Dr. Lee's 01/01/2023 to 12/31/2026 window, so the correct answer is "yes".
# NOTE: the recorded output below answers "No" and misstates the end date as 31st December 2024,
# illustrating that the LLM-only approach can contradict the data it was given. The reply also
# appears to be cut off, presumably by the max_tokens=200 limit in ask_llm_directly().
question = "Was the NPI 3456789012 allowed to submit 123456ZZ1 taxonomy code on 1st January 2025?"

response = ask_llm_directly(eligibility_data, question)
print("\nLLM Response:")
print(response)


Prompt sent to LLM:
 
Here is the provider eligibility data:

Dr. Smith, NPI 1234567890, Taxonomy 123456ZZ1, Eligible from 01/01/2020 to 12/31/2025
Dr. Jones, NPI 2345678901, Taxonomy 987654ZZ2, Eligible from 06/15/2021 to 06/14/2024
Dr. Lee, NPI 3456789012, Taxonomy 123456ZZ1, Eligible from 01/01/2023 to 12/31/2026


Based on the above information, answer this question clearly:
Was the NPI 3456789012 allowed to submit 123456ZZ1 taxonomy code on 1st January 2025?

LLM Response:
No, Dr. Lee with NPI 3456789012 was not allowed to submit the 123456ZZ1 taxonomy code on 1st January 2025 because his eligibility period ends on 31st December 2024.

Here is a summary of their eligibilities:

Dr. Smith - Eligible from 01/01/2020 to 12/31/2025 (allowed to submit the taxonomy code)
Dr. Jones - Eligible from 06/15/2021 to 06/14/2024 (not allowed to submit the taxonomy code on 1st January 2025)
Dr. Lee - Eligible from 01/01/2023 to 12/31/2


In [16]:
# The question is followed by an unrelated instruction ("Summarize this article ...") to see how
# the model copes with mixed requests. 01/01/2024 lies inside Dr. Lee's 01/01/2023 to 12/31/2026
# window, so the correct answer is "yes".
# NOTE: the recorded output below says "not allowed" while quoting a period that contains the date.
question = "Was the NPI 3456789012 allowed to submit 123456ZZ1 taxonomy code on 1st January 2024? Summarize this article in 2 bullet points."

response = ask_llm_directly(eligibility_data, question)
print("\nLLM Response:")
print(response)


Prompt sent to LLM:
 
Here is the provider eligibility data:

Dr. Smith, NPI 1234567890, Taxonomy 123456ZZ1, Eligible from 01/01/2020 to 12/31/2025
Dr. Jones, NPI 2345678901, Taxonomy 987654ZZ2, Eligible from 06/15/2021 to 06/14/2024
Dr. Lee, NPI 3456789012, Taxonomy 123456ZZ1, Eligible from 01/01/2023 to 12/31/2026


Based on the above information, answer this question clearly:
Was the NPI 3456789012 allowed to submit 123456ZZ1 taxonomy code on 1st January 2024? Summarize this article in 2 bullet points.

LLM Response:
The NPI 3456789012 was not allowed to submit the 123456ZZ1 taxonomy code on 1st January 2024, as their eligibility period for that taxonomy is from 1st January 2023 to 31st December 2026.

Article Summary:
- NPI 3456789012 was not allowed to submit the 123456ZZ1 taxonomy code on 1st January 2024.
- Their eligibility period for that taxonomy is from 1st January 2023 to 31st December 2026.


# Leveraging RAG for Faster, Context-Aware Eligibility Checks

In [17]:
# In-memory copy of the three sample eligibility records. Every value is a string and the
# dates use mm/dd/yyyy, which is the format validate_eligibility() parses.
data = [
    {"provider": "Dr. Smith", "npi": "1234567890", "taxonomy": "123456ZZ1", "start": "01/01/2020", "end": "12/31/2025"},
    {"provider": "Dr. Jones", "npi": "2345678901", "taxonomy": "987654ZZ2", "start": "06/15/2021", "end": "06/14/2024"},
    {"provider": "Dr. Lee", "npi": "3456789012", "taxonomy": "123456ZZ1", "start": "01/01/2023", "end": "12/31/2026"},
]


from datetime import datetime
from gpt4all import GPT4All

# Load the local LLM (rebinds the notebook-global `gpt_model`)
gpt_model = GPT4All("mistral-7b-openorca.Q4_K_M.gguf", model_path=".", n_threads=4, verbose=False)

# Example question, already split into structured fields (NPI, taxonomy code, mm/dd/yyyy date)
npi_query = "3456789012"
taxonomy_query = "123456ZZ1"
date_str = "01/01/2024"

# RAG: Retrieve the matching record
def retrieve_provider_record(npi, taxonomy, data):
    """Return the first record matching both NPI and taxonomy exactly.

    Args:
        npi: NPI value to match against each record's ``"npi"`` entry.
        taxonomy: Taxonomy code to match against each record's ``"taxonomy"`` entry.
        data: Iterable of record dicts.

    Returns:
        The first matching record, or ``None`` when nothing matches.
    """
    for candidate in data:
        if candidate["npi"] == npi and candidate["taxonomy"] == taxonomy:
            return candidate
    return None

# Validation logic
def validate_eligibility(record, date_str):
    """Tell whether ``date_str`` falls inside the record's eligibility window.

    Args:
        record: Mapping with ``"start"`` and ``"end"`` dates in ``mm/dd/yyyy`` form.
        date_str: Date to check, in ``mm/dd/yyyy`` form.

    Returns:
        ``True`` when ``start <= date <= end`` (bounds included). ``False``
        when the date is outside the window or when any input is missing or
        malformed.
    """
    date_format = "%m/%d/%Y"
    try:
        check_date = datetime.strptime(date_str, date_format)
        start_date = datetime.strptime(record["start"], date_format)
        end_date = datetime.strptime(record["end"], date_format)
    except (ValueError, TypeError, KeyError):
        # Only bad-input errors mean "not eligible": ValueError for a malformed
        # date, TypeError for a non-string date or non-mapping record, KeyError
        # for a missing field. A bare `except:` would also hide KeyboardInterrupt
        # and SystemExit.
        return False
    return start_date <= check_date <= end_date

# Ask the LLM to explain based on the validated result
def explain_eligibility(record, date_str, is_eligible):
    """Have the local model explain an already-decided eligibility result.

    Args:
        record: Provider record with ``provider``, ``npi``, ``taxonomy``,
            ``start`` and ``end`` entries.
        date_str: The date that was checked.
        is_eligible: The decision to be explained; the model does not make it.

    Returns:
        The model's reply exactly as returned by ``gpt_model.generate``.
    """
    if is_eligible:
        status = "eligible"
    else:
        status = "not eligible"

    statement = (
        f"{record['provider']} (NPI {record['npi']}) is {status} "
        f"to submit taxonomy code {record['taxonomy']} on {date_str}."
    )
    period = f"Their eligibility period is from {record['start']} to {record['end']}."
    prompt = f"{statement} {period}\n\nExplain this eligibility status in simple terms."
    return gpt_model.generate(prompt, max_tokens=150)

# Run the flow: retrieve the record, decide eligibility in code, then let the LLM explain it.
record = retrieve_provider_record(npi_query, taxonomy_query, data)

if record:
    # The yes/no decision is computed here; the LLM call below only phrases the explanation.
    is_eligible = validate_eligibility(record, date_str)
    response = explain_eligibility(record, date_str, is_eligible)
    print("✅ Final LLM Explanation:")
    print(response)
else:
    # No record with this exact NPI + taxonomy pair, so the LLM is not called at all.
    print("❌ No matching provider found.")




✅ Final LLM Explanation:


Dr. Lee, a healthcare professional with the National Provider Identifier (NPI) number 3456789012, is allowed to use taxonomy code 123456ZZ1 for their medical services starting from January 1st, 2024. This eligibility period lasts until December 31st, 2026.

Taxonomy codes are used by healthcare providers and insurance companies to classify the specific medical procedures or services they offer. In this case, Dr. Lee can use taxonomy code 123456ZZ1 for their practice during the specified eligibility period.


# RAG + NLP - Transforming User Questions into Structured Fields via LLM NLP

In [18]:
from datetime import datetime
from gpt4all import GPT4All
from dateutil.parser import parse as date_parse

# Sample data: the same three records as in the previous section, redefined so this cell
# can run on its own. All values are strings; dates use mm/dd/yyyy.
data = [
    {"provider": "Dr. Smith", "npi": "1234567890", "taxonomy": "123456ZZ1", "start": "01/01/2020", "end": "12/31/2025"},
    {"provider": "Dr. Jones", "npi": "2345678901", "taxonomy": "987654ZZ2", "start": "06/15/2021", "end": "06/14/2024"},
    {"provider": "Dr. Lee", "npi": "3456789012", "taxonomy": "123456ZZ1", "start": "01/01/2023", "end": "12/31/2026"},
]

# Load local LLM (rebinds the notebook-global `gpt_model`)
gpt_model = GPT4All("mistral-7b-openorca.Q4_K_M.gguf", model_path=".", n_threads=4, verbose=False)

# Step 1: Extract structured data from natural language question using LLM
def extract_query_fields(question):
    """Use the local LLM to pull NPI, taxonomy code and date out of a question.

    The model is asked to answer with ``NPI:``, ``Taxonomy:`` and ``Date:``
    lines. Each reply line is matched on its case-insensitive label after
    surrounding whitespace is removed, so indented lines are recognised too.

    Args:
        question: Free-text user question.

    Returns:
        A ``(npi, taxonomy, date_str)`` tuple. ``date_str`` is normalised to
        ``mm/dd/yyyy``. A field is ``None`` when the reply contains no usable
        value for it.
    """
    prompt = (
        "Extract the NPI, taxonomy code, and date from this question:\n"
        f"\"{question}\"\n\n"
        "Return the result in this format:\n"
        "NPI: <npi_number>\nTaxonomy: <taxonomy_code>\nDate: <mm/dd/yyyy>"
    )
    response = gpt_model.generate(prompt, max_tokens=100)

    npi = taxonomy = date_str = None
    for raw_line in response.splitlines():
        # Strip each line individually: models often indent their answer lines.
        line = raw_line.strip()
        label = line.lower()
        if label.startswith("npi:"):
            npi = line.split(":", 1)[1].strip() or None
        elif label.startswith("taxonomy:"):
            taxonomy = line.split(":", 1)[1].strip() or None
        elif label.startswith("date:"):
            raw_date = line.split(":", 1)[1].strip()
            try:
                # Convert anything dateutil understands to mm/dd/yyyy
                date_str = date_parse(raw_date).strftime("%m/%d/%Y")
            except (ValueError, OverflowError):
                # dateutil reports an unparseable or out-of-range date with these
                # exception types; keep whatever date_str already holds.
                pass
    return npi, taxonomy, date_str

# Step 2: Retrieve matching provider
def retrieve_provider_record(npi, taxonomy, data):
    """Find the first record whose NPI and taxonomy both equal the arguments.

    Args:
        npi: NPI value to look for.
        taxonomy: Taxonomy code to look for.
        data: Iterable of record dicts with ``"npi"`` and ``"taxonomy"`` entries.

    Returns:
        The first matching record, or ``None`` if there is none.
    """
    def _is_match(row):
        return row["npi"] == npi and row["taxonomy"] == taxonomy

    # filter() is lazy, so scanning stops at the first hit.
    return next(filter(_is_match, data), None)

# Step 3: Validate eligibility
def validate_eligibility(record, date_str):
    """Tell whether ``date_str`` falls inside the record's eligibility window.

    Args:
        record: Mapping with ``"start"`` and ``"end"`` dates in ``mm/dd/yyyy`` form.
        date_str: Date to check, in ``mm/dd/yyyy`` form.

    Returns:
        ``True`` when ``start <= date <= end`` (bounds included). ``False``
        when the date is outside the window or when any input is missing or
        malformed.
    """
    date_format = "%m/%d/%Y"
    try:
        check_date = datetime.strptime(date_str, date_format)
        start_date = datetime.strptime(record["start"], date_format)
        end_date = datetime.strptime(record["end"], date_format)
    except (ValueError, TypeError, KeyError):
        # Treat only bad input as "not eligible" (malformed date, wrong type,
        # missing field). A bare `except:` would also swallow KeyboardInterrupt
        # and SystemExit.
        return False
    return start_date <= check_date <= end_date

# Step 4: Explain eligibility
def explain_eligibility(record, date_str, is_eligible):
    """Ask the local model to explain a precomputed eligibility decision.

    Args:
        record: Provider record with ``provider``, ``npi``, ``taxonomy``,
            ``start`` and ``end`` entries.
        date_str: The date that was checked.
        is_eligible: Outcome of the check; it is stated to the model as a fact.

    Returns:
        The model's reply exactly as returned by ``gpt_model.generate``.
    """
    status = ("not eligible", "eligible")[bool(is_eligible)]

    verdict = (
        f"{record['provider']} (NPI {record['npi']}) is {status} "
        f"to submit taxonomy code {record['taxonomy']} on {date_str}."
    )
    window = f"Their eligibility period is from {record['start']} to {record['end']}."
    request = "Explain this eligibility status in simple terms."

    prompt = f"{verdict} {window}\n\n{request}"
    return gpt_model.generate(prompt, max_tokens=150)

# Step 5: Full pipeline
def handle_question(natural_language_input):
    """Answer a free-text eligibility question and print each stage.

    Pipeline: extract NPI / taxonomy / date with the LLM, look the record up
    in the module-level ``data``, decide eligibility in code, then print the
    LLM's explanation of that decision. Stops early with a message when the
    fields cannot be extracted or no record matches.

    Args:
        natural_language_input: The user's question.

    Returns:
        ``None``; all results are printed.
    """
    print(f"\n🧠 Original question:\n{natural_language_input}\n")

    fields = extract_query_fields(natural_language_input)
    npi, taxonomy, date_str = fields
    if not all(fields):
        print("❌ Failed to extract structured fields from input.")
        return

    print(f"✅ Parsed:\nNPI: {npi}\nTaxonomy: {taxonomy}\nDate: {date_str}\n")

    record = retrieve_provider_record(npi, taxonomy, data)
    if not record:
        print("❌ No matching provider found.")
        return

    is_eligible = validate_eligibility(record, date_str)
    explanation = explain_eligibility(record, date_str, is_eligible)
    print("\n✅ Final Explanation:")
    print(explanation)

# Example call: the question names an NPI, a taxonomy code and a date, so all three fields
# can be extracted and the single-record lookup applies.
handle_question("Can you check if NPI 3456789012 can submit taxonomy 123456ZZ1 on January 1st 2024?")





🧠 Original question:
Can you check if NPI 3456789012 can submit taxonomy 123456ZZ1 on January 1st 2024?

✅ Parsed:
NPI: 3456789012
Taxonomy: 123456ZZ1
Date: 01/01/2024


✅ Final Explanation:


Dr. Lee, a medical professional with the National Provider Identifier (NPI) number 3456789012, is allowed to use taxonomy code 123456ZZ1 for their services starting on January 1st, 2024. This eligibility period lasts until December 31st, 2026.

In this context:
- NPI number refers to a unique identification number assigned by the National Plan and Provider Enumeration System (NPPES) for healthcare providers in the United States.
- Taxonomy code is a standardized system of codes used to classify medical services, procedures, or items


In [19]:
# Example call: this question names no taxonomy code, which this pipeline cannot handle.
# In the recorded output the LLM returns the placeholder taxonomy "ZZZ", so the exact-match
# lookup finds no record.
handle_question("What all taxonomies can NPI 3456789012 submit on January 1st 2024?")


🧠 Original question:
What all taxonomies can NPI 3456789012 submit on January 1st 2024?

✅ Parsed:
NPI: 3456789012
Taxonomy: ZZZ
Date: 01/01/2024

❌ No matching provider found.


# Adding Intent Detection to Improve Question Handling and Response Generation + Making Changes to help Understand, Debug and Evaluate

Natural language is often ambiguous. A question like:

"What can NPI 1234567890 submit on Jan 1st?"

...could mean:

“Is this NPI eligible for a specific taxonomy?”

“Which taxonomy codes are eligible for this NPI?”

“Which providers can submit this taxonomy?”

Intent detection helps disambiguate the user's goal.

This version prints every LLM prompt and response, which makes it much easier to:

- Understand what is being sent to the model
- Debug or trace errors in intent or field extraction
- Evaluate how well the model performs in different scenarios

In [20]:
from datetime import datetime
from gpt4all import GPT4All
from dateutil.parser import parse as date_parse
import re

# -----------------------------
# 1. Sample structured data
#    (same three records as in the earlier sections; all values are strings,
#    dates use mm/dd/yyyy)
# -----------------------------
data = [
    {"provider": "Dr. Smith", "npi": "1234567890", "taxonomy": "123456ZZ1", "start": "01/01/2020", "end": "12/31/2025"},
    {"provider": "Dr. Jones", "npi": "2345678901", "taxonomy": "987654ZZ2", "start": "06/15/2021", "end": "06/14/2024"},
    {"provider": "Dr. Lee", "npi": "3456789012", "taxonomy": "123456ZZ1", "start": "01/01/2023", "end": "12/31/2026"},
]

# -----------------------------
# 2. Load local LLM
#    (rebinds the notebook-global `gpt_model` used by the functions below)
# -----------------------------
gpt_model = GPT4All("mistral-7b-openorca.Q4_K_M.gguf", model_path=".", n_threads=4, verbose=False)

# -----------------------------
# 3. Intent detection using LLM
# -----------------------------
def detect_intent(question):
    """Ask the local model which kind of question this is.

    The prompt offers three labels (``single-check``, ``list-taxonomies``,
    ``list-npis``). Prints the prompt and the reply.

    NOTE(review): the reply is capped at ``max_tokens=10`` and is returned as
    free text, not as a validated label. The recorded run in this notebook
    shows a sentence fragment rather than one of the labels, so callers have
    to normalise the result themselves.

    Args:
        question: The user's question.

    Returns:
        The model reply, stripped and lower-cased.
    """
    header = "Classify the following question into one of these types:\n"
    choices = "- single-check\n- list-taxonomies\n- list-npis\n"
    prompt = f"{header}{choices}\nQuestion: {question}\nAnswer:"
    print("\n📤 Prompt to LLM (Intent):\n" + prompt)

    raw_reply = gpt_model.generate(prompt, max_tokens=10)
    intent_text = raw_reply.strip().lower()
    print("📥 LLM Response (Intent):\n" + intent_text)
    return intent_text

# -----------------------------
# 4. Extract fields (NPI, taxonomy, date)
# -----------------------------
def extract_fields(question):
    """Extract NPI, taxonomy code and date from a question, with fallbacks.

    The local model is asked for ``NPI:``, ``Taxonomy:`` and ``Date:`` lines.
    Each reply line is matched on its case-insensitive label after surrounding
    whitespace is removed, so indented lines are recognised too. When the
    reply yields no NPI or no date, the question text itself is searched
    (regex for a 10-digit NPI, fuzzy date parsing). Prints the prompt and the
    raw reply.

    Args:
        question: The user's question.

    Returns:
        A ``(npi, taxonomy, date_str)`` tuple. ``date_str`` is normalised to
        ``mm/dd/yyyy``. A field is ``None`` when no value was found, and
        ``taxonomy`` is ``None`` when the model answered with a "[list ...]"
        placeholder.
    """
    prompt = (
        "Extract NPI, taxonomy code, and date from this question (if present):\n"
        f"\"{question}\"\n\nReturn format:\nNPI: <npi>\nTaxonomy: <taxonomy>\nDate: <mm/dd/yyyy>"
    )
    print("\n📤 Prompt to LLM (Field Extraction):\n" + prompt)
    response = gpt_model.generate(prompt, max_tokens=100)
    print("📥 LLM Response (Field Extraction):\n" + response)

    npi = taxonomy = date_str = None
    for raw_line in response.splitlines():
        # Strip each line individually: models often indent their answer lines.
        line = raw_line.strip()
        label = line.lower()
        if label.startswith("npi:"):
            npi = line.split(":", 1)[1].strip()
        elif label.startswith("taxonomy:"):
            taxonomy = line.split(":", 1)[1].strip()
        elif label.startswith("date:"):
            try:
                date_obj = date_parse(line.split(":", 1)[1].strip())
                date_str = date_obj.strftime("%m/%d/%Y")
            except (ValueError, OverflowError):
                # dateutil reports an unparseable or out-of-range date with these
                # exception types; keep whatever date_str already holds.
                pass

    # Fallback 1: look for "npi" followed by a 10-digit number in the question.
    if not npi:
        match = re.search(r'npi[\s:]*([0-9]{10})', question.lower())
        if match:
            npi = match.group(1)

    # Fallback 2: let dateutil pick a date out of the full question text.
    if not date_str:
        try:
            date_obj = date_parse(question, fuzzy=True)
            date_str = date_obj.strftime("%m/%d/%Y")
        except (ValueError, OverflowError):
            # No usable date in the question; leave date_str as None.
            pass

    # The model sometimes answers with a placeholder such as
    # "[list of taxonomies]" instead of a code; treat that as "not provided".
    if taxonomy and '[list' in taxonomy.lower():
        taxonomy = None

    return npi, taxonomy, date_str

# -----------------------------
# 5. Logic functions
# -----------------------------
def retrieve_provider_record(npi, taxonomy, data):
    """Look up the first record with exactly this NPI and taxonomy code.

    Args:
        npi: NPI value to look for.
        taxonomy: Taxonomy code to look for.
        data: Iterable of record dicts with ``"npi"`` and ``"taxonomy"`` entries.

    Returns:
        The first matching record, or ``None`` when there is no match.
    """
    for entry in data:
        if entry["npi"] == npi:
            if entry["taxonomy"] == taxonomy:
                return entry
    return None

def validate_eligibility(record, date_str):
    """Tell whether ``date_str`` falls inside the record's eligibility window.

    Unlike a best-effort check, this version does not catch anything:
    malformed dates or missing fields raise.

    Args:
        record: Mapping with ``"start"`` and ``"end"`` dates in ``mm/dd/yyyy`` form.
        date_str: Date to check, in ``mm/dd/yyyy`` form.

    Returns:
        ``True`` when ``start <= date <= end`` (bounds included), else ``False``.
    """
    date_format = "%m/%d/%Y"
    check_date = datetime.strptime(date_str, date_format)
    window_start, window_end = (
        datetime.strptime(record[bound], date_format) for bound in ("start", "end")
    )
    return window_start <= check_date <= window_end

def get_all_eligible_taxonomies(npi, date_str, data):
    """List every record of one NPI whose eligibility window contains a date.

    Args:
        npi: NPI value to filter on (exact match).
        date_str: Date to check, in ``mm/dd/yyyy`` form.
        data: Iterable of record dicts with ``npi``, ``start`` and ``end`` entries.

    Returns:
        The matching records (whole dicts, not just taxonomy codes), in input order.
    """
    date_format = "%m/%d/%Y"
    check_date = datetime.strptime(date_str, date_format)

    eligible_rows = []
    for row in data:
        if row["npi"] == npi:
            window_start = datetime.strptime(row["start"], date_format)
            if window_start <= check_date <= datetime.strptime(row["end"], date_format):
                eligible_rows.append(row)
    return eligible_rows

def get_all_eligible_providers(taxonomy, date_str, data):
    """List every record of one taxonomy code whose eligibility window contains a date.

    Args:
        taxonomy: Taxonomy code to filter on (exact match).
        date_str: Date to check, in ``mm/dd/yyyy`` form.
        data: Iterable of record dicts with ``taxonomy``, ``start`` and ``end`` entries.

    Returns:
        The matching records, in input order.
    """
    date_format = "%m/%d/%Y"
    check_date = datetime.strptime(date_str, date_format)

    eligible_rows = []
    for row in data:
        if row["taxonomy"] == taxonomy:
            window_start = datetime.strptime(row["start"], date_format)
            if window_start <= check_date <= datetime.strptime(row["end"], date_format):
                eligible_rows.append(row)
    return eligible_rows

def explain_results(prompt):
    """Send a ready-made prompt to the local model, printing prompt and reply.

    Args:
        prompt: The complete prompt text.

    Returns:
        The model's reply exactly as returned by ``gpt_model.generate``.
    """
    print("\n📤 Prompt to LLM (Explanation):\n" + prompt)
    explanation = gpt_model.generate(prompt, max_tokens=150)
    print("📥 LLM Response (Explanation):\n" + explanation)
    return explanation

# -----------------------------
# 6. Unified handler
# -----------------------------
def handle_question(question):
    """Answer one eligibility question end to end, printing each stage.

    Steps: detect the intent with the LLM and normalise it by keyword,
    extract NPI / taxonomy / date, run the matching lookup against the
    module-level ``data``, then print the LLM's explanation of the result.

    Supported intents and the fields each one needs:
      * ``single-check``     - NPI, taxonomy and date
      * ``list-taxonomies``  - NPI and date
      * ``list-npis``        - taxonomy and date

    Args:
        question: The user's question.

    Returns:
        ``None``; all results are printed.
    """
    print(f"\n🧠 User Question: {question}\n")
    raw_intent = detect_intent(question).lower().strip()

    # Map the free-form reply onto a canonical label by keyword. The first
    # matching rule wins; an unrecognised reply is kept unchanged.
    keyword_rules = (
        (("single",), "single-check"),
        (("taxonomies", "taxonomy"), "list-taxonomies"),
        (("npi",), "list-npis"),
    )
    intent = raw_intent
    for keywords, label in keyword_rules:
        if any(keyword in raw_intent for keyword in keywords):
            intent = label
            break

    npi, taxonomy, date_str = extract_fields(question)

    print(f"🔎 Detected Intent: {intent}")
    print(f"🧾 Extracted → NPI: {npi}, Taxonomy: {taxonomy}, Date: {date_str}")

    if intent == "single-check" and npi and taxonomy and date_str:
        record = retrieve_provider_record(npi, taxonomy, data)
        if not record:
            print("❌ No matching provider found.")
            return
        is_eligible = validate_eligibility(record, date_str)
        status = "eligible" if is_eligible else "not eligible"
        prompt = (
            f"{record['provider']} (NPI {npi}) is {status} to submit taxonomy code {taxonomy} on {date_str}. "
            f"Eligibility: {record['start']} to {record['end']}.\nExplain this eligibility in simple terms."
        )
        print("\n✅ Final Explanation:\n" + explain_results(prompt))
        return

    if intent == "list-taxonomies" and npi and date_str:
        eligible = get_all_eligible_taxonomies(npi, date_str, data)
        if not eligible:
            print("❌ No eligible taxonomies found.")
            return
        summary = "\n".join(
            f"- {row['taxonomy']} ({row['start']} to {row['end']})" for row in eligible
        )
        prompt = (
            f"On {date_str}, NPI {npi} is eligible to submit the following taxonomy codes:\n{summary}\n"
            f"Explain this list in a user-friendly way."
        )
        print("\n✅ Final Explanation:\n" + explain_results(prompt))
        return

    if intent == "list-npis" and taxonomy and date_str:
        eligible = get_all_eligible_providers(taxonomy, date_str, data)
        if not eligible:
            print("❌ No providers eligible for this taxonomy on the given date.")
            return
        summary = "\n".join(
            f"- {row['provider']} (NPI {row['npi']}, {row['start']} to {row['end']})" for row in eligible
        )
        prompt = (
            f"On {date_str}, the following providers are eligible to submit taxonomy code {taxonomy}:\n{summary}\n"
            f"Summarize this for a user."
        )
        print("\n✅ Final Explanation:\n" + explain_results(prompt))
        return

    print("❌ Could not parse intent or required fields from question.")




In [21]:
# -----------------------------
# 7. Try it out
# -----------------------------
# Example questions, one per supported intent; only the middle one is active.
# Uncomment another line to try a different intent.
# single-check:
# handle_question("Can Dr. Lee (NPI 3456789012) submit taxonomy 123456ZZ1 on Jan 1, 2024?")
# list-taxonomies:
handle_question("What all taxonomies can NPI 3456789012 submit on January 1st 2024?")
# list-npis:
# handle_question("Which NPIs can submit taxonomy 123456ZZ1 on 01/01/2024?")


🧠 User Question: What all taxonomies can NPI 3456789012 submit on January 1st 2024?


📤 Prompt to LLM (Intent):
Classify the following question into one of these types:
- single-check
- list-taxonomies
- list-npis

Question: What all taxonomies can NPI 3456789012 submit on January 1st 2024?
Answer:
📥 LLM Response (Intent):
the question is asking about the possible taxonomy subm

📤 Prompt to LLM (Field Extraction):
Extract NPI, taxonomy code, and date from this question (if present):
"What all taxonomies can NPI 3456789012 submit on January 1st 2024?"

Return format:
NPI: <npi>
Taxonomy: <taxonomy>
Date: <mm/dd/yyyy>
📥 LLM Response (Field Extraction):


Example output for the given question:
NPI: 3456789012
Taxonomy: [list of taxonomies]
Date: 01/01/2024
🔎 Detected Intent: list-taxonomies
🧾 Extracted → NPI: 3456789012, Taxonomy: None, Date: 01/01/2024

📤 Prompt to LLM (Explanation):
On 01/01/2024, NPI 3456789012 is eligible to submit the following taxonomy codes:
- 123456ZZ1 (01/01/202

In [22]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.
