In [1]:
import pandas as pd
import minsearch
import dotenv

dotenv.load_dotenv('../.env')

True

## Ingestion

In [2]:
# Load the FAQ dataset from CSV into a DataFrame.
df = pd.read_csv('../data/cms_faq.csv')

In [3]:
# One dict per DataFrame row, keyed by column name.
documents = df.to_dict(orient='records')

In [10]:
# Search index over the FAQ records: category/question/answer are registered as
# text fields and id as a keyword field.
index = minsearch.Index(
    text_fields=['category', 'question', 'answer'],
    keyword_fields=['id']
    )

In [11]:
# Fit the index on the loaded FAQ records.
index.fit(documents)

<minsearch.minsearch.Index at 0x7d20833389d0>

## RAG flow

In [12]:
from openai import OpenAI
# Module-level client used by llm(). It is constructed with no arguments, so
# credentials presumably come from the environment loaded via dotenv above —
# TODO confirm.
client = OpenAI()

In [13]:
def search(query, num_results=10):
    """Look up the FAQ records that best match a free-text query.

    Args:
        query: Free-text question to run against the module-level ``index``.
        num_results: Maximum number of records to request from the index.
            Defaults to 10, the value that used to be hard-coded here.

    Returns:
        Whatever ``index.search`` returns for the query; callers in this
        notebook iterate over it and treat each item as a mapping of FAQ
        fields.
    """
    return index.search(
        query=query,
        filter_dict={},  # no keyword filtering
        boost_dict={},   # no per-field boosting
        num_results=num_results,
    )

In [None]:

# Instruction prompt sent to the model; {question} and {context} are filled in
# by build_prompt().
prompt_template = """
You're CaliCoverage, an assistant that answers questions about California health coverage eligibility.
(Medi-Cal = California’s Medicaid; Covered California = the state ACA marketplace.)

Answer the QUESTION using only the facts from the CONTEXT.
If the CONTEXT doesn’t contain the answer, say “I don’t know based on the provided context.”
If key details are missing, briefly state what’s needed (e.g., CA residency, household size, yearly income, immigration/residency status).
Be concise and define any U.S.-specific jargon the first time it appears.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# How a single retrieved FAQ record is rendered inside the CONTEXT section.
entry_template = """
category: {category}
question: {question}
answer: {answer}
source_url: {page_url}
anchor_id: {anchor_id}
""".strip()

def build_prompt(query, search_results):
    """Render the final LLM prompt for a question and its retrieved records.

    Args:
        query: The user's question; inserted verbatim into the QUESTION slot.
        search_results: Iterable of mappings, each providing at least the
            ``category``, ``question``, ``answer``, ``page_url`` and
            ``anchor_id`` keys used by ``entry_template``. Extra keys are
            ignored.

    Returns:
        The prompt string with surrounding whitespace stripped. Entries are
        separated by one blank line; with no results the CONTEXT section is
        left empty.

    Raises:
        KeyError: If a record is missing one of the keys ``entry_template``
            refers to.
    """
    # Join once instead of growing a string inside the loop; the outer strip()
    # already removed the trailing separator, so the result is unchanged.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)
    return prompt_template.format(question=query, context=context).strip()

In [15]:
def llm(prompt, model='gpt-4o-mini'):
    """Send a prompt as a single user message and return the model's reply.

    Args:
        prompt: Full prompt text, sent as the only message in the request.
        model: Name of the chat model to request.

    Returns:
        The message content of the first choice in the completion response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [16]:
def rag(query, model='gpt-4o-mini'):
    """Answer a question end to end: retrieve, build the prompt, ask the model.

    Args:
        query: The user's question.
        model: Name of the chat model passed through to ``llm``.

    Returns:
        The answer text returned by ``llm`` for the assembled prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), model=model)

In [17]:
# Off-topic question (nothing to do with health coverage): checks that the
# prompt's "I don't know" fallback is triggered.
question = 'Is the Lat Pulldown considered a strength training activity, and if so, why?'
answer = rag(question)
print(answer)

I don’t know based on the provided context.


In [18]:
# In-domain eligibility question run through the full RAG pipeline.
question = 'I lost my job and my income is now $0. Am I eligible for Medi-Cal?'
answer = rag(question)
print(answer)

Based on the provided context, to determine your eligibility for Medi-Cal after losing your job and having an income of $0, I would need your California residency status, household size, and any other relevant details regarding your immigration or residency status. However, Medi-Cal does provide coverage for low-income individuals, which may include someone with an income of $0. If you meet the necessary eligibility requirements, you can apply for Medi-Cal at any time.
