In [1]:
import json

In [2]:
# The FAQ text contains non-ASCII characters (e.g. the curly apostrophe in
# "wouldn’t"), so decode explicitly as UTF-8 instead of relying on the
# platform's default locale encoding.
with open('st_johns_faq.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [3]:
# Flatten the per-organization groups into one list of FAQ records, tagging
# every record with the organization it belongs to.
documents = []

for group in docs_raw:
    faqs = group['documents']
    for faq in faqs:
        # The key is written onto the dicts held by docs_raw as well.
        faq['organization'] = group['organization']
    documents.extend(faqs)

In [4]:
# Spot-check one flattened record; the output shows the added 'organization' key.
documents[1]

{'text': 'Donors are critical to our work here at Hato Hone St John. Without them, we simply wouldn’t be able to help as many people as we do. Join Team Green: You can join team of dedicated donors who provide critical monthly backup to Hato Hone St John frontline services. Your dependable support ensures that our caring frontline teams can do their very best for you, your family and your community. Click here to join Team Green now. Make a one-off donation: You can also make a one-off donation to Hato Hone St John. Your donation will help ensure people across New Zealand have access to an ambulance when they need it most. Click here to donate.',
 'section': 'donate',
 'question': 'How can I support / donate to you?',
 'organization': 'Hato Hone St John'}

In [5]:
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-07-02 04:18:42--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3.7K) [text/plain]
Saving to: ‘minsearch.py.1’


2024-07-02 04:18:42 (11.0 MB/s) - ‘minsearch.py.1’ saved [3832/3832]



In [6]:
import minsearch

In [7]:
# keyword_fields must list every field that search() filters on. The records
# carry an 'organization' key (there is no 'course' key), and search() passes
# filter_dict={'organization': ...}; minsearch only applies filters for
# registered keyword fields, so 'organization' has to be declared here.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["organization"]
)

In [8]:
# Sample user question used to exercise the pipeline.
q = 'membership reduce ambulance fee?'

In [9]:
# Fit the index on the flattened FAQ records (the call evaluates to the Index itself, as the output shows).
index.fit(documents)

<minsearch.Index at 0x70ab5bc1ded0>

In [10]:
def search(q):
    """Look up the FAQ entries most relevant to the question ``q``.

    Queries the module-level ``index`` for at most five results, passing a
    filter on organization 'Hato Hone St John' and boosting the 'question'
    field by 3.0 and the 'section' field by 0.5.

    Returns whatever ``index.search`` returns.
    """
    field_weights = {'question': 3.0, 'section': 0.5}
    org_filter = {'organization': 'Hato Hone St John'}

    return index.search(
        query=q,
        filter_dict=org_filter,
        boost_dict=field_weights,
        num_results=5,
    )

In [11]:
def build_prompt(q, search_results):
    """Assemble the LLM prompt for question ``q`` from retrieved FAQ entries.

    Each item of ``search_results`` must provide the keys 'section',
    'question' and 'text'; the items are rendered one after another under the
    CONTEXT heading. Leading and trailing whitespace is stripped from the
    finished prompt.
    """
    template = (
        "You're a admin assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    # One text block per retrieved entry, each terminated by a blank line.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]

    return template.format(question=q, context="".join(entries)).strip()

In [12]:
from dotenv import load_dotenv
from openai import OpenAI
import os

In [13]:
# Load variables from the .env file, then fail fast if the OpenAI key is
# still unavailable.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
# `not api_key` also rejects an empty value (e.g. a bare `OPENAI_API_KEY=` line),
# which an `is None` check would let through.
if not api_key:
    raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable in the .env file.")

In [14]:
# No key is passed explicitly; presumably the client reads OPENAI_API_KEY from
# the environment — confirm against the openai client documentation.
client = OpenAI()

In [15]:
def llm(prompt, model='gpt-3.5-turbo'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text to send to the chat-completions endpoint.
        model: Name of the chat model to use. Defaults to 'gpt-3.5-turbo',
            so existing ``llm(prompt)`` calls behave as before.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

In [16]:
def rag(q):
    """Answer question ``q`` end to end: retrieve, build the prompt, ask the LLM.

    Chains ``search`` -> ``build_prompt`` -> ``llm`` and returns the value
    produced by ``llm``.
    """
    hits = search(q)
    return llm(build_prompt(q, hits))

In [17]:
# Run the full retrieve-and-answer pipeline on the sample question `q`.
rag(q)

'No, having an Ambulance Membership does not reduce the ambulance fee. The membership fee simply waives the ambulance fees if you are treated by one of our ambulance officers and/or transported because of a medical emergency.'