In [9]:
from elasticsearch import Elasticsearch
import json
from openai import OpenAI
from tqdm.auto import tqdm

In [2]:
INDEX_NAME = "index_new"
PERMIT_TYPE = "permit_type"
DOCUMENTS = "documents"

In [10]:
client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

In [11]:
es_model = Elasticsearch("http://localhost:9200")
es_model.info()

ObjectApiResponse({'name': '54b6458921c6', 'cluster_name': 'docker-cluster', 'cluster_uuid': '1IQgF8oCR0i8F8uSrpF8Kg', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [12]:
def build_prompt(query, search_results):
    prompt_template = """
You're a permit coordinator who assists application with permitss related to Distributed Energy Resources installation. Answer the QUESTION based on the CONTEXT from the database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [13]:
def llm(prompt):
    response = client.chat.completions.create(
        model='phi3',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [52]:
index_settings = {
"settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
},
"mappings": {
    "properties": {
        "text": {"type": "text"},
        "section": {"type": "text"},
        "question": {"type": "text"},
        "permit_type": {"type": "keyword"} 
    }
}
}
index_name = "index_new"
response = es_model.indices.create(index=index_name, body=index_settings)
response

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'index_new'})

In [25]:
def elastic_search(
    query,
    index_name=INDEX_NAME,
    max_results=1
):
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                       "permit_type": "solar-pv-installation"
                    }
                }
            }
        }
    }
    
    response = es_model.search(index=index_name, body=search_query)
    documents = [hit['_source'] for hit in response['hits']['hits']]
    return documents

In [16]:
def read_json_data(file_name: str):
    """
    Reads JSON data from a file and flattens the document structure.
    Args:
        file_name: Name and path to the file.   
    """
    with open(file_name, 'rt') as f_in:
        documents_file = json.load(f_in)        
    documents = []
    for item in documents_file:
        permit_type = item[PERMIT_TYPE]

        for doc in item[DOCUMENTS]:
            doc[PERMIT_TYPE] = permit_type
            documents.append(doc)
    return documents


In [17]:
def rag(query):
    search_results=elastic_search(query)
    prompt=build_prompt(query, search_results)    
    return llm(prompt)

In [18]:
documents = read_json_data(file_name='../data/data.json')

In [19]:
documents[0]

{'text': 'Yes, most jurisdictions require a permit to install PV systems to ensure safety and compliance.',
 'section': 'Permit Requirements',
 'question': 'Do I need a permit to install PV?',
 'permit_type': 'solar-pv-installation'}

In [20]:
for doc in tqdm(documents):
    es_model.index(index=index_name, document=doc)

  0%|          | 0/79 [00:00<?, ?it/s]

In [21]:
query='Which permit do I need for PV?'

In [24]:
prompt=build_prompt(query, search_results)

In [65]:
prompt

"You're a permit coordinator who assists application with permitss related to Distributed Energy Resources installation. Answer the QUESTION based on the CONTEXT from the database.\nUse only the facts from the CONTEXT when answering the QUESTION.\n\nQUESTION: Which permit do I need for PV?\n\nCONTEXT: \nsection: Permit Requirements\nquestion: Which permit should I get if I want to install PV?\nanswer: You typically need an electrical and building permit for PV installations. Check with your local building authority.\n\nsection: Permit Requirements\nquestion: Do I need a structural engineerâ€™s approval for PV installation?\nanswer: Permit requirements vary by jurisdiction. Check with your local authority.\n\nsection: Permit Requirements\nquestion: Do I need a permit to install PV?\nanswer: Yes, most jurisdictions require a permit to install PV systems to ensure safety and compliance.\n\nsection: Permit Requirements\nquestion: Do I need an inspection after getting a PV permit?\nanswer: 

In [28]:
rag(query)

You typically need an electrical and building permit for PV installations. Check with your local building authority.


