In [1]:
import os

from elasticsearch import Elasticsearch
import json
from openai import OpenAI
from tqdm.auto import tqdm

In [2]:
# Name of the Elasticsearch index used throughout the notebook.
INDEX_NAME = "es_index"
# Key holding the permit type in each top-level entry of the JSON data file.
PERMIT_TYPE = "permit_type"
# Key holding the list of documents in each top-level entry of the JSON data file.
DOCUMENTS = "documents"

In [3]:
# Client for the Elasticsearch instance at localhost:9200; later cells use it as `es_model`.
es_model = Elasticsearch("http://localhost:9200")
# Request the cluster info; as the cell's last expression, the response is displayed.
es_model.info()

ObjectApiResponse({'name': 'ff725117390c', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'Y0uUMpCTTwW0Eo0Ijues9w', 'version': {'number': '8.4.3', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '42f05b9372a9a4a470db3b52817899b99a76ee73', 'build_date': '2022-10-04T07:17:24.662462378Z', 'build_snapshot': False, 'lucene_version': '9.3.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

In [7]:
# Index definition: one primary shard, no replicas, and an explicit mapping
# for the four fields carried by every document.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            # Full-text fields searched by the multi_match query.
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # Keyword field so it can be used in an exact-match term filter.
            "permit_type": {"type": "keyword"},
        }
    },
}
index_name = INDEX_NAME

# Drop any index left over from a previous run so this cell can be re-executed
# (Restart & Run All) without raising resource_already_exists_exception and
# without accumulating duplicate documents when the indexing cell runs again.
# NOTE: this deletes all data currently stored in `index_name`.
es_model.indices.delete(index=index_name, ignore_unavailable=True)

response = es_model.indices.create(index=index_name, body=index_settings)
response

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'es_index'})

In [4]:
def read_json_data(file_name: str) -> list:
    """
    Reads JSON data from a file and flattens the document structure.

    The file is expected to contain a list of entries, each holding a permit
    type under the PERMIT_TYPE key and a list of documents under the
    DOCUMENTS key.

    Args:
        file_name: Name and path to the file.

    Returns:
        A flat list of document dicts. Each dict is a copy of the document
        from the file with the permit type of its parent entry stored under
        the PERMIT_TYPE key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If an entry lacks the PERMIT_TYPE or DOCUMENTS key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which would garble non-ASCII text on non-UTF-8 locales.
    with open(file_name, 'rt', encoding='utf-8') as f_in:
        documents_file = json.load(f_in)

    documents = []
    for item in documents_file:
        permit_type = item[PERMIT_TYPE]
        # Build new dicts rather than mutating the loaded structure in place.
        documents.extend(
            {**doc, PERMIT_TYPE: permit_type} for doc in item[DOCUMENTS]
        )
    return documents


In [5]:
def retrieve_documents(
    query: str,
    index_name: str = INDEX_NAME,
    max_results: int = 1,
    permit_type="solar-pv-installation",
) -> list:
    """
    Searches the Elasticsearch index for documents matching a question.

    Runs a `multi_match` query of type `best_fields` over the `question`,
    `text` and `section` fields (with `question` boosted by a factor of 3),
    optionally restricted to a single permit type.

    Args:
        query: Free-text search string.
        index_name: Name of the index to search.
        max_results: Maximum number of hits to return.
        permit_type: Exact `permit_type` value the hits must have. Defaults to
            "solar-pv-installation"; pass None to search across all permit
            types.

    Returns:
        The `_source` dict of each hit, in the order Elasticsearch returned them.
    """
    bool_query = {
        "must": {
            "multi_match": {
                "query": query,
                "fields": ["question^3", "text", "section"],
                "type": "best_fields",
            }
        }
    }
    # Only add the term filter when a permit type was requested.
    if permit_type is not None:
        bool_query["filter"] = {"term": {"permit_type": permit_type}}

    search_query = {
        "size": max_results,
        "query": {"bool": bool_query},
    }

    # Uses the notebook-level `es_model` client.
    response = es_model.search(index=index_name, body=search_query)
    return [hit['_source'] for hit in response['hits']['hits']]

### Read JSON data

In [6]:
# Load the data file into a flat list of document dicts, each tagged with its permit type.
documents = read_json_data(file_name='../data/data.json')

In [7]:
# Inspect the first flattened document.
documents[0]

{'text': 'Yes, most jurisdictions require a permit to install PV systems to ensure safety and compliance.',
 'section': 'Permit Requirements',
 'question': 'Do I need a permit to install PV?',
 'permit_type': 'solar-pv-installation'}

In [10]:
# Index every document into `index_name`, one request per document,
# with tqdm reporting progress.
for doc in tqdm(documents):
    es_model.index(index=index_name, document=doc)

  0%|          | 0/79 [00:00<?, ?it/s]

### Perform a Search

In [11]:
# Example user question to run against the index.
query = "Which permit do I need for PV?"

In [12]:
# Fetch the matching documents; with the default max_results=1 this is at most one hit.
response = retrieve_documents(query)

# Print the section, question and answer text of each returned document.
for doc in response:
    print(f"Section: {doc['section']}\nQuestion: {doc['question']}\nAnswer: {doc['text']}\n\n")

Section: Permit Requirements
Question: Which permit should I get if I want to install PV?
Answer: You typically need an electrical and building permit for PV installations. Check with your local building authority.


