In [2]:
#!pip3 install langchain

In [21]:
import json
 
 
# Read the workplace documents from the JSON file next to this notebook.
# The encoding is spelled out because open() would otherwise fall back to the
# platform's locale encoding, which can mis-decode non-ASCII text in the file.
with open('workplace-docs.json', encoding='utf-8') as f:
    workplace_docs = json.load(f)

print(f"Successfully loaded {len(workplace_docs)} documents")

Successfully loaded 15 documents


In [22]:
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import ElasticKnnSearch
from langchain.text_splitter import CharacterTextSplitter
from urllib.request import urlopen
import os, json

# Load settings from a local .env file (if any) before reading the environment.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "define it in the environment or in the .env file"
        )
    return value


# Fail here, with a clear message, instead of letting a missing setting come
# back as None and surface later as a confusing connection or auth failure.
openai_api_key = _require_env('OPENAI_API_KEY')
elastic_user = _require_env('ES_USER')
elastic_password = _require_env('ES_PASSWORD')
elastic_endpoint = _require_env("ES_ENDPOINT")

# Name of the Elasticsearch index used by the vector store in this notebook.
elastic_index_name = 'elastic-knn-search'

In [23]:
# Fields of each source document that travel with its chunks as metadata.
metadata_fields = ("name", "summary", "rolePermissions")

# Two parallel lists: the raw text to split, and the metadata for each text.
content = [doc["content"] for doc in workplace_docs]
metadata = [
    {field: doc[field] for field in metadata_fields}
    for doc in workplace_docs
]

# NOTE(review): the recorded output of this cell warns about chunks longer
# than the requested 100 characters — confirm chunk_size is what is intended.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
docs = text_splitter.create_documents(content, metadatas=metadata)

Created a chunk of size 245, which is longer than the specified 100
Created a chunk of size 288, which is longer than the specified 100
Created a chunk of size 204, which is longer than the specified 100
Created a chunk of size 281, which is longer than the specified 100
Created a chunk of size 249, which is longer than the specified 100
Created a chunk of size 285, which is longer than the specified 100
Created a chunk of size 298, which is longer than the specified 100
Created a chunk of size 270, which is longer than the specified 100
Created a chunk of size 224, which is longer than the specified 100
Created a chunk of size 288, which is longer than the specified 100
Created a chunk of size 260, which is longer than the specified 100
Created a chunk of size 199, which is longer than the specified 100
Created a chunk of size 290, which is longer than the specified 100
Created a chunk of size 251, which is longer than the specified 100
Created a chunk of size 195, which is longer tha

In [24]:
from elasticsearch import Elasticsearch

# Credentials go through basic_auth instead of being spliced into the URL:
# characters such as "@", ":", "/" or "#" in a password would otherwise
# corrupt the URL, and URLs tend to end up in logs and tracebacks.
url = f"https://{elastic_endpoint}:9200"
connection = Elasticsearch(
    url,
    basic_auth=(elastic_user, elastic_password),
    ca_certs="./http_ca.crt",
    verify_certs=True,
)

# Round-trip to the cluster so a bad endpoint, certificate or login fails here.
print(connection.info())

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
# Vector dimension handed to the vector store; presumably 1536 matches the
# embedding model's output size — confirm if the model is changed.
dims = 1536

# Embed the chunks and store them in the index.
# NOTE(review): presumably re-running this cell indexes the same chunks
# again — verify before re-executing.
es = ElasticKnnSearch.from_documents(
    docs,
    index_name=elastic_index_name,
    embedding=embeddings,
    es_user=elastic_user,
    es_password=elastic_password,
    dims=dims,
    es_connection=connection,
)


{'name': 'liuxgm.local', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'KEk0jDqRTUW_66q32YjIzA', 'version': {'number': '8.10.4', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'b4a62ac808e886ff032700c391f45f1408b2538c', 'build_date': '2023-10-11T22:04:35.506990650Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [25]:
def showResults(output):
    """Print the number of results, then each result on its own line.

    Args:
        output: A sequence of results (for example a list); it must
            support len() and iteration.
    """
    print("Total results: ", len(output))
    for item in output:
        print(item)

In [26]:
query = "work from home policy"

# Embed the query text here and pass both the text and its vector to the store.
query_vector = embeddings.embed_query(query)
result = es.similarity_search(query=query, query_vector=query_vector)

showResults(result)

Total results:  4
page_content='The purpose of this full-time work-from-home policy is to provide guidelines and support for employees to conduct their work remotely, ensuring the continuity and productivity of business operations during the COVID-19 pandemic and beyond.\nScope'
page_content='This work-from-home policy will be reviewed periodically and updated as necessary, taking into account changes in public health guidance, business needs, and employee feedback.\nQuestions and Concerns'
page_content='This policy applies to all employees who are eligible for remote work as determined by their role and responsibilities. It is designed to allow employees to work from home full time while maintaining the same level of performance and collaboration as they would in the office.\nEligibility'
page_content='The company encourages employees to prioritize their health and well-being while working from home. This includes taking regular breaks, maintaining a work-life balance, and seeking sup