#### Install required dependencies

In [None]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel, and install everything in one call so
# pip resolves the dependencies of all four packages together.
%pip install -q langchain langchain-elasticsearch langchain-community tiktoken

#### Import packages

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_elasticsearch import ElasticsearchStore
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain_elasticsearch import ElasticsearchStore
from getpass import getpass
from urllib.request import urlopen
import json

#### Prompt user to provide Cloud ID and API Key

In [5]:
# Both credentials are read with getpass so that the typed values are not
# echoed back into the notebook.
#
# Finding the Cloud ID:
# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#finding-your-cloud-id
# Creating an API key:
# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key
ELASTIC_CLOUD_ID = getpass(prompt="Elastic Cloud ID: ")
ELASTIC_API_KEY = getpass(prompt="Elastic Api Key: ")

#### Prepare documents for chunking and ingestion

In [6]:
# Location of the sample workplace documents; the payload is parsed as JSON
# and iterated as a sequence of document objects below.
url = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json"

# Open the response in a context manager so the underlying connection is
# closed as soon as the body has been read, even if parsing fails.
with urlopen(url) as response:
    workplace_docs = json.loads(response.read())

# Two parallel lists: content[i] is the text of document i and metadata[i]
# holds the fields that are attached to every chunk produced from it.
content = [doc["content"] for doc in workplace_docs]
metadata = [
    {
        "name": doc["name"],
        "summary": doc["summary"],
        "rolePermissions": doc["rolePermissions"],
    }
    for doc in workplace_docs
]

# Split on token counts (via tiktoken) rather than characters: chunks of
# 512 with an overlap of 256 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=512, chunk_overlap=256
)
docs = text_splitter.create_documents(content, metadatas=metadata)

#### Define Elasticsearch Vector Store

In [7]:
# Sparse-vector retrieval strategy that points at the ELSER model by id.
# NOTE(review): assumes this model is already deployed in the target
# cluster — confirm before running.
elser_strategy = ElasticsearchStore.SparseVectorRetrievalStrategy(
    model_id=".elser_model_2_linux-x86_64"
)

# Vector store bound to the Elastic Cloud deployment identified by the
# credentials collected earlier in the notebook.
es_vector_store = ElasticsearchStore(
    index_name="workplace_index_elser",
    es_cloud_id=ELASTIC_CLOUD_ID,
    es_api_key=ELASTIC_API_KEY,
    strategy=elser_strategy,
)


#### Add the documents processed above. The documents have already been chunked; the embeddings are generated in Elasticsearch by an ingest pipeline.

In [8]:
# Send the chunked documents to the Elasticsearch vector store. The call is
# left as the bare last expression so its return value is displayed as the
# cell output (a list of ID strings in the recorded run).
es_vector_store.add_documents(documents=docs)

['0c88fd07-e115-4d3d-b87c-f15404f5edcb',
 'acf4565d-6bac-410d-bc5f-40afe7e07bcc',
 '984bc926-9311-4c07-89fc-16fa86ab1a66',
 'ca8810f4-edeb-47df-b198-5bff6ed4ff64',
 '50a342f2-58fa-463d-bc77-20a557c3e37f',
 '5daa4345-8781-463f-9ed7-02432793da62',
 'eeabdcd3-a360-4b14-a4b7-8a40d07b6c7d',
 '793294d6-e743-49a4-8898-ade4bc45e453',
 '3a4e9c41-ffcd-40ec-bee9-c3df7c29caee',
 'd4c1dd98-aa22-4139-bb6f-aafc3a6d3528',
 '26907106-9adb-4d2b-8d49-a95433d87016',
 'fc6c2c51-97ac-49dd-b234-80083c21f8c9',
 '5345dcda-3b5a-48a6-92e0-9fb8cc5bd5d2',
 'b9cf77e1-3387-4aa9-a998-c18d17555930',
 'ddf202d7-9dc8-425e-a40d-c067a25c1b3d',
 '3d0d34db-6e43-400d-9cba-30273002e075',
 'd5baeec7-6675-40f0-8be4-4b0aca14ccd9',
 'e88c7e27-2daa-4e33-a492-76e7dfd629b5',
 '12e1db8f-70ef-4419-bcb6-7a2a39e2ff04',
 '25efe115-a61b-4ad6-aecf-96e514c7090f',
 '0f0fa0ae-13e0-4691-8909-889cda19f744',
 '135677a7-2bb6-4bdd-9f7c-c9998c284073',
 'a9990658-5154-40fc-8461-1ae6787f4435',
 'b0e6848e-b523-4689-8d94-c4ce775b5e2f',
 '9d5c3e2e-c713-

#### LLM Configuration

In [9]:
# LLM wrapper for the "llama3" model served through Ollama.
# NOTE(review): presumably requires a reachable Ollama server with the
# llama3 model already pulled — confirm before running.
llm = Ollama(model="llama3")

#### Semantic Search using Elasticsearch ELSER and Llama3

In [10]:
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string
            attribute.

    Returns:
        The ``page_content`` values in iteration order, joined with two
        newline characters; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

# Retriever view over the Elasticsearch vector store, using its defaults.
retriever = es_vector_store.as_retriever()

# Prompt text with two placeholders: {context} and {question}. The literal
# is kept exactly as written because its whitespace is part of the prompt
# sent to the model.
template = """Answer the question based only on the following context:\n

                {context}
                
                Question: {question}
               """
prompt = ChatPromptTemplate.from_template(template)

# First stage of the chain: the incoming question is passed to the retriever
# (whose results are flattened by format_docs) and is also forwarded
# unchanged under the "question" key.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build the inputs, fill the prompt, call the LLM, then
# parse the model output with StrOutputParser.
chain = chain_inputs | prompt | llm | StrOutputParser()

chain.invoke("What are the organizations sales goals?")

"Based on the provided context, the organization's sales goals for fiscal year 2024 are:\n\n1. Increase revenue by 20% compared to fiscal year 2023.\n2. Expand market share in key segments by 15%.\n3. Retain 95% of existing customers and increase customer satisfaction ratings.\n\nThese goals aim to drive growth, strengthen relationships with customers, and improve overall performance."