# LangChain <--> Elasticsearch

Elasticsearch is an open source distributed, RESTful search and analytics engine, scalable data store, and vector database capable of addressing a growing number of use cases. As the heart of the Elastic Stack, it centrally stores your data for lightning-fast search, fine-tuned relevancy, and powerful analytics that scale with ease.
Elasticsearch can store and index a variety of data, including structured and unstructured text, numerical data, and geospatial data. It is known for its ability to run queries over large-scale unstructured data.
Elasticsearch uses a search index, which is similar to an index in the back of a book, to map content to its location in a document. This allows users to quickly find information without scanning through an entire document.

- https://www.elastic.co/search-labs/blog/langchain-collaboration
- https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
- https://python.langchain.com/docs/integrations/vectorstores/elasticsearch/
- https://www.elastic.co/blog/elasticsearch-is-open-source-again


In [None]:
! pip install -r requirements.txt -q

# Install Elasticsearch with Docker

- docker network create elastic
- docker pull docker.elastic.co/elasticsearch/elasticsearch:8.15.3
- docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.15.3

In [None]:
import os
from dotenv import dotenv_values

In [None]:
# Read the key/value pairs stored in ./keys/.env into `config`.
config = dotenv_values("./keys/.env")

In [None]:
import os, tempfile
from langchain.prompts import PromptTemplate


from langchain_community.document_loaders import TextLoader

from langchain.chains import ConversationalRetrievalChain, RetrievalQA

from langchain_text_splitters import CharacterTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from google.oauth2 import service_account
from dotenv import dotenv_values
import json
import vertexai
 
import itertools
import time


In [None]:
# Settings used below (PROJECT, REGION, GEMINI-API-KEY) come from ./keys/.env.
config = dotenv_values("./keys/.env")

# Read the service-account key file. JSON is UTF-8 text, so the encoding is
# stated explicitly instead of relying on the platform default.
with open(
    "./keys/complete-tube-421007-208a4862c992.json", encoding="utf-8"
) as source:
    info = json.load(source)

# Build Google credentials from the key and initialise the Vertex AI SDK with
# the project and region taken from the .env file.
vertex_credentials = service_account.Credentials.from_service_account_info(info)
vertexai.init(
    project=config["PROJECT"],
    location=config["REGION"],
    credentials=vertex_credentials,
)

# Keep the Gemini API key in a variable for later cells and export it to the
# process environment.
# NOTE(review): confirm GEMINI_API_KEY is the variable name the client
# libraries actually read — TODO verify.
google_api_key = config["GEMINI-API-KEY"]
os.environ["GEMINI_API_KEY"] = google_api_key

In [None]:
# Remember the directory the notebook was started from and display it.
ROOT = os.getcwd()
ROOT

In [None]:
# Embedding client that is handed to the Elasticsearch vector stores; it is
# given both the API key and the service-account credentials.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=google_api_key,
    credentials=vertex_credentials,
)

In [None]:
from langchain_elasticsearch import ElasticsearchStore

# Vector store on the "langchain_index" index, connecting with a user name
# and password.
# NOTE(review): the password is hard-coded; consider reading it from the
# .env file instead. This store is not referenced by the other visible cells.
elastic_vector_search = ElasticsearchStore(
    index_name="langchain_index",
    embedding=embeddings,
    es_url="http://localhost:9200",
    es_user="elastic",
    es_password="changeme",
)

In [None]:
from langchain_elasticsearch import ElasticsearchStore

# Vector store on the "langchain-demo" index, connecting without credentials.
# This is the store the rest of the notebook writes to and queries.
vector_store = ElasticsearchStore(
    index_name="langchain-demo",
    embedding=embeddings,
    es_url="http://localhost:9200",
)

In [None]:
mypath = "./docs"
# Keep regular files only (sub-directories are skipped). The listing is sorted
# because os.listdir() returns entries in arbitrary order and a later cell
# loads onlyfiles[0]; sorting makes that choice reproducible.
onlyfiles = sorted(
    f for f in os.listdir(mypath) if os.path.isfile(os.path.join(mypath, f))
)
onlyfiles

In [None]:
def load_file(path):
    """Load a PDF file and return it as LangChain documents.

    Args:
        path: Location of the PDF handed to ``PyPDFLoader``.

    Returns:
        The result of ``PyPDFLoader(path).load_and_split()``.
    """
    return PyPDFLoader(path).load_and_split()

# Load the first file listed in the docs folder. Fail early with a clear
# message instead of an opaque IndexError when the folder holds no files.
if not onlyfiles:
    raise FileNotFoundError("No files found in the docs folder; add a PDF first.")
path = os.path.join("docs", onlyfiles[0])
pages = load_file(path)

In [None]:
# Display the first loaded page as a quick check of the extraction.
pages[0]

In [None]:
# Number of page documents produced by the loader.
len(pages)

In [None]:
from uuid import uuid4

from langchain_core.documents import Document

In [None]:
# One random UUID string per page, passed as the ids of the stored documents.
uuids = [str(uuid4()) for _ in pages]

# Embed the pages and index them in the vector store.
vector_store.add_documents(documents=pages, ids=uuids)

In [None]:
# Retriever over the vector store, configured with k=5 results per search.
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [None]:
# Gemini chat model used to answer questions; it authenticates with the
# service-account credentials.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro-001",
    credentials=vertex_credentials,
)

In [None]:
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
# Conversational retrieval chain: combines the chat model with the retriever
# and also returns the source documents behind each answer.
# Example of a metadata filter kept for reference (currently unused):
# filter={"source" :"docs\\Baremo 2015.pdf"}
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# First question: ask for the company's main details as a fixed list of fields.
query = """Provide Main Details of the company Aardvark Constructions Limited. Including following details:
Name:
Country:
Company Number:
Incorporated:
Company Type:
Company Status:
Primary Addresses Registered Office:
Accounting Dates:
Confirmation Statement:
"""

# No earlier turns exist yet, so the conversation history starts empty.
chat_history = []

result = chain.invoke(
    {
        "question": query,
        "chat_history": chat_history,
    }
)

print(result["answer"])

In [None]:
# Inspect which fields the chain returned.
result.keys()

In [None]:
# Number of source documents returned alongside the answer.
len(result["source_documents"])

In [None]:
# Start the history with the first (question, answer) pair so follow-up
# questions are asked with that turn as context.
chat_history = [(query, result["answer"])]

In [None]:
# Follow-up question about the management details, asked with the history so far.
query2 = """From Management Details extract:
Managed By:
Managed By Email:
"""
result = chain.invoke(
    {
        "question": query2,
        "chat_history": chat_history,
    }
)
print(result["answer"])

In [None]:
# Record the second turn in the history, then display the history.
chat_history.append((query2, result["answer"]))
chat_history

In [None]:
# Third question: previous company names and the periods they were used.
query3 = """Past Names of the Company with their period """
result = chain.invoke(
    {
        "question": query3,
        "chat_history": chat_history,
    }
)
print(result["answer"])

In [None]:
# Record the third turn in the history, then display the history.
chat_history.append((query3, result["answer"]))
chat_history

In [None]:
# Fourth question: board appointments, asked with the accumulated history.
query4 = """Appointments Board Positions"""
result = chain.invoke(
    {
        "question": query4,
        "chat_history": chat_history,
    }
)
print(result["answer"])