# Set up the overall environment
Import the libraries and load the settings from the `.env` file.

In [1]:
import eland as ed
from dotenv import load_dotenv
import os 
from os import environ
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from common.environment import create_new_es_index
from langchain.vectorstores.elasticsearch import ElasticsearchStore
from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import ElasticsearchStore
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

from langchain.agents.agent_toolkits import (
    create_vectorstore_agent,
    VectorStoreToolkit,
    VectorStoreInfo,
)

# Load the .env file.
load_dotenv()

# Shared settings that the cells below reuse.
# Elasticsearch endpoint, with the user name and password embedded in the URL.
es_url = "https://{user}:{pw}@{host}:{port}".format(
    user=environ['elasticsearch_user'],
    pw=environ['elasticsearch_pw'],
    host=environ['elasticsearch_host'],
    port=environ['elasticsearch_port'],
)
es_index = environ['elasticsearch_index']

# Keep a local copy of the key and mirror it under the upper-case
# OPENAI_API_KEY environment variable name.
open_api_key = environ['openai_api_key']
environ['OPENAI_API_KEY'] = open_api_key


# Set up data environment

In [2]:
# Embedding model shared by the vector-store calls in this notebook.
embeddings = OpenAIEmbeddings()

# Create the Elasticsearch index used below.
# NOTE(review): delete_index=True presumably drops an existing index first --
# confirm against common.environment.create_new_es_index.
create_new_es_index(es_url=es_url, index_name=es_index, delete_index=True)

# Vector-store connection that the agent queries later on.
state_of_union_store = ElasticsearchStore(
    embedding=embeddings,
    index_name=es_index,
    es_url=es_url,
    distance_strategy="DOT_PRODUCT",
)

# Set up the vector store.
This takes multiple steps:
1. Load the files into documents.
2. Split the documents into chunks.
3. Load the chunks into Elasticsearch.

In [33]:
# Load the speech from disk.
# The encoding is given explicitly so that loading does not depend on the
# platform's default locale encoding.
loader = TextLoader("state_of_the_union.txt", encoding="utf-8")
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=500, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Load the chunks into Elasticsearch, embedding them with `embeddings`.
db = ElasticsearchStore.from_documents(
    docs, embeddings, es_url=es_url, index_name=es_index,
)

# Refresh the index to make sure the documents are available to queries.
db.client.indices.refresh(index=es_index)

{'source': 'state_of_the_union.txt'}

In [3]:
# Split the product CSV into chunks and create documents from them.
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

csv_fn = 'elastic-products.csv'

# NOTE: the CSV is read as raw text and chunked; CSVLoader is imported but
# not used by this cell.
# The encoding is given explicitly so that reading does not depend on the
# platform's default locale encoding.
with open(csv_fn, encoding="utf-8") as f:
    csv_doc = f.read()

csv_splitter = RecursiveCharacterTextSplitter(chunk_size=100,
                                              chunk_overlap=20,
                                              length_function=len,
                                              is_separator_regex=False)

# Attach the source file name to every chunk while the documents are created,
# instead of patching `metadata` on each entry afterwards.
csv_entries = csv_splitter.create_documents(
    [csv_doc], metadatas=[{'source': csv_fn}],
)

# Load the chunks into Elasticsearch, embedding them with `embeddings`.
db = ElasticsearchStore.from_documents(
    csv_entries, embeddings, es_url=es_url, index_name=es_index,
)

# Refresh the index to make sure the documents are available to queries.
db.client.indices.refresh(index=es_index)

ObjectApiResponse({'_shards': {'total': 2, 'successful': 2, 'failed': 0}})

# Create the Agent

In [4]:
# Make a connection to the LLM.
llm = OpenAI(temperature=0)

# Describe the vector store to the agent.
# The description covers both data sets loaded into the index above (the
# speech and the Elastic products CSV).
# The former `open_api_key=` argument was dropped: it is not a name, description
# or vectorstore setting, and the key is already exported as OPENAI_API_KEY.
vectorstore_info = VectorStoreInfo(
    name="state_of_union_address",
    description="the most recent State of the Union address and the Elastic product list",
    vectorstore=state_of_union_store,
)

# Create a toolkit for the agent to use.
toolkit = VectorStoreToolkit(vectorstore_info=vectorstore_info)
# Create the agent itself; verbose=True is what was set for the runs below.
agent_executor = create_vectorstore_agent(llm=llm, toolkit=toolkit, verbose=True)


# Ask questions of the vector store

In [6]:
# Question about the Elastic products data; the returned answer is the cell output.
agent_executor.run("Name at least 10 products from Elastic.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find a source that lists products from Elastic.
Action: state_of_union_address
Action Input: What products does Elastic offer?[0m
Observation: [36;1m[1;3m Elastic offers Elastic Drone, Elastic Atlas, and Elastic Cloud.[0m
Thought:[32;1m[1;3m I need to find more products.
Action: state_of_union_address_with_sources
Action Input: What other products does Elastic offer?[0m
Observation: [33;1m[1;3m{"answer": " Elastic offers Elastic Cloud, Elastic Drone, and Elastic Atlas.\n", "sources": "elastic-products.csv"}[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: Elastic offers Elastic Cloud, Elastic Drone, Elastic Atlas, Elastic Stack, Elastic App Search, Elastic Site Search, Elastic Enterprise Search, Elastic Observability, Elastic Security, and Elastic Workplace Search.[0m

[1m> Finished chain.[0m


'Elastic offers Elastic Cloud, Elastic Drone, Elastic Atlas, Elastic Stack, Elastic App Search, Elastic Site Search, Elastic Enterprise Search, Elastic Observability, Elastic Security, and Elastic Workplace Search.'

In [14]:
# Question about the State of the Union speech; the returned answer is the cell output.
agent_executor.run("What did biden say about ketanji brown jackson in the state of the union address?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find the answer in the state of the union address
Action: state_of_union_address
Action Input: What did biden say about ketanji brown jackson[0m
Observation: [36;1m[1;3m Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and will continue Justice Breyer's legacy of excellence.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and will continue Justice Breyer's legacy of excellence.[0m

[1m> Finished chain.[0m


"Biden said that Ketanji Brown Jackson is one of the nation's top legal minds and will continue Justice Breyer's legacy of excellence."

