In [3]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI, VectorDBQAWithSourcesChain
import os
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain.agents import initialize_agent, Tool
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import PromptTemplate


class Weeklupdate:
  """Question-answering agent over a folder of weekly update emails.

  On construction the class loads every HTML email from ``UPDATES_DIR``,
  makes sure the chunks are indexed in a persisted Chroma collection, and
  wires a single retrieval tool into a zero-shot ReAct agent.

  Args:
    month: Two-digit month string (or an int) used as a metadata filter on
      every retrieval. Defaults to ``"01"``, which is the value this class
      has always used. Pass ``None` `` to search all months.
      NOTE(review): the default restricts every answer to documents whose
      file name has "01" at positions 5-6 -- confirm this is intended.
  """

  # Where the emails live, where Chroma persists, and the collection to use.
  UPDATES_DIR = "../data/weekly_updates"
  PERSIST_DIR = '../data'
  COLLECTION_NAME = "weeklyupdatesv3"

  def __init__(self, month="01"):
    # Normalise so that 1, "1" and "01" all match the stored "month" metadata.
    self.month = None if month is None else str(month).zfill(2)
    self.llm = OpenAI(temperature=0)
    self.documents = self.load_documents()
    self.db = self.setup_db()
    self.agent = self.create_agent()

  def load_documents(self):
    """Load every HTML file in ``UPDATES_DIR`` and tag it with date metadata.

    Files are visited in sorted name order so the result is deterministic
    (``os.listdir`` makes no ordering promise). Entries that do not end in
    ``.html``/``.htm`` are skipped so stray files cannot reach the HTML loader.

    Returns:
      A flat list of loaded documents. Each carries ``metadata["date"]``
      (first 10 characters of the file name) and ``metadata["month"]``
      (characters 5-6 of the file name).
      Assumes file names start with ``YYYY-MM-DD`` -- TODO confirm for all files.
    """
    documents = []
    for file_name in sorted(os.listdir(self.UPDATES_DIR)):
      if not file_name.lower().endswith((".html", ".htm")):
        continue
      # Plain "/" concatenation keeps the path string the loader receives in
      # the same form as before.
      loaded = UnstructuredHTMLLoader(self.UPDATES_DIR + "/" + file_name).load()
      # Tag every returned document, not just the first, and tolerate a
      # loader that returns nothing for a file.
      for document in loaded:
        document.metadata["date"] = file_name[0:10]
        document.metadata["month"] = file_name[5:7]
      documents += loaded
    return documents

  def setup_db(self):
    """Open the persisted Chroma collection, indexing documents only if empty.

    Calling ``Chroma.from_documents`` on every start-up appends the same
    chunks to the already-persisted collection again, which yields duplicate
    search hits and re-pays the embedding cost. This method therefore opens
    the collection first and only embeds when it holds no entries.

    Note: a non-empty collection is reused as-is, so emails added to
    ``UPDATES_DIR`` later are not indexed until the collection is rebuilt.

    Returns:
      The Chroma vector store backing the retrieval tool.
    """
    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(
      collection_name=self.COLLECTION_NAME,
      embedding_function=embeddings,
      persist_directory=self.PERSIST_DIR,
    )

    # NOTE(review): `_collection` is the underlying chromadb collection held
    # by LangChain's wrapper; it is not public API. If it is missing we fall
    # through and index, which is the previous behaviour.
    collection = getattr(docsearch, "_collection", None)
    if collection is not None and collection.count() > 0:
      return docsearch

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(self.documents)
    if texts:  # nothing to embed when the folder is empty
      docsearch.add_texts(
        texts=[text.page_content for text in texts],
        metadatas=[text.metadata for text in texts],
      )
    return docsearch

  def create_agent(self):
    """Build a zero-shot ReAct agent with one tool: QA-with-sources over the DB.

    Returns:
      The agent produced by ``initialize_agent``.
    """
    # Apply the month metadata filter only when one is configured.
    search_kwargs = {} if self.month is None else {"filter": {"month": self.month}}
    qa_chain = VectorDBQAWithSourcesChain.from_chain_type(
      llm=self.llm,
      chain_type="map_reduce",
      vectorstore=self.db,
      search_kwargs=search_kwargs,
    )
    tools = [
        Tool(
            name = "Weekly Update QA System",
            func= qa_chain,
            description="useful for when you need to answer questions about what Anthony the CEO said in a weekly update emails to the company. Input should be a fully formed question."
        )
    ]
    agent = initialize_agent(tools, self.llm, agent="zero-shot-react-description", verbose=True)
    return agent
  
  def query_agent(self, query):
    """Wrap ``query`` in the instruction prompt and run it through the agent.

    Args:
      query: The user's question as plain text.

    Returns:
      Whatever ``self.agent.run`` returns for the formatted prompt.
    """
    # The template text is sent to the model verbatim; keep it unchanged.
    template = """
    You are helping someone understand a startup names Hearth better. You have access to all the weekly update emails send by the CEO. These emails are dated.
    The question you receive is: {query}\n
    Please give your sources.
    """
    question_prompt = PromptTemplate(
      input_variables=["query"], 
      template=template
    )
    prompt_with_query = question_prompt.format(query=query)
    return self.agent.run(prompt_with_query)



In [4]:
# Build the agent: the constructor loads the emails, sets up the vector store, and creates the agent.
agent = Weeklupdate()



Running Chroma using direct local API.
loaded in 3292 embeddings
loaded in 3 collections
collection with name weeklyupdatesv3 already exists, returning existing collection


In [5]:
# Query the vector store directly (bypassing the agent) to inspect raw retrieval hits.
docs = agent.db.similarity_search("What did Anthony think about the fund rate?")
for doc in docs:
  # One call per hit: metadata, chunk text, then an empty separator line.
  print(doc.metadata, doc.page_content, "", sep="\n")


{'source': '../data/weekly_updates/2018-06-24T10-32 [anthony@shoguninc.com] Weekly Update.html', 'date': '2018-06-24', 'month': '06'}
Feel confident offering it [combatting uncertainty of not knowing rate plans upfront]

See the value of successful use.

Hope this is helpful food for thought for everyone. Let me know if you have any questions, thoughts or additions to this, its going to become a critical operational framework, so I'd really welcome anything we hadnt thought of.

Anthony

{'source': '../data/weekly_updates/2018-06-24T10-32 [anthony@shoguninc.com] Weekly Update.html', 'date': '2018-06-24', 'month': '06'}
Feel confident offering it [combatting uncertainty of not knowing rate plans upfront]

See the value of successful use.

Hope this is helpful food for thought for everyone. Let me know if you have any questions, thoughts or additions to this, its going to become a critical operational framework, so I'd really welcome anything we hadnt thought of.

Anthony



In [6]:
# Ask the question through the agent; the returned answer is shown as the cell's output.
agent.query_agent("What did Anthony think about the fund rate?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out what Anthony said about the fund rate in the weekly update emails.
Action: Weekly Update QA System
Action Input: What did Anthony say about the fund rate?[0m
Observation: [36;1m[1;3m{'question': 'What did Anthony say about the fund rate?', 'answer': ' Anthony did not mention the fund rate.\n', 'sources': '../data/weekly_updates/2017-06-25T11-06 [anthony@shoguninc.com] Weekly Note.html'}[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Anthony did not mention the fund rate in the weekly update emails. The source is the email dated 2017-06-25T11-06 from anthony@shoguninc.com.[0m

[1m> Finished chain.[0m


'Anthony did not mention the fund rate in the weekly update emails. The source is the email dated 2017-06-25T11-06 from anthony@shoguninc.com.'