In [15]:
from langchain.document_loaders import TextLoader
# One text loader per source document; the paths are relative to the
# notebook's working directory.
sota_path = "../../modules/state_of_the_union.txt"
pg_path = "../../../../gpt_index/examples/paul_graham_essay/data/paul_graham_essay.txt"

sota_loader = TextLoader(sota_path)
pg_loader = TextLoader(pg_path)

In [12]:
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import FAISS

In [13]:
# Build the State of the Union index, explicitly selecting FAISS as the
# vector store class.
sota_index_creator = VectorstoreIndexCreator(vectorstore_cls=FAISS)
sota_index = sota_index_creator.from_loaders([sota_loader])


In [16]:
# Build the Paul Graham index. No vectorstore_cls is given, so the creator's
# default store is used (the recorded output below shows Chroma starting up);
# the collection name is forwarded through vectorstore_kwargs.
pg_store_kwargs = {"collection_name": "paul-graham"}
pg_index_creator = VectorstoreIndexCreator(vectorstore_kwargs=pg_store_kwargs)
pg_index = pg_index_creator.from_loaders([pg_loader])


Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


In [17]:
# Sanity check: ask the State of the Union index a question it should be able
# to answer.
# NOTE(review): the question text has typos (missing "say", "kentaji" for
# "Ketanji"); it is kept verbatim so the recorded output still corresponds
# to the input that produced it.
sota_question = "what did the president about kentaji brown jackson?"
sota_index.query(sota_question)

" The President nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He said she is one of the nation's top legal minds and will continue Justice Breyer's legacy of excellence."

In [18]:
# Ask the Paul Graham index the same question; the recorded output shows it
# answering that it does not know.
# NOTE(review): the question text has typos (missing "say", "kentaji" for
# "Ketanji"); it is kept verbatim because the recorded output echoes it.
pg_question = "what did the president about kentaji brown jackson?"
pg_index.query(pg_question)

" Kentaji Brown Jackson was not mentioned in the context, so I don't know."

In [19]:
from langchain.agents import initialize_agent, Tool
from langchain.tools import BaseTool
from langchain.llms import OpenAI

In [20]:
# Wrap each index's `query` method as a named tool.
# NOTE(review): the tool names are repeated as string literals where the
# routing examples are annotated later in this notebook; keep them in sync.
# The descriptions are presumably what the agent uses to choose between the
# two tools -- confirm against the agent implementation.
sota_tool = Tool(
    name="State of Union QA System",
    func=sota_index.query,
    description="useful for when you need to answer questions about the most recent state of the union address. Input should be a fully formed question.",
)
pg_tool = Tool(
    name="Paul Graham QA System",
    func=pg_index.query,
    description="useful for when you need to answer questions about Paul Graham. Input should be a fully formed question.",
)

tools = [sota_tool, pg_tool]

In [22]:
# Create the routing agent over the two QA tools. The LLM is constructed with
# temperature=0 (presumably to keep tool selection as repeatable as possible),
# and verbose=True is passed through to the agent.
llm = OpenAI(temperature=0)
agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    verbose=True,
)

In [24]:
import json

In [25]:
# Load the State of the Union QA examples (each one is later read through its
# "question" key).
# The encoding is pinned to UTF-8, the standard encoding for JSON, so the file
# decodes the same way on every platform instead of depending on the locale's
# default text encoding.
with open("../../../notebooks/state_of_union_qa.json", encoding="utf-8") as f:
    sota_qa = json.load(f)

In [26]:
# Load the Paul Graham QA examples (each one is later read through its
# "question" key).
# The encoding is pinned to UTF-8, the standard encoding for JSON, so the file
# decodes the same way on every platform instead of depending on the locale's
# default text encoding.
with open("../../../notebooks/paul_graham_qa.json", encoding="utf-8") as f:
    pg_qa = json.load(f)

In [28]:
# Annotate every example, in place, with the tool the agent is expected to
# route to and the input it is expected to pass (the example's own question).
# NOTE(review): "steps" is a list of two single-key dicts (one holding "tool",
# one holding "tool_input"), not one combined dict -- confirm this is the
# shape the consumer of the dataset expects.
for examples, tool_name in (
    (sota_qa, "State of Union QA System"),
    (pg_qa, "Paul Graham QA System"),
):
    for example in examples:
        example["steps"] = [
            {"tool": tool_name},
            {"tool_input": example["question"]},
        ]

In [30]:
# Combine both annotated example lists into one new list: State of the Union
# examples first, then Paul Graham examples.
all_vectorstore_routing = [*sota_qa, *pg_qa]

In [31]:
# Serialize the combined routing examples and write them to a JSON file in
# the current working directory, overwriting any existing file.
with open("vectorstore_sota_pg.json", "w") as out_file:
    out_file.write(json.dumps(all_vectorstore_routing))