In [3]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores.faiss import FAISS

In [4]:
# Read the whole speech into a single string.
with open('../state_of_the_union.txt') as speech_file:
    state_of_the_union = speech_file.read()

# Break the speech into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_text(state_of_the_union)

# Embedding backend used when building the vector index.
embeddings = OpenAIEmbeddings()

In [5]:
# The question that drives the rest of the notebook.
query = "What did the president say about Ketanji Brown Jackson"

# Index the chunks with FAISS, then fetch the chunks most similar to the query.
docsearch = FAISS.from_texts(texts, embeddings)
docs = docsearch.similarity_search(query)

In [9]:
from langchain.chains.qa_with_sources.base import QAWithSourcesChain
from langchain.llms import OpenAI
from langchain.docstore.document import Document

In [2]:
# Build the QA-with-sources chain on top of an OpenAI LLM created with temperature=0.
qa_llm = OpenAI(temperature=0)
chain = QAWithSourcesChain.from_llm(qa_llm)

In [8]:
# The question chain's first input key receives each document's text.
var_name = chain.llm_question_chain.input_keys[0]

# One input dict per retrieved document, each paired with the same question.
question_inputs = [{var_name: d.page_content, "question": query} for d in docs]
results = chain.llm_question_chain.apply(question_inputs)

In [13]:
# Wrap each per-document answer in a Document, labelled "Page <index>" as its source.
answer_key = chain.llm_question_chain.output_key
result_docs = [
    Document(page_content=r[answer_key], metadata={'source': f'Page {i}'})
    for i, r in enumerate(results)
]

In [14]:
from langchain.chains.combine_documents import CombineDocumentsChain

In [15]:
# Reuse the combine LLM chain and document prompt already configured on `chain`.
combine_chain = CombineDocumentsChain(
    llm_chain=chain.llm_combine_chain,
    document_prompt=chain.document_prompt,
)

In [23]:
# Step through the document-combining logic by hand, using `combine_chain`'s
# own prompts, so every intermediate value can be inspected.
inputs = {combine_chain.input_key: result_docs, "question": query}

# Keep the documents under their own name: rebinding `docs` here would silently
# replace the similarity-search results that other cells read from `docs`.
input_docs = inputs[combine_chain.input_key]

# Everything that is not the document list is passed straight to the LLM chain.
other_keys = {k: v for k, v in inputs.items() if k != combine_chain.input_key}

# For each document, expose page_content plus its metadata entries, then keep
# only the fields the document prompt asks for.
doc_dicts = []
for input_doc in input_docs:
    base_info = {"page_content": input_doc.page_content}
    base_info.update(input_doc.metadata)
    doc_dicts.append(
        {k: base_info[k] for k in combine_chain.document_prompt.input_variables}
    )
doc_strings = [combine_chain.document_prompt.format(**doc_dict) for doc_dict in doc_dicts]

# The first input variable of the LLM prompt receives the joined document strings.
doc_variable = combine_chain.llm_chain.prompt.input_variables[0]
other_keys[doc_variable] = "\n\n".join(doc_strings)
output = combine_chain.llm_chain.predict(**other_keys)

In [25]:
# Split the raw completion at the sources marker: answer text first, sources after.
sources_marker = "\n\nSources: "
output.split(sources_marker)

[' The president said that Ketanji Brown Jackson is a consensus builder who has received a broad range of support since she was nominated.',
 'Page 0']

In [18]:
# Probe how the chain reacts when `input_documents` holds a plain string.
# The AttributeError is the point of this cell, so catch and show it instead
# of letting it abort a full "Run All".
try:
    bad_input_result = combine_chain({"foo": "bar", "input_documents": ["jim"]})
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")
else:
    print(bad_input_result)

AttributeError: 'str' object has no attribute 'dict'

In [8]:
# PromptTemplate and LLMChain are used below but are not imported by any
# other cell, so bring them into scope here to keep a fresh kernel runnable.
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# The question is baked into the template with str.format; the doubled braces
# around `context` survive that call as `{context}`, which is left as the
# prompt's only input variable.
question_p = """Use the following context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{{context}}

Question: {question}
Helpful Answer:""".format(question=query)
prompt_q = PromptTemplate(input_variables=["context"], template=question_p)
llm_chain = LLMChain(llm=OpenAI(), prompt=prompt_q)

In [9]:
# Ask the question once per retrieved document, using its text as the context.
context_inputs = [{"context": d.page_content} for d in docs]
res = llm_chain.apply(context_inputs)

In [10]:
from langchain.docstore.document import Document

In [11]:
# Wrap each answer in a Document labelled "Page <index>". The label is stored
# under `metadata`, the same way `result_docs` is built elsewhere in this
# notebook, rather than as a top-level `source` keyword.
res_docs = [
    Document(page_content=r['text'], metadata={'source': f"Page {i}"})
    for i, r in enumerate(res)
]

In [12]:
# Renders a single document as a "Content: ... / Source: ..." pair.
document_prompt = PromptTemplate(
    template="Content: {page_content}\nSource: {source}",
    input_variables=["page_content", "source"],
)

In [26]:
# Few-shot skeleton for the final answer. `{question}` is filled in right away;
# the doubled braces keep `{summaries}` as the prompt's remaining input variable.
template_skeleton = """Given the following questions, reference links and associated content, create a final answer with references:

Question: What color is an apple?
Content: An apple can be red
Source: foo
Content: An apple can be green
Source: bar
Content: An orange is orange
Source: baz
Final Answer: An apple can be red or green
Sources: foo, bar

Question: {question}
{{summaries}}
Final Answer:"""
template = template_skeleton.format(question=query)

prompt = PromptTemplate(
    template=template,
    input_variables=["summaries"],
)

# Chain that answers from the summaries, wrapped so it accepts documents.
llm_chain = LLMChain(prompt=prompt, llm=OpenAI(temperature=0))
combine_chain = CombineDocumentsChain(
    document_prompt=document_prompt,
    llm_chain=llm_chain,
)

In [27]:
# Run the combine chain over the per-document answers and display the result.
final_answer = combine_chain.run(res_docs)
final_answer

" President Biden said that Ketanji Brown Jackson is one of our nation's top legal minds, and she will continue Justice Breyer's legacy of excellence.\nSources: Page 0"

In [24]:
# Show the template text of the prompt currently attached to `llm_chain`.
active_prompt = llm_chain.prompt
print(active_prompt.template)

Given the following questions, reference links and associated content, create a final answer with references:

Question: What color is an apple?
Content 1: An apple can be red
Source 1: foo
Content 2: An apple can be green
Source 2: bar
Content 3: An orange is orange
Source 3: baz
Final Answer: An apple can be red or green
Sources: foo, bar

Question: What did the president say about Ketanji Brown Jackson
{summaries}
Final Answer:


In [15]:
def _prompt_fields(doc):
    """Return the document's fields with any `metadata` entries lifted to the top level.

    This lets the document prompt find a value such as `source` whether it is
    a direct field of the document or an entry in its `metadata` dict.
    """
    fields = dict(doc.dict())
    fields.update(fields.get("metadata") or {})
    return fields


# Keep only the fields the document prompt asks for, one dict per document.
doc_dicts = []
for res_doc in res_docs:
    fields = _prompt_fields(res_doc)  # computed once per document
    doc_dicts.append({k: fields[k] for k in document_prompt.input_variables})

doc_strings = [document_prompt.format(**doc_dict) for doc_dict in doc_dicts]

In [16]:
# Print the formatted documents, one after another, separated by newlines.
print(*doc_strings, sep='\n')

Content:  President Biden said that Ketanji Brown Jackson is one of our nation's top legal minds, and she will continue Justice Breyer's legacy of excellence.
Source: Page 0
Content:  The President did not mention Ketanji Brown Jackson in the State of the Union address.
Source: Page 1
Content:  The President did not mention Ketanji Brown Jackson.
Source: Page 2
Content:  The president did not mention Ketanji Brown Jackson in the context provided.
Source: Page 3


In [20]:
# Feed the newline-joined document strings to the prompt's first input variable.
doc_variable = llm_chain.prompt.input_variables[0]
prompt_kwargs = {doc_variable: "\n".join(doc_strings)}
output = llm_chain.predict(**prompt_kwargs)

In [22]:
# Show which template `llm_chain` is using at this point in the session.
current_template = llm_chain.prompt.template
print(current_template)

Use the following context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: What did the president say about Ketanji Brown Jackson
Helpful Answer:
