# Question Answering over Resources
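This notebook is based on the LangChain "Question Answering with Sources" example: https://python.langchain.com/en/latest/modules/chains/index_examples/qa_with_sources.html. It asks the same question of a plain-text file and of a PDF, and returns an answer together with the sources it was drawn from.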

## Import libraries

In [44]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI

import os
# Prefer exporting OPENAI_API_KEY in the environment before starting Jupyter so
# the secret never has to be written into (or committed with) the notebook.
# `setdefault` leaves a key that is already present untouched; the "..."
# placeholder is only applied when the variable is missing and must then be
# replaced with a real key for the OpenAI calls below to authenticate.
os.environ.setdefault("OPENAI_API_KEY", "...")

## TXT files

In [45]:
# Read the whole transcript into memory. The encoding is pinned so the text is
# decoded the same way on every platform instead of depending on the locale
# default. NOTE(review): assumes the file is UTF-8 encoded; confirm.
with open("Test-Documents/state_of_the_union.txt", encoding="utf-8") as f:
    state_of_the_union = f.read()

# Split the transcript into chunks (chunk_size=1000, chunk_overlap=0); `texts`
# is the list that gets indexed in the next cell.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(state_of_the_union)

# One embeddings object, reused for both the TXT index and the PDF index.
embeddings = OpenAIEmbeddings()

In [46]:
# Index the TXT chunks in Chroma. Each chunk carries its position in `texts`
# (as a string) under the "source" metadata key.
# The explicit collection name keeps this index apart from the PDF index built
# later in the notebook: without it both calls fall back to Chroma's default
# collection name and, depending on the chromadb version, may end up sharing
# one in-memory collection.
# NOTE(review): the saved output of the QA cell cites ids such as "16-pl",
# which this metadata cannot produce; that output looks stale, so re-run it.
docsearch = Chroma.from_texts(
    texts,
    embeddings,
    metadatas=[{"source": str(i)} for i in range(len(texts))],
    collection_name="state_of_the_union_txt",
)

In [47]:
# Question to ask of the TXT index; edit the string to try a different one.
query = "What did president say about economy?"
# Retrieve the indexed chunks matching the question; `docs` is handed to the
# QA chain in the next cell.
docs = docsearch.similarity_search(query=query)

In [48]:
# Build the QA-with-sources chain around an OpenAI LLM at temperature 0.
llm = OpenAI(temperature=0)
chain = load_qa_with_sources_chain(llm, chain_type="stuff")

# Left as a bare last expression so the notebook displays the returned dict.
chain(
    {"input_documents": docs, "question": query},
    return_only_outputs=True,
)

{'output_text': ' The president said that his plan to fight inflation will lower costs and the deficit, and that he has a better plan to fight inflation which includes cutting the cost of prescription drugs, demanding more competition, and investing in America, educating Americans, and growing the workforce. He also mentioned that he passed the Bipartisan Infrastructure Law, the most sweeping investment to rebuild America in history.\n\nSOURCES: 16-pl, 21-pl, 8-pl, 10-pl'}

## PDF files

In [49]:
!pip3 install pypdf

Defaulting to user installation because normal site-packages is not writeable


In [50]:
from langchain.document_loaders import PyPDFLoader

# Load the PDF version of the speech and split it into documents; `pages` is
# what gets indexed in the next cell.
pdf_path = "Test-Documents/state_of_the_union.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

In [51]:
# Index the PDF pages in Chroma, reusing the `embeddings` object created in the
# TXT section.
# The explicit collection name keeps this index apart from the TXT index built
# earlier: without it both calls fall back to Chroma's default collection name
# and, depending on the chromadb version, may end up sharing one in-memory
# collection, letting TXT chunks leak into PDF search results.
db = Chroma.from_documents(
    pages,
    embeddings,
    collection_name="state_of_the_union_pdf",
)

In [52]:
# Question to ask of the PDF index; edit the string to try a different one.
query = "What did president say about economy?"
# NOTE(review): this rebinds `pages` from the full list of split PDF pages to
# just the search hits, so re-running the indexing cell after this one would
# index only the hits. The name is kept because the next cell reads `pages`.
pages = db.similarity_search(query=query)

In [53]:
# Same QA-with-sources setup as in the TXT section, now fed the PDF search hits.
chain = load_qa_with_sources_chain(llm=OpenAI(temperature=0), chain_type="stuff")
pdf_inputs = {"input_documents": pages, "question": query}
# Left as a bare last expression so the notebook displays the returned dict.
chain(pdf_inputs, return_only_outputs=True)

{'output_text': " The president proposed a 15% minimum tax rate for corporations, closing loopholes so the very wealthy don't pay a lower tax rate than a teacher or a firefighter, and announced the passage of the American Rescue Plan to provide economic relief for tens of millions of Americans. He also proposed investing in America, educating Americans, and growing the workforce to build the economy from the bottom up and the middle out.\n\nSOURCES: Test-Documents/state_of_the_union.pdf"}