# LangChain QA

All code comes from [LangChain docs](langchain.readthedocs.io).

In [7]:
# !pip install langchain openai chromadb tiktoken pypdf

In [8]:
import os 
os.environ["OPENAI_API_KEY"] = ""

In [9]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms.ctransformers import CTransformers

 # Load the locally downloaded model here
llm = CTransformers(
        model = "llama-2-7b-chat.ggmlv3.q8_0.bin",
        model_type="llama",
        max_new_tokens = 512,
        temperature = 0.5
    )



# load_qa_chain

Loads a chain that you can use to do QA over a set of documents, but it uses ALL of those documents. 

chain_type="stuff" will not work because the number of tokens exceeds the limit. We can try other chain types like "map_reduce". 

In [10]:
from langchain.chains.question_answering import load_qa_chain

# load document
loader = PyPDFLoader("data/wilee.pdf")
documents = loader.load()

### For multiple documents 
# loaders = [....]
# documents = []
# for loader in loaders:
#     documents.extend(loader.load())

chain = load_qa_chain(llm=llm, chain_type="map_reduce")
query = "what is the total number of AI publications?"
chain.run(input_documents=documents, question=query)

Number of tokens (1059) exceeded maximum context length (512).
Number of tokens (1060) exceeded maximum context length (512).
Number of tokens (1061) exceeded maximum context length (512).
Number of tokens (1062) exceeded maximum context length (512).
Number of tokens (1063) exceeded maximum context length (512).
Number of tokens (1064) exceeded maximum context length (512).
Number of tokens (1065) exceeded maximum context length (512).
Number of tokens (1066) exceeded maximum context length (512).
Number of tokens (1067) exceeded maximum context length (512).
Number of tokens (1068) exceeded maximum context length (512).
Number of tokens (1069) exceeded maximum context length (512).
Number of tokens (1070) exceeded maximum context length (512).
Number of tokens (1071) exceeded maximum context length (512).
Number of tokens (1072) exceeded maximum context length (512).
Number of tokens (1073) exceeded maximum context length (512).
Number of tokens (1074) exceeded maximum context length

# RetrievalQA 

RetrievalQA chain uses load_qa_chain under the hood. We retrieve the most relevant chunck of text and feed those to the language model. 


#### Options: 
- [embeddings](https://python.langchain.com/en/latest/reference/modules/embeddings.html)
- [TextSplitter](https://python.langchain.com/en/latest/modules/indexes/text_splitters.html)
- [VectorStore](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html)
- [Retrievers](https://python.langchain.com/en/latest/modules/indexes/retrievers.html)
  - [search_type](https://python.langchain.com/en/latest/modules/indexes/vectorstores/examples/chroma.html#mmr): "similarity" or "mmr"
- [Chain Type](https://python.langchain.com/en/latest/modules/chains/index_examples/question_answering.html): "stuff", "map reduce", "refine", "map_rerank"


In [None]:
# load document
loader = PyPDFLoader("materials/example.pdf")
documents = loader.load()
# split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# select which embeddings we want to use
embeddings = OpenAIEmbeddings()
# create the vectorestore to use as the index
db = Chroma.from_documents(texts, embeddings)
# expose this index in a retriever interface
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})
# create a chain to answer questions 
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(), chain_type="stuff", retriever=retriever, return_source_documents=True)
query = "what is the total number of AI publications?"
result = qa({"query": query})

In [None]:
retriever.get_relevant_documents(query)

In [None]:
result

# VectorstoreIndexCreator

VectorstoreIndexCreator is a wrapper for the above logic. 

Source: 
- https://python.langchain.com/en/latest/modules/chains/getting_started.html
- https://github.com/hwchase17/langchain/blob/master/langchain/indexes/vectorstore.py#L21-L74

In [None]:
index = VectorstoreIndexCreator(
    # split the documents into chunks
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    # select which embeddings we want to use
    embedding=OpenAIEmbeddings(),
    # use Chroma as the vectorestore to index and search embeddings
    vectorstore_cls=Chroma
).from_loaders([loader])
query = "what is the total number of AI publications?"
index.query(llm=OpenAI(), question=query, chain_type="stuff")

# ConversationalRetrievalChain

conversation memory + RetrievalQAChain

Allow for passing in chat history which can be used for follow up questions.

Source: https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html


In [None]:
from langchain.chains import ConversationalRetrievalChain

In [None]:
# load document
loader = PyPDFLoader("materials/example.pdf")
documents = loader.load()
# split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# select which embeddings we want to use
embeddings = OpenAIEmbeddings()
# create the vectorestore to use as the index
db = Chroma.from_documents(texts, embeddings)
# expose this index in a retriever interface
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})
# create a chain to answer questions 
qa = ConversationalRetrievalChain.from_llm(OpenAI(), retriever)
chat_history = []
query = "what is the total number of AI publications?"
result = qa({"question": query, "chat_history": chat_history})

In [None]:
chat_history = []
query = "what is the total number of AI publications?"
result = qa({"question": query, "chat_history": chat_history})

In [None]:
result["answer"]

In [None]:
chat_history = [(query, result["answer"])]
query = "What is this number divided by 2?"
result = qa({"question": query, "chat_history": chat_history})

In [None]:
chat_history

In [None]:
result['answer']