# Store the PDF in a vector database

In [None]:
from langchain.document_loaders import PyPDFLoader

# Load the PDF from disk and split it into documents
pdf_path = "ally.pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded PDF documents into small chunks with a little overlap
splitter_settings = {"chunk_size": 400, "chunk_overlap": 20}
splitter = RecursiveCharacterTextSplitter(**splitter_settings)
texts = splitter.split_documents(pages)

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embed the text chunks and index the resulting vectors in FAISS
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

vectorstore = FAISS.from_documents(documents=texts, embedding=embedding)

In [None]:
# Persist the index to a local folder so the chatbot cells can reload it
index_dir = "vector_store_ally"
vectorstore.save_local(index_dir)

# Chatbot Below

In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model; uses the same model name as the cell that built the index
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Reload the saved FAISS index from the local folder
db = FAISS.load_local("vector_store_ally", embedding)


In [None]:
from langchain.llms import CTransformers
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

from langchain.chains import (
    ConversationalRetrievalChain, 
    ConversationChain, 
    RetrievalQAWithSourcesChain, 
    RetrievalQA
)

from langchain.chains.conversation.memory import (
    ConversationBufferWindowMemory,
    ConversationBufferMemory, 
    ConversationSummaryBufferMemory
)

from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
    MessagesPlaceholder
)

# Raw prompt text with {context} and {question} placeholders; it is wrapped
# in a PromptTemplate in a later cell.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
# Wrap the raw prompt text (prompt_template, defined in the previous cell)
# so the chain can fill in the retrieved context and the user question.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Extra options for the underlying "stuff" documents chain.
chain_type_kwargs = {"prompt": PROMPT}

# The retriever only controls which chunks are fetched from the vector
# store: here, the 2 nearest chunks per query.
retriever = db.as_retriever(search_kwargs={'k': 2})

# Local quantized Llama-2 chat model loaded through ctransformers.
llm = CTransformers(
    model="models/llama-2-7b-chat.ggmlv3.q4_0.bin",
    model_type="llama",
    config={
        'max_new_tokens': 512,
        'temperature': 0.8,
    },
)

# chain_type_kwargs and return_source_documents are options of the QA chain,
# not of the retriever. They were previously passed to db.as_retriever(),
# where they had no effect on the chain: the custom prompt was never used
# and no source documents came back. Passing them here applies PROMPT and
# adds a 'source_documents' entry next to 'result' in the chain output.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Ask the QA chain one question about the indexed PDF
query = 'What was the provision for credit losses decrease at the end of 2021?'
chain_inputs = {'query': query}
ans = qa(chain_inputs)

In [None]:
# Display the whole response mapping returned by the chain, not only the
# answer text (shown as the cell output)
ans

In [None]:
# Show only the generated answer text
answer_text = ans['result']
print(answer_text)