In [None]:
# !pip install -U langchain pypdfium2
%pip install streamlit --upgrade
%pip install streamlit-authenticator


In [None]:
import getpass
import os

import streamlit_authenticator as stauth

# Hash a password for the streamlit-authenticator configuration.
# The plaintext is no longer hard-coded in the notebook: it is taken from the
# STREAMLIT_AUTH_PASSWORD environment variable when set, otherwise the user is
# prompted for it (the input is not echoed).
plain_password = os.environ.get("STREAMLIT_AUTH_PASSWORD") or getpass.getpass(
    "Password to hash: "
)
hashed_passwords = stauth.Hasher([plain_password]).generate()
del plain_password  # do not keep the plaintext around in the kernel namespace
hashed_passwords

In [None]:
from langchain.document_loaders import PyPDFDirectoryLoader, PyPDFLoader, PyPDFium2Loader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import FAISS
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.memory import ConversationBufferMemory

import re
import time
from glob import glob
from tqdm import tqdm



In [None]:
# Load every PDF found directly under ./Data and collect the loader's output
# into a single flat list.
#
# Only entries ending in ".pdf" (case-insensitive) are handed to the PDF
# loader, so stray files or sub-directories in ./Data do not abort the run.
# The paths are sorted so the document order does not depend on the order in
# which the filesystem lists them.
pdf_paths = sorted(
    path for path in glob('./Data/*') if path.lower().endswith('.pdf')
)

docs = []
for file_name in tqdm(pdf_paths):
    docs.extend(PyPDFium2Loader(file_name).load())

In [None]:
# Show how many documents were collected from ./Data.
len(docs)

In [None]:
# Keep only the documents whose text is longer than 10 characters.
x_docs = list(filter(lambda page: len(page.page_content) > 10, docs))

In [None]:
from langchain.embeddings import BedrockEmbeddings

In [None]:
# Bedrock embeddings client; `be` is what the FAISS index is built with.
model_id = "amazon.titan-embed-text-v1"
be = BedrockEmbeddings(model_id=model_id)

In [None]:
# Build the FAISS vector store from the filtered documents using `be`.
faiss_index = FAISS.from_documents(documents=x_docs, embedding=be)

In [None]:
# Persist the index under the "vectorstore" folder.
faiss_index.save_local(folder_path="vectorstore")

In [None]:
from langchain.llms import Bedrock
# Bedrock-hosted LLM shared by all the chains below.
llm = Bedrock(
    model_id='anthropic.claude-v2',
    model_kwargs=dict(max_tokens_to_sample=2000),
)

In [None]:
# Answering prompt. It has two placeholders: {context}, wrapped in <context>
# tags, and {question}. The text is sent to the model as-is, so any edit here
# changes model behaviour.
prompt_template = """Human: You are a friendly  AI assistant in providing guidance about customer support cases raised with AWS. Your job is to chat with AWS employees and provide them with correct and concise answers. If you don't know the answer, just say that you don't have access to this information yet, don't try to make up an answer. You can find the answers in the documents of the company available below.

Documents: <context>{context}</context>

Begin!
Question: {question}
Assistant:"""


In [None]:

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt object for the answering step; it fills {context} and {question}.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Single-turn retrieval QA: fetch the 6 most similar documents from the FAISS
# index and pass them to the LLM with PROMPT using the "stuff" chain type.
# No memory is attached to this chain. Source documents are returned alongside
# the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    retriever=faiss_index.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 6},
    ),
    return_source_documents=True,
)

In [None]:
# Ask a first question through the retrieval QA chain and print the answer text.
query = "Hey, What is the objective of Outsourcing regulation for banks?"
result = qa(dict(query=query))
print(result["result"])

In [None]:
# Inspect the memory attached to the chain's combine-documents step.
# No memory was passed when `qa` was built, so this is presumably None — confirm.
qa.combine_documents_chain.memory

In [None]:
# Follow-up question that refers back to the previous turn ("its").
# NOTE(review): `qa` was built without memory, so this query is presumably
# answered in isolation — confirm that this is the intended demonstration.
qa({"query":"And what is its circular no.?"})

---

In [None]:

# Prompt that asks the model to rewrite a follow-up question as a standalone
# question. Placeholders: {chat_history} and {question}. The text is sent to
# the model as-is, so it must not be reformatted.
_template = """Human: Given the following conversation and a follow up question, rephrase the follow up question to be a 
standalone question without changing the content in given question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question: Assistant:"""

# Prompt objects for the conversational chain: one to condense a follow-up
# into a standalone question, one to answer from the retrieved context.
condense_question_prompt_template = PromptTemplate.from_template(_template)
qa_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
from langchain.chains import LLMChain

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain 

In [None]:
# Conversation memory, stored under the "chat_history" key as message objects.
# It is attached to the outer conversational chain only.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Rewrites a follow-up question into a standalone one.
# This chain deliberately has NO memory. Sharing the same memory object here
# would make it save each "question -> rephrased question" pair as an extra
# conversation turn, and would replace the formatted history handed to it by
# the outer chain with the raw message list.
question_generator = LLMChain(llm=llm, prompt=condense_question_prompt_template)

# Answers the (standalone) question from the retrieved documents using qa_prompt.
doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)

# Full conversational pipeline: condense question -> retrieve 6 documents ->
# answer, with the memory recording each question/answer turn.
qa_chain = ConversationalRetrievalChain(
    retriever=faiss_index.as_retriever(search_kwargs={'k': 6}),
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    memory=memory,
)

In [None]:
# Running list of (question, answer) tuples that is passed to qa_chain.
chat_history = []

In [None]:
# First conversational turn: ask, record the (question, answer) pair, show the answer.
# NOTE(review): qa_chain was built with its own memory, so passing chat_history
# explicitly may be redundant — confirm.
question = 'What is the scope of Outsourcing Regulation for banks?'
result = qa_chain(dict(question=question, chat_history=chat_history))
response = result['answer']
chat_history.append((question, response))
response

In [None]:
# Second turn: a follow-up question, sent together with the accumulated history.
result = qa_chain(
    {
        'question': "And what is the definition of an Affiliate?",
        'chat_history': chat_history,
    }
)
result['answer']