In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from tqdm.autonotebook import tqdm, trange

  from tqdm.autonotebook import tqdm, trange


In [2]:
DATA_PATH = 'Aurigo/'
DB_FAISS_PATH = 'vectorstores/db_faiss'

In [3]:
loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
documents = loader.load()
documents[:10]

[Document(metadata={'source': 'Aurigo/Aurigo.pdf', 'page': 0}, page_content='Canceling the Subscription'),
 Document(metadata={'source': 'Aurigo/Aurigo.pdf', 'page': 1}, page_content=' | Contents | ii\nContents\nCanceling the Subscription ......................................................................................6\nCampaigning for Projects ........................................................................................6\nContact Us .................................................................................................................6\nIntroduction to Aurigo Engage ...............................................................................6\nClosing a Campaign .................................................................................................7\nEngage Home ............................................................................................................7\nIntroduction to Aurigo Engage User Interface ..............................

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100)
texts = text_splitter.split_documents(documents)
texts[:10]

[Document(metadata={'source': 'Aurigo/Aurigo.pdf', 'page': 0}, page_content='Canceling the Subscription'),
 Document(metadata={'source': 'Aurigo/Aurigo.pdf', 'page': 1}, page_content='| Contents | ii\nContents\nCanceling the Subscription ......................................................................................6\nCampaigning for Projects ........................................................................................6\nContact Us .................................................................................................................6\nIntroduction to Aurigo Engage ...............................................................................6\nClosing a Campaign .................................................................................................7\nEngage Home ............................................................................................................7\nIntroduction to Aurigo Engage User Interface ...............................

In [6]:
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs = { "device": "cpu" })
embeddings

  warn_deprecated(


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={}, multi_process=False, show_progress=False)

In [8]:
db = FAISS.from_documents(texts, embeddings)
db.save_local(DB_FAISS_PATH)

In [7]:
db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
db

<langchain_community.vectorstores.faiss.FAISS at 0x7e27a986bdf0>

In [8]:
from langchain_community.llms.ctransformers import CTransformers

llm = CTransformers(
        model = "TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        config={
            # 'max_new_tokens': 600,
            'temperature': 0.01,
            # 'context_length': 700,
            'stream': True
        }
    )

llm

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

CTransformers(client=<ctransformers.llm.LLM object at 0x7e27a9854100>, model='TheBloke/Llama-2-7B-Chat-GGML', model_type='llama', config={'temperature': 0.01, 'stream': True})

In [21]:
from langchain.prompts import PromptTemplate

custom_prompt_template = """
###Instructions:###
You are an AI Integrated Chatbot. Your task is to give a relevant answer to the question asked by the User based on the context provided. The context will be pieces of information from a particular document. If you don't know the answer, just say that you don't know; don't try to make up an answer. Answer the question given in a natural, human-like manner.

###Context: {context}###
###Question: {question}###

******************
Note: Only return the helpful & correct answer below and nothing else. Verify the information before responding. Don't give improper or incorrect information or information that are not provided in the context.
******************

Helpful answer:
"""

qa_prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])

qa_prompt

PromptTemplate(input_variables=['context', 'question'], template="\n###Instructions:###\nYou are an AI Integrated Chatbot. Your task is to give a relevant answer to the question asked by the User based on the context provided. The context will be pieces of information from a particular document. If you don't know the answer, just say that you don't know; don't try to make up an answer. Answer the question given in a natural, human-like manner.\n\n###Context: {context}###\n###Question: {question}###\n\n******************\nNote: Only return the helpful & correct answer below and nothing else. Verify the information before responding. Don't give improper or incorrect information or information not provided in the context.\n******************\n\nHelpful answer:\n")

In [22]:
from langchain.chains.retrieval_qa.base import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type='stuff',
                                    retriever=db.as_retriever(search_kwargs={'k': 1}, search_type='mmr'),
                                    return_source_documents=True,
                                    chain_type_kwargs={'prompt': qa_prompt}
                                    )

qa_chain

RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="\n###Instructions:###\nYou are an AI Integrated Chatbot. Your task is to give a relevant answer to the question asked by the User based on the context provided. The context will be pieces of information from a particular document. If you don't know the answer, just say that you don't know; don't try to make up an answer. Answer the question given in a natural, human-like manner.\n\n###Context: {context}###\n###Question: {question}###\n\n******************\nNote: Only return the helpful & correct answer below and nothing else. Verify the information before responding. Don't give improper or incorrect information or information not provided in the context.\n******************\n\nHelpful answer:\n"), llm=CTransformers(client=<ctransformers.llm.LLM object at 0x7e27a9854100>, model='TheBloke/Llama-2-7B-Chat-GGML', model_type='llama', config={'te

In [24]:
response_stream = qa_chain.stream({'query': "What is its important?"})
print(response_stream)
for response in response_stream:
    print(response)

<generator object Runnable.stream at 0x7e279c556a40>
{'query': 'What is its important?', 'result': 'The important thing to note about the Contents section is that it provides a summary of the key points covered in the document, including the importance of campaigns and projects, how to close a campaign, and how to engage with users.', 'source_documents': [Document(metadata={'source': 'Aurigo/Aurigo.pdf', 'page': 1}, page_content='| Contents | ii\nContents\nCanceling the Subscription ......................................................................................6\nCampaigning for Projects ........................................................................................6\nContact Us .................................................................................................................6\nIntroduction to Aurigo Engage ...............................................................................6\nClosing a Campaign .................................................