In [18]:
%pip install langchain langchain_community langchain_huggingface chainlit faiss-cpu ctransformers langchainhub pypdf

Collecting packaging<24.0,>=23.1
  Using cached packaging-23.2-py3-none-any.whl (53 kB)
Installing collected packages: packaging
  Attempting uninstall: packaging
    Found existing installation: packaging 24.1
    Uninstalling packaging-24.1:
      Successfully uninstalled packaging-24.1
Successfully installed packaging-23.2
Note: you may need to restart the kernel to use updated packages.


In [25]:
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms.ctransformers import CTransformers
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import chainlit as cl
import os

DB_FAISS_PATH = os.path.join(os.getcwd(), 'vectorstores/db_faiss')
DATA_PATH = os.path.join(os.getcwd(), 'data/PDF/Aurigo')

custom_prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

        Contexts: {context}

        Question: {question}
        Helpful Answer:"""
        
DB_FAISS_PATH
DATA_PATH

'/home/metapercept/ChatbotTest/data/PDF/Aurigo'

In [26]:
def set_custom_prompt():
    """
    Prompt template for QA retrieval for each vectorstore
    """
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt

prompt = set_custom_prompt()
prompt

PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n        Contexts: {context}\n\n        Question: {question}\n        Helpful Answer:")

In [27]:
llm = CTransformers(
            model="TheBloke/Llama-2-7B-Chat-GGML",
            model_type="llama",
            # gpu_layers=110,
            config={
                'temperature': 0.01,
                'max_new_tokens': 600,
                'context_length': 1200,
            }
        )
llm

Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 23967.45it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 14926.35it/s]


CTransformers(client=<ctransformers.llm.LLM object at 0x7b5e4d0e8a30>, model='TheBloke/Llama-2-7B-Chat-GGML', model_type='llama', config={'temperature': 0.01, 'max_new_tokens': 600, 'context_length': 1200})

In [28]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
embeddings

2024-08-05 13:04:56 - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={'device': 'cpu'}, encode_kwargs={}, multi_process=False, show_progress=False)

In [29]:
loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
texts = text_splitter.split_documents(documents)
texts

[Document(metadata={'source': '/home/metapercept/ChatbotTest/data/PDF/Aurigo/Aurigo-Engage.pdf', 'page': 0}, page_content='Canceling the Subscription'),
 Document(metadata={'source': '/home/metapercept/ChatbotTest/data/PDF/Aurigo/Aurigo-Engage.pdf', 'page': 1}, page_content='| Contents | ii\nContents\nCanceling the Subscription ......................................................................................6\nCampaigning for Projects ........................................................................................6\nContact Us .................................................................................................................6\nIntroduction to Aurigo Engage ...............................................................................6\nClosing a Campaign .................................................................................................7\nEngage Home ...............................................................................................

In [30]:
db = FAISS.from_documents(texts, embeddings)
db.save_local(DB_FAISS_PATH)

2024-08-05 13:05:06 - Loading faiss with AVX512 support.
2024-08-05 13:05:06 - Successfully loaded faiss with AVX512 support.


In [31]:
db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
db

<langchain_community.vectorstores.faiss.FAISS at 0x7b5e5480cdf0>

In [32]:
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type='stuff',
                                    retriever=db.as_retriever(search_kwargs={'k':4 }),
                                    return_source_documents=True,
                                    chain_type_kwargs={'prompt': prompt},
                                    )
qa_chain

RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n        Contexts: {context}\n\n        Question: {question}\n        Helpful Answer:"), llm=CTransformers(client=<ctransformers.llm.LLM object at 0x7b5e4d0e8a30>, model='TheBloke/Llama-2-7B-Chat-GGML', model_type='llama', config={'temperature': 0.01, 'max_new_tokens': 600, 'context_length': 1200})), document_variable_name='context'), return_source_documents=True, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7b5e5480cdf0>, search_kwargs={'k': 4}))

In [34]:
res = qa_chain.invoke("What is Aurigo?")
res['result']

' Aurigo is a software as a service (SaaS) based product that enables infrastructure and construction agencies to efficiently collaborate with the general public potentially using a proposed infrastructure.'