In [1]:
import PyPDF2

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import LlamaCpp


from langchain.embeddings import HuggingFaceEmbeddings # import hf embedding
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain


from langchain.prompts import PromptTemplate
from sentence_transformers import SentenceTransformer, util
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Step 1: Extract PDF content and metadata

In [2]:
# PDFs to index; every entry is read page by page in process_pdf.
# NOTE(review): this is an absolute path on one specific machine, so the
# notebook cannot be re-run elsewhere without editing it.
pdf_files=["C:/Users/Mrinal Kalita/Python Projects/AIML Capstone Project - CV - Pneumonia Detection-1.pdf"]

In [3]:
def process_pdf(pdf_files):
    """Extract per-page text and a matching title for every page of the given PDFs.

    Args:
        pdf_files: Iterable of PDF locations (``str`` or ``pathlib.Path``);
            each one is handed to ``PyPDF2.PdfReader``.

    Returns:
        A ``(content, metadata)`` tuple of equal-length lists. ``content[k]``
        is the text extracted from one page and ``metadata[k]`` is
        ``{"title": "<pdf> page <n>"}`` with ``n`` counted from 1. Pages are
        listed in file order, then page order.
    """
    content = []
    metadata = []

    for pdf_file in pdf_files:
        reader = PyPDF2.PdfReader(pdf_file)
        for page_number, page in enumerate(reader.pages, start=1):
            # Use the page yielded by the loop rather than indexing
            # reader.pages a second time; fall back to "" so the splitter
            # downstream always receives a string.
            content.append(page.extract_text() or "")
            # f-string instead of ``+`` so pathlib.Path entries work as well.
            metadata.append({"title": f"{pdf_file} page {page_number}"})

    print("Content and metadata are extracted from the documents")
    return content, metadata

In [4]:
# Extract the page texts and their titles from the configured PDFs.
content, metadata = process_pdf(pdf_files)

Content and metadata are extracted from the documents


# Step 2: Split the content into smaller passages

In [5]:
def split_content(content, metadata, chunk_size=512, chunk_overlap=256):
    """Split the extracted texts into overlapping passages.

    Args:
        content: List of strings to split (one entry per PDF page here).
        metadata: List of dicts, one per entry in ``content``; passed to the
            splitter as ``metadatas``.
        chunk_size: Maximum chunk size given to
            ``RecursiveCharacterTextSplitter.from_tiktoken_encoder``.
            Defaults to 512, the value previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks. Defaults to 256,
            the value previously hard-coded.

    Returns:
        The list of documents produced by ``splitter.create_documents``.
    """
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    smaller_docs = splitter.create_documents(content, metadatas=metadata)
    print(f"Docs are split into {len(smaller_docs)} passages")
    return smaller_docs

In [6]:
# Split the page texts into passages, keeping each page's title as metadata.
smaller_docs=split_content(content, metadata)

Docs are split into 7 passages


# Step 3: Ingest into Vector Database locally

In [7]:
def ingest_into_vectordb(smaller_docs,
                         db_path='vectorstore/db_faiss',
                         model_name='sentence-transformers/all-MiniLM-L6-v2',
                         device='cpu'):
    """Embed the passages, build a FAISS store from them and save it to disk.

    Args:
        smaller_docs: Documents to index (the output of ``split_content``).
        db_path: Directory handed to ``db.save_local``. Defaults to
            ``'vectorstore/db_faiss'``, the path previously hard-coded.
        model_name: Embedding model name passed to ``HuggingFaceEmbeddings``.
        device: Value placed under ``model_kwargs['device']``.

    Returns:
        The in-memory FAISS store, so callers can use it without reloading
        it from ``db_path``.
    """
    emb = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': device})
    db = FAISS.from_documents(smaller_docs, emb)

    db.save_local(db_path)
    return db

In [10]:
# Build the FAISS store from the passages; the returned store is what
# conversation_func later turns into a retriever.
vector =ingest_into_vectordb(smaller_docs)

# Step 4: LLM prompt and conversation chain

In [9]:
# Instruction prompt wrapped in [INST] ... [/INST] tags. Its only placeholder
# is {question}. The quoted fallback sentence is what the model is asked to
# reply with verbatim, so it must be spelled correctly.
template = """[INST]
As an AI expert, based on the provided document,please provide accurate, important and relevant information. Your responses should follow the following guidelines:
- Answer the question based on the provided documents.
- Be direct, factual and precise while answering, limited to 50 words and 2-3 sentences. Begin your response without using introductory phrases like yes, no etc.
- Maintain an ethical, unbiased and neutral tone, avoiding harmful or offensive content.
- If the document does not contain relevant information, state "The document doesn't have any relevant information available."
- Do not include questions in your responses.
- Answer the questions directly. do not ask me questions
{question}
[/INST]
"""

# Earlier draft of the prompt, currently disabled:
#template = """Given the document and the current conversation between a user and an agent, your task is as follows: Answer any user query by using information from the document. The response should be detailed."""
# Callback manager holding a single StreamingStdOutCallbackHandler;
# conversation_func passes it to LlamaCpp as callback_manager.
callback = CallbackManager([StreamingStdOutCallbackHandler()])
def conversation_func(vector, model_path="llama-2-7b-chat.Q4_K_M.gguf"):
    """Build a conversational retrieval chain over the given vector store.

    Args:
        vector: Vector store exposing ``as_retriever()`` (the FAISS store
            returned by ``ingest_into_vectordb``).
        model_path: Path of the GGUF model file given to ``LlamaCpp``.
            Defaults to the file name previously hard-coded, which is
            resolved relative to the current working directory.

    Returns:
        The ``ConversationalRetrievalChain`` created by ``from_llm``,
        configured with buffer memory and ``return_source_documents=True``.

    Note:
        The module-level ``template`` is not wired into the chain; the
        original had ``condense_question_prompt`` commented out, so the
        chain's default prompts are used.
    """
    llama_llm = LlamaCpp(
        model_path=model_path,
        temperature=0.75,
        max_tokens=200,
        top_p=1,
        callback_manager=callback,
        n_ctx=3000,
    )

    retriever = vector.as_retriever()

    # ConversationalRetrievalChain expects the history under the input key
    # "chat_history"; with any other memory_key the chain raises a
    # missing-input-key error on the first call. output_key='answer' tells
    # the memory which output to store, since the chain also returns
    # source documents.
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True, output_key='answer')

    conversation_chat = ConversationalRetrievalChain.from_llm(
        llm=llama_llm,
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
    )
    print("Conversation function created for the LLM using the vector store")
    return conversation_chat

In [11]:
# Keep a handle on the chain so later cells can query it; a bare call would
# build the chain and then throw it away.
conversation_chat = conversation_func(vector)

ValidationError: 1 validation error for LlamaCpp
__root__
  Could not load Llama model from path: llama-2-7b-chat.Q4_K_M.gguf. Received error Model path does not exist: llama-2-7b-chat.Q4_K_M.gguf (type=value_error)