## Installing dependencies

In [None]:
!pip install lancedb langchain langchain_community pypdf requests numpy

## Step 1: Extracting the relevant information

First, we'll load the local PDF file you specified (CPG.pdf).

In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader

# Path to the source PDF. Change it if your copy of the file lives elsewhere.
pdf_path = "CPG.pdf"

# Build the loader, then read the file into `docs`
# (the count printed below is reported as pages).
pdf_loader = PyPDFLoader(pdf_path)
docs = pdf_loader.load()

print("Loaded {} pages from {}".format(len(docs), pdf_path))

## Step 2: Breaking the information into smaller chunks

Now we'll split the PDF content into smaller chunks for better processing and retrieval.

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configuration: chunk_size caps each chunk, chunk_overlap is the
# amount shared between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Apply the splitter to every loaded document.
chunks = text_splitter.split_documents(docs)

print("Split into {} chunks".format(len(chunks)))

## Step 3: Creating the embeddings and storing them in a vector database

We'll use a sentence transformer model to create embeddings for our text chunks and store them in a LanceDB database.

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model identifier; the model is asked to run on the CPU.
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs={'device': 'cpu'},
)

# Embed a throwaway string and measure the vector length to learn the
# dimensionality produced by the model.
test_query = "Test embedding dimensions"
probe_vector = embeddings.embed_query(test_query)
embedding_dim = len(probe_vector)
print("Embedding dimension: {}".format(embedding_dim))

### Storing the embeddings to a vector database

In [None]:
import lancedb
from langchain_community.vectorstores import LanceDB

# Connect to the LanceDB database stored at the local path "cpg_lance_db".
db = lancedb.connect("cpg_lance_db")

# Create the "cpg_data" table from a single placeholder row. The row carries
# the three columns used here: the embedding vector, its text and an id.
seed_text = "Initialization vector"
seed_row = {
    "vector": embeddings.embed_query(seed_text),
    "text": seed_text,
    "id": "0",
}
table = db.create_table("cpg_data", data=[seed_row], mode="overwrite")

# Embed every chunk and store the results through the LangChain wrapper.
# NOTE(review): whether `connection` may be a table (as here) or must be the
# database connection appears to depend on the installed langchain_community
# version - confirm against the LanceDB vector store docs for your version.
# NOTE(review): presumably the placeholder row stays in the table and could be
# returned by similarity search - verify.
docsearch = LanceDB.from_documents(chunks, embeddings, connection=table)
print("Documents successfully embedded and stored in vector database")

## Step 4: Create a prompt template for the LLM

Let's create a comprehensive prompt template that incorporates context from our retrieved documents.

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text sent to the LLM. It contains two placeholders:
#   {context} - the text of the retrieved document chunks
#   {query}   - the user's question
# The wording tells the model to answer from the context only and to say so
# when the context does not contain the answer.
template = """
You are a helpful assistant that answers questions based on the provided context.

Context information is below:
---------------------
{context}
---------------------

Given the context information and not prior knowledge, answer the following question:
Question: {query}

If the answer cannot be determined from the context, say so.
"""

# Build the prompt object from the raw template string.
prompt = ChatPromptTemplate.from_template(template)

## Steps 5 & 6: Set up the retriever to fetch relevant documents

Now we'll configure the retriever to search for the most relevant document chunks when given a query.

In [None]:
# Set up the retriever with parameters for how many documents to fetch
retriever = docsearch.as_retriever(search_kwargs={"k": 4})  # Fetch top 4 most relevant chunks

# Test the retriever with a sample query.
# `invoke` is the Runnable entry point (the same interface the RAG chain uses
# when it pipes through the retriever); `get_relevant_documents` is deprecated.
test_query = "What are the main topics covered in this document?"
retrieved_docs = retriever.invoke(test_query)

print(f"Retrieved {len(retrieved_docs)} documents for the test query")
if retrieved_docs:
    print("\nSample of first retrieved document:")
    print(retrieved_docs[0].page_content[:200] + "...")
else:
    # Nothing came back: report it instead of failing with IndexError on [0].
    print("\nNo documents were retrieved; check that the vector store was populated.")

## Step 7: Connect to the locally served LLM API

Instead of using Hugging Face Hub, we'll connect to your locally served LLM API.

In [None]:
from langchain_community.llms import OpenAI
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Configure your local LLM API endpoint.
# Both values can be overridden through environment variables so that a real
# key never has to be written into the notebook; the literals are only the
# fallbacks used when the variables are unset.
local_llm_url = os.environ.get("LOCAL_LLM_URL", "http://localhost:8000/v1")  # Update with your actual API endpoint
api_key = os.environ.get("LOCAL_LLM_API_KEY", "local-api-key")  # Dummy value; fine if your server ignores the key

# Set up the LLM to use your local API
llm = OpenAI(
    openai_api_key=api_key,
    openai_api_base=local_llm_url,
    streaming=True,
    # `callbacks` replaces the deprecated `callback_manager` argument; the
    # handler writes each streamed token to stdout as it arrives.
    callbacks=[StreamingStdOutCallbackHandler()],
    temperature=0.3,
    max_tokens=1024,
    model_name="local-model"  # This can be any string that your API requires
)

## Alternative Step 7: Use LangChain's ChatOpenAI for chat models

If your local LLM is a chat model rather than a completion model, uncomment the code in this cell and run it instead of the previous cell.

In [None]:
from langchain_openai import ChatOpenAI

# Uncomment and use this if your local LLM is a chat model.
# The code is kept as `#` comments rather than inside a triple-quoted string:
# a bare string is an expression, so as the last statement of a cell it would
# be echoed as the cell's output.
# llm = ChatOpenAI(
#     openai_api_key=api_key,
#     openai_api_base=local_llm_url,
#     streaming=True,
#     callbacks=[StreamingStdOutCallbackHandler()],  # Print tokens as they stream, like the completion LLM
#     temperature=0.3,
#     max_tokens=1024,
#     model_name="local-model"  # This can be any string that your API requires
# )

## Step 8: Create a chain for invoking the LLM

Finally, we'll create a chain that combines our retriever, prompt template, and LLM to answer questions.

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Helper that turns the retrieved documents into one context string
def format_docs(docs):
    """Concatenate the ``page_content`` of every document in ``docs``.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string holding the page contents in their original order,
        separated by a blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = (document.page_content for document in docs)
    return separator.join(page_texts)

# Assemble the RAG pipeline. The incoming question is forwarded unchanged as
# "query", while "context" is produced by running the retriever on it and
# joining the hits with format_docs. The filled prompt is sent to the LLM and
# the output is parsed into a plain string.
rag_chain = {
    "context": retriever | format_docs,
    "query": RunnablePassthrough(),
} | prompt | llm | StrOutputParser()

## Testing the RAG Application

Now let's test our RAG application with some questions.

In [None]:
# First sample question. The answer is not printed explicitly here: the
# StreamingStdOutCallbackHandler attached to the LLM writes it to stdout while
# it streams, and the parsed result is also kept in `response`.
question = "What are the main recommendations in this document?"
print(f"\nQuestion: {question}")
print("\nAnswer:")
response = rag_chain.invoke(question)

In [None]:
# Second sample question, run through the same chain
question = "What is the scope of this document?"
print(f"\nQuestion: {question}")
print("\nAnswer:")
response = rag_chain.invoke(question)

## Bonus: Create a Simple Q&A Interface

Let's create a simple loop to ask multiple questions interactively.

In [None]:
def ask_question(question):
    """Print a Q/A header for ``question`` and run it through ``rag_chain``.

    Args:
        question: The question text passed to ``rag_chain.invoke``.

    Returns:
        Whatever ``rag_chain.invoke(question)`` returns.
    """
    # Emit the question and the "A: " prefix without a trailing newline, so
    # anything written to stdout during the call continues on the same line.
    header = "\nQ: {}\n\nA: ".format(question)
    print(header, end="")
    answer = rag_chain.invoke(question)
    return answer

# Interactive Q&A loop: keep prompting until the user types 'exit'.
while True:
    try:
        user_question = input("\nEnter your question (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the prompt was interrupted: end the session
        # cleanly instead of surfacing a traceback.
        print("\nExiting Q&A session.")
        break

    # Ignore surrounding whitespace so inputs such as "exit " still quit.
    user_question = user_question.strip()
    if not user_question:
        # Nothing typed: prompt again rather than sending an empty query.
        continue
    if user_question.lower() == 'exit':
        print("Exiting Q&A session.")
        break

    ask_question(user_question)