In [8]:
# Import necessary libraries
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Import FAISS and related libraries
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from uuid import uuid4

# 1. Load PDF Document
pdf_path = "dietarySupplements.pdf"
loader = PyMuPDFLoader(pdf_path)
documents = loader.load()

# 2. Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # Adjust based on your document
    chunk_overlap=50  # Some overlap to maintain context
)
split_docs = text_splitter.split_documents(documents)

# 3. Create Embeddings using Ollama
embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url='http://localhost:11434')

# 4. Create FAISS Vector Store
def create_faiss_vector_store(documents, embeddings):
    # Create a FAISS index
    embedding_dimension = len(embeddings.embed_query("test"))
    index = faiss.IndexFlatL2(embedding_dimension)
    
    # Create vector store
    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore({str(uuid4()): doc for doc in documents}),
        index_to_docstore_id={}
    )
    
    return vector_store

# Create FAISS vector store
vectorstore = create_faiss_vector_store(split_docs, embeddings)

# 5. Create Retriever
retriever = vectorstore.as_retriever(
    search_kwargs={
        "k": 3,  # Retrieve top 3 most relevant chunks
        "search_type": "mmr"  # Maximal Marginal Relevance
    }
)

# 6. Create Chat Model using Ollama
chat_model = ChatOllama(model='llama3.2:3b', base_url='http://localhost:11434')

# 7. Create RAG Prompt Template
prompt_template = ChatPromptTemplate.from_template("""
You are an expert assistant. Answer the question based only on the following context:

{context}

Question: {question}
""")

query = "What is the main topic of this document?"
retrieved_docs = retriever.invoke(query)

print("=== Retrieved Documents ===")
for i, doc in enumerate(retrieved_docs, 1):
    print(f"\nDocument {i}:")
    print("Content:", doc.page_content)
    print("Metadata:", doc.metadata)
# 8. Create RAG Chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt_template
    | chat_model
    | StrOutputParser()
)

# 9. Example Usage
query = "What is the main topic of this document?"
response = rag_chain.invoke(query)
print(response)

# Optional: Save and load the vector store
def save_and_load_example():
    # Save the vector store locally
    vectorstore.save_local("faiss_index")
    
    # Load the vector store
    loaded_vectorstore = FAISS.load_local(
        "faiss_index", 
        embeddings, 
        allow_dangerous_deserialization=True
    )
    
    # Use the loaded vector store
    loaded_retriever = loaded_vectorstore.as_retriever()
    return loaded_retriever

# Demonstrate saving and loading
loaded_retriever = save_and_load_example()


=== Retrieved Documents ===
I'm happy to help, but there's no context provided for the question. Could you please provide more information or clarify what document we're referring to? I'll do my best to answer your question accurately.
