In [1]:
# Imports
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

# Suppress warnings
# NOTE(review): 'ignore' silences every warning category for the rest of the
# kernel session, including deprecation notices from the libraries imported
# above — consider narrowing this filter once the notebook is stable.
import warnings
warnings.filterwarnings('ignore')

# Jupyter-specific imports (used to render model answers as Markdown)
from IPython.display import display, Markdown

# Set environment variable for protobuf
# NOTE(review): presumably this must be in place before protobuf is first
# imported to have any effect — confirm none of the imports above load it
# eagerly, or move this assignment to the top of the cell.
import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

In [2]:
# Path of the PDF to index, relative to the notebook's working directory.
local_path = "harry-potter-trivia-questions-pdf.pdf"

# Fail fast with an actionable message when the file is missing. Testing the
# truthiness of a non-empty literal can never fail, so a missing file would
# otherwise only surface later as a loader error or an undefined `data`.
if not os.path.isfile(local_path):
    raise FileNotFoundError(
        f"PDF not found: {local_path!r}. Place the file next to this notebook "
        "or update local_path."
    )

# Load the PDF into a list of LangChain documents for the splitting step.
loader = PyPDFLoader(file_path=local_path)
data = loader.load()
print(f"PDF loaded successfully: {local_path}")

PDF loaded successfully: harry-potter-trivia-questions-pdf.pdf


In [3]:
# Break the loaded documents into overlapping chunks ready for embedding
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(data)
print(f"Text split into {len(chunks)} chunks")

Text split into 18 chunks


In [4]:
import subprocess

# Models this notebook needs from the local Ollama installation. The embedding
# model name is defined once so that the model being pulled is the same one
# that is used to embed the chunks below.
embedding_model = "nomic-embed-text"
required_models = (embedding_model, "llama3.2:1b")

# Pull each model. A failed pull is reported instead of silently ignored, but
# it does not abort the cell: the model may already be available locally
# (e.g. when working offline), in which case the steps below still succeed.
for model_name in required_models:
    pull = subprocess.run(['ollama', 'pull', model_name],
                          capture_output=True, text=True)
    if pull.returncode != 0:
        print(f"Warning: 'ollama pull {model_name}' failed: {pull.stderr.strip()}")

# Create vector database
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model=embedding_model),
    collection_name="local-rag"
)
print("Vector database created successfully")

Vector database created successfully


In [5]:
# Chat model used for query rewriting and answering.
# Swap the tag below to try a different locally available model.
local_model = "llama3.2:1b"
llm = ChatOllama(model=local_model)

In [6]:
# Prompt that asks the LLM to rephrase the user's question into alternative
# search queries
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Multi-query retriever built from the vector store retriever, the chat model
# and the query-rewriting prompt above
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

In [7]:
# Answering prompt: the model sees the retrieved context followed by the question.
# Written as adjacent literals; the resulting text is unchanged.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [8]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Without this step the list of Document objects is formatted into the prompt
    as-is, so the model receives object reprs (metadata included) instead of
    the plain chunk text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create chain: retrieve -> format context -> fill prompt -> LLM -> plain string
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [9]:
def chat_with_pdf(question):
    """
    Run the RAG chain on a question and render the answer as Markdown.

    The answer is shown through IPython's display machinery; the value
    returned is whatever `display` returns.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)

In [10]:
# Example 1: ask for a high-level summary of the document
chat_with_pdf("What is the main idea of this document?")

The main idea of this document appears to be a collection or quiz about the Harry Potter series by J.K. Rowling, specifically covering various trivia questions and topics from the books. The content ranges from character names and spells (e.g., Rita Skeeter being an Animagus beetle) to events like Dumbledore's hand turning blackened and Voldemort's Horcrux creation.

In [11]:
# Example 2: ask about a specific name
chat_with_pdf("Who is Fluffy")

The answer to the question "Who is Fluffy" is Hagrid's three-headed dog.

In [13]:
# Example: another question about a specific name
chat_with_pdf("Who is Snape?")

Based on the provided context, it appears that Severus Snape is mentioned multiple times as the half-blood prince.

In [12]:
# Example: open-ended question asking the model to list content from the document
# NOTE(review): the answer is generated by the model — check it against the PDF
# before relying on it.
chat_with_pdf("What are some trivia questions mentioned in the document?")

Here are the trivia questions mentioned in the document:

1. Trivia Question: Rita Skeeter is an Animagus who transforms into a beetle.
2. Trivia Question: Dumbledore leaves Ron Weasley's name in his will as Deluminator.
3. Trivia Question: Ginny names the Pygmy Puff that she buys from Weasleys' Wizard Wheezes Arnold.
4. Trivia Question: Fred and George Weasley were born on April 1st.
5. Trivia Question: Harry manages to breathe underwater during the second task of the Triwizard Tournament by eating gillyweed.
6. Trivia Question: Tonks has the ability to transform her features due to being a Metamorphmagus.
7. Trivia Question: Dumbledore tells Harry that he sees "thick wollen socks" in the Mirror of Erised.
8. Trivia Question: The Philosopher's Stone is often infested with Nargles, magical creatures.
9. Trivia Question: James Potter transfigures into a stag.
10. Trivia Question: Hermione names her son Edward Remus Lupin after Professor Lupin.
11. Trivia Question: Dumbledore leaves a Vial of Felix Felicis in Ron Weasley's will.
12. Trivia Question: Ginny names the treehouse where Harry, Ron, and Hermione live under a spell to protect it from being destroyed by the Ministry of Magic.
13. Trivia Question: The Hogwarts house that Moaning Myrtle belongs to is Ravenclaw.
14. Trivia Question: James Potter transforms into a stag during his first year at Hogwarts.
15. Trivia Question: The Philosopher's Stone contains a bezoar, a magical plant trap.
16. Trivia Question: Professor McGonagall teaches Transfiguration at Hogwarts.
17. Trivia Question: Ginny names the Pygmy Puff that she bought from Weasleys' Wizard Wheezes after it is petrified by Mrs. Norris.
18. Trivia Question: Fleur Delacour was one of the four competitors in Goblet of Fire's Triwizard Tournament.
19. Trivia Question: When Harry Potter's birthday is, July 31st
20. Trivia Question: The Hogwarts school motto in English is Never tickle a sleeping dragon