In [29]:
# Imports
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

# Suppress warnings
# Silences every Python warning for the rest of the session so library
# deprecation notices do not clutter cell output.
import warnings
warnings.filterwarnings('ignore')

# Jupyter-specific imports
# `display` and `Markdown` are used by chat_with_pdf to render model answers.
from IPython.display import display, Markdown

# Set environment variable for protobuf
# Selects the pure-Python protobuf implementation.
# NOTE(review): this presumably has to be set before protobuf is first imported;
# confirm none of the imports above already load it, or move this to the top.
import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

In [30]:
# Path of the PDF to index; edit this to point at your own document.
local_path = "harry-potter-trivia-questions-pdf.pdf"

# Fail fast when no path is configured. Merely printing a hint would leave
# `loader` and `data` undefined, and the next cell would then die with an
# unrelated NameError.
if not local_path:
    raise ValueError("Upload a PDF file")

# Load the PDF into `data`, which the splitting cell consumes.
loader = PyPDFLoader(file_path=local_path)
data = loader.load()
print(f"PDF loaded successfully: {local_path}")

PDF loaded successfully: harry-potter-trivia-questions-pdf.pdf


In [31]:
# Cut the loaded documents into chunks of up to 1000 characters, with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = text_splitter.split_documents(data)
print(f"Text split into {len(chunks)} chunks")

Text split into 18 chunks


In [32]:
import subprocess

# Embedding model used to index the chunks. The same constant drives both the
# `ollama pull` and the embedding call so the two can never drift apart.
EMBEDDING_MODEL = "nomic-embed-text"

# Pull the models this notebook actually uses: the embedding model and the chat model.
result1 = subprocess.run(['ollama', 'pull', EMBEDDING_MODEL],
                         capture_output=True, text=True)

result2 = subprocess.run(['ollama', 'pull', 'llama3.2:1b'],
                         capture_output=True, text=True)

# Surface failed pulls instead of silently discarding them. This only warns
# (rather than raising) because the model may already be available locally,
# e.g. when working offline.
for pull_result in (result1, result2):
    if pull_result.returncode != 0:
        print(f"Warning: {' '.join(pull_result.args)} failed: {pull_result.stderr.strip()}")

# Create vector database
# Embeds every chunk with EMBEDDING_MODEL and stores the vectors in the
# "local-rag" Chroma collection.
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=OllamaEmbeddings(model=EMBEDDING_MODEL),
    collection_name="local-rag"
)
print("Vector database created successfully")

Vector database created successfully


In [33]:
# Set up LLM and retrieval
# `llm` is reused twice below: to rewrite the user's question for retrieval
# and to produce the final answer.
local_model = "llama3.2:1b"  # Ollama model tag; change to use a different model
llm = ChatOllama(model=local_model)

In [34]:
# Prompt that asks the LLM for 2 rephrasings of the user's question,
# separated by newlines, to widen what the similarity search can match.
QUERY_PROMPT = PromptTemplate(
    template="""You are an AI language model assistant. Your task is to generate 2
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
    input_variables=["question"],
)

# Multi-query retriever built on the vector store's retriever, using `llm`
# together with QUERY_PROMPT.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

In [35]:
# Answering prompt: restricts the model to the supplied context and then
# states the user's question. Placeholders: {context} and {question}.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [36]:
def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing `page_content`.

    Returns:
        The documents' `page_content` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create chain
# The incoming question is sent both to the retriever (to build the context)
# and, unchanged, to the prompt's {question} slot. Retrieved documents are
# flattened to plain text first; otherwise the prompt would receive the Python
# repr of a list of Document objects, metadata included.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [37]:
def chat_with_pdf(question):
    """
    Run `question` through the RAG chain and render the answer as Markdown.

    Args:
        question: Value passed unchanged to `chain.invoke`.

    Returns:
        Whatever `display` returns for the rendered Markdown object.
    """
    answer = chain.invoke(question)
    rendered = Markdown(answer)
    return display(rendered)

In [38]:
# Example 1: ask for a high-level summary of the loaded PDF
chat_with_pdf("What is the main idea of this document?")

The main idea of this document appears to be related to Harry Potter trivia questions, likely from a PDF guide or worksheet. The content includes various trivia questions and answers about characters, events, and concepts from the Harry Potter series by J.K. Rowling.

In [39]:
# Example 2: look up a single fact from the document
chat_with_pdf("Who is Fluffy")

The answer to the question "Who is Fluffy?" is Hagrid's three-headed dog, which is described as being a very intimidating and powerful creature.

In [40]:
# Example 3: ask for a list of the trivia questions found in the document
chat_with_pdf("What are some trivia questions mentioned in the document?")

Based on the provided context, here are some trivia questions mentioned:

1. Trivia question about Rita Skeeter's type of Animagus: A beetle
2. Trivia question about what is a bezoar: An antidote to poison
3. Trivia question about Dumbledore's hand turning blackened and shriveled: Marvolo Gaunt’s ring (A horcrux)
4. Trivia questions about the first names of Voldemort's parents: Tom and Merope
5. Trivia question about Severus Snape being a half-blood prince: Yes, with his son Edward Remus Lupin as the full name.
6. Trivia question about what Dumbledore tells Harry he sees in the Mirror of Erised: Thick wollen socks
7. Trivia questions about Hogwarts houses and professors:
   - Ravenclaw: Moaning Myrtle's house
   - Slytherin: Sirius Black, Draco Malfoy (though not explicitly mentioned as a professor)
   - Gryffindor: James Potter, Ron Weasley
   - Hufflepuff: Harry Potter, Rubeus Hagrid
8. Trivia questions about magical creatures and plants:
   - Devil's Snare (trap that traps Harry, Ron, and Hermione on their way to get the Philosopher's Stone)
   - Nargles (magical creature infested with them)
   - Aragog (Hagrid's pet giant spider)
   - Bellatrix Lestrange
9. Trivia questions about magical devices:
   - Portkey (an ordinary object enchanted to teleport people)
   - Pensieve (a magical device used to store and review memories)
10. Trivia questions about the characters' abilities and traits:
    - Metamorphmagus (Tonks' ability to transform her features)
    - Veritaserum (the truth-telling potion that makes people tell the truth)
    - Expelliarmus (a spell to disarm an opponent)
    - Neville's toad's name is Trevor
11. Trivia questions about the Death Eaters:
    - A terrorist group of wizards and witches
12. Trivia questions about Hogwarts and its history:
    - The golden snitch
    - Yule ball
    - Death eaters
13. Trivia questions about Harry Potter's backstory:
    - Tom Riddle (young Lord Voldemort)
    - Horace Slughorn (founder of the Slug Club)
14. Trivia questions about the Hogwarts founders and their first names:
    - Godric Gryffindor, Salazar Slytherin, Helga Hufflepuff, Rowena Ravenclaw
15. Trivia question about when Harry Potter's birthday is: July 31