In [None]:
# This cell imports all the necessary libraries and adds the project's root 
# directory to the system path. This allows us to import our own modules, 
# like the `rag.py` and `settings.py` files we created earlier.

import os
import sys
import pprint
from dotenv import load_dotenv

# Make the repository root importable so that the project's own packages
# (imported just below) can be resolved.
# The root is taken to be the parent of the current working directory, i.e.
# the notebook is expected to run from a directory directly beneath it.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    # Prepend (not append) so this entry is searched before the others.
    sys.path.insert(0, project_root)
    print(f"Added '{project_root}' to the system path.")

# Now we can import our own modules
from src.backend.core.rag import get_retriever
from src.backend.core.settings import settings

# LangChain specific imports
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

In [5]:
# # 2. Configuration and Model Initialization
#
# Pull in the environment variables, choose the book under test, and build
# the chat model that the rest of the notebook uses.

# Read variables from a .env file (expected to live in the project root).
load_dotenv()

# -- IMPORTANT: point this at the book you want to test --
# A book_id is the PDF's filename with the .pdf extension removed; it should
# correspond to one of the directories produced by the ingestion script.
BOOK_ID = "David Foster - Generative Deep Learning_ Teaching Machines To Paint, Write, Compose, and Play (2023, O'Reilly Media) - libgen.li"

# The model name comes from the application settings; temperature is pinned to 0.
llm = ChatGoogleGenerativeAI(temperature=0, model=settings.LLM_MODEL)

print(f"Configuration loaded. Testing with book: '{BOOK_ID}'")

Configuration loaded. Testing with book: 'David Foster - Generative Deep Learning_ Teaching Machines To Paint, Write, Compose, and Play (2023, O'Reilly Media) - libgen.li'


In [8]:
# # 3. Load the Retriever
#
# Build the retriever through the application's own `get_retriever` helper so
# that this notebook exercises the same logic the final API will use.

retriever = get_retriever(book_id=BOOK_ID)

# A falsy result is treated as "could not load"; report either outcome.
if not retriever:
    print("\nFailed to load retriever. Please check the BOOK_ID and ensure you have run the ingestion script.")
else:
    print("\nRetriever loaded successfully!")



Retriever loaded successfully!


In [13]:
# # 4. Test 1: Document Retrieval
#
# Let's test the retriever by itself. We'll ask it a question and see what raw
# documents (chunks) it pulls from the vector store. This helps verify that
# the retrieval part of RAG is working correctly, independently of the LLM.

query = "implementation code of DCGAN."

# Maximum number of characters of each chunk to print.
# None prints every chunk in full; set an integer (e.g. 500) to keep the
# cell output short when chunks are large.
PREVIEW_CHARS = None

print(f"Testing retriever with query: '{query}'\n")

if retriever:
    # .invoke() runs the retriever and gets the results
    retrieved_docs = retriever.invoke(query)

    print(f"Retrieved {len(retrieved_docs)} documents:\n")

    # Print each document's provenance metadata followed by its text.
    for i, doc in enumerate(retrieved_docs):
        content = doc.page_content
        # Append an ellipsis only when text was actually cut off, so the
        # output never suggests truncation that did not happen.
        is_truncated = PREVIEW_CHARS is not None and len(content) > PREVIEW_CHARS
        preview = f"{content[:PREVIEW_CHARS]}..." if is_truncated else content

        print(f"--- Document {i+1} ---")
        print(f"Source: {doc.metadata.get('source', 'N/A')}")
        print(f"Page: {doc.metadata.get('page', 'N/A')}")
        print(f"Content: {preview}\n")
else:
    print("Retriever not available. Cannot perform test.")


Testing retriever with query: 'implementation code of DCGAN.'

Retrieved 4 documents:

--- Document 1 ---
Source: D:\Programming\Programming Tutor\programming_tutor\scripts\..\data\David Foster - Generative Deep Learning_ Teaching Machines To Paint, Write, Compose, and Play (2023, O'Reilly Media) - libgen.li.pdf
Page: 125
Content: Networks. ”2 In this 2015 paper, the authors show how to build a deep convolutional
GAN to generate realistic images from a variety of datasets. They also introduce sev‐
eral changes that significantly improve the quality of the generated images.
Running the Code for This Example
The code for this example can be found in the Jupyter notebook
located at notebooks/04_gan/01_dcgan/dcgan.ipynb in the book
repository.
The Bricks Dataset
First, you’ll need to download the training data. We’ll be using the Images of LEGO
Bricks dataset that is available through Kaggle. This is a computer-rendered collec‐
tion of 40,000 photographic images of 50 different toy bricks,

In [14]:
# # 5. Test 2: Full RAG Chain (End-to-End Test)
#
# Now, let's build and test the complete RAG chain. This chain will:
# 1. Take a question.
# 2. Use the retriever to fetch relevant documents.
# 3. "Stuff" the text of those documents and the question into a prompt.
# 4. Pass the prompt to the LLM to generate a final answer.

def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot would receive the string
    form of a list of document objects (metadata and repr escaping
    included) rather than the plain chunk text.

    Args:
        docs: Iterable of objects exposing a `page_content` string, i.e. the
            documents returned by the retriever.

    Returns:
        The documents' text, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


if retriever:
    # Define the prompt template for the LLM
    template = """
    You are an AI assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Keep the answer concise and helpful.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """

    prompt = PromptTemplate.from_template(template)

    # This is the LangChain Expression Language (LCEL) way of building a chain.
    # The incoming question fans out to both keys of the dict: it is passed
    # through unchanged as "question", and it drives the retriever whose
    # results are flattened to plain text by format_docs for "context".
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    print("RAG chain created. Ready to answer questions.")

RAG chain created. Ready to answer questions.


In [16]:
# # Run the RAG Chain
#
# Send a question through the chain and print the answer it generates.
# The chain only exists if the cell that builds it ran with a usable
# retriever, so bail out with a message when the name is missing.
if 'rag_chain' not in locals():
    print("RAG chain not created. Cannot run this cell.")
else:
    question_to_ask = "What are the components of a Generative Adversarial Network (GAN)?"

    print(f"Asking the RAG chain: '{question_to_ask}'\n")

    # Invoking the chain runs every step: retrieval, prompt, LLM, parsing.
    final_answer = rag_chain.invoke(question_to_ask)

    print("--- Generated Answer ---")
    print(final_answer)


Asking the RAG chain: 'What are the components of a Generative Adversarial Network (GAN)?'

--- Generated Answer ---
A GAN consists of two networks: a generator and a discriminator.  The generator creates images, and the discriminator tries to distinguish between real and generated images.  The training process involves alternating training of these two networks.
