## Setting up the Gemini API

In [5]:
import os
from langchain.chat_models import init_chat_model

os.environ["GOOGLE_API_KEY"] = "AIzaSyCq9RkCd1Jw8dLUdSLQA2sbdL2d8Wmou1Q"

model = init_chat_model("google_genai:gemini-2.5-flash-lite")

## Setting up the HuggingFace Embeddings Model 

In [14]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

## Setting up FAISS


In [15]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

## TextSplitter

In [16]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)

## Processing all the docs in the folder 

In [17]:
# load all PDF files from the ./docs folder using the existing PyPDFLoader
pdf_files = [
    os.path.join("docs", f)
    for f in os.listdir("docs")
    if os.path.isfile(os.path.join("docs", f)) and f.lower().endswith(".pdf")
]

all_pdf_docs = []
for path in pdf_files:
    loader = PyPDFLoader(path)
    loaded = loader.load()
    # ensure each document records its source file
    for d in loaded:
        d.metadata["source"] = path
    all_pdf_docs.extend(loaded)

print(f"Loaded {len(all_pdf_docs)} documents from {len(pdf_files)} PDF files.")
pdf_files[:10]

# --- chunk, embed, and upload PDFs to existing FAISS vector_store ---
# reuse the existing text_splitter defined in a later cell
pdf_splits = text_splitter.split_documents(all_pdf_docs)
print(f"Split {len(all_pdf_docs)} PDF docs into {len(pdf_splits)} chunks.")

# add to the existing vector_store (uses the embeddings already configured)
pdf_document_ids = vector_store.add_documents(documents=pdf_splits)
print(f"Uploaded {len(pdf_document_ids)} PDF chunks to FAISS. Sample ids: {pdf_document_ids[:3]}")

Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 47 0 (offset 0)
Ignoring wrong pointing object 52 0 (offset 0)
Ignoring wrong pointing object 55 0 (offset 0)
Ignoring wrong pointing object 57 0 (offset 0)
Ignoring wrong pointing object 59 0 (offset 0)
Ignoring wrong pointing object 72 0 (offset 0)
Ignoring wrong pointing object 233 0 (offset 0)


Loaded 60 documents from 3 PDF files.
Split 60 PDF docs into 252 chunks.
Uploaded 252 PDF chunks to FAISS. Sample ids: ['2e4831d0-3e08-487f-b83c-35aca832038a', 'aa5176dc-ab27-4f3c-a5b9-078d5ad1ef23', '63baf6b1-b5dc-4ae7-9e1a-65c358ea9890']


In [21]:
print(all_pdf_docs)

[Document(metadata={'producer': 'Mac OS X 10.10.5 Quartz PDFContext', 'creator': 'TeX', 'creationdate': "D:20150917102908Z00'00'", 'moddate': "D:20150917102908Z00'00'", 'source': 'docs/SMPL2015.pdf', 'total_pages': 16, 'page': 0, 'page_label': '1'}, page_content='SMPL: A Skinned Multi-Person Linear Model\nMatthew Loper⇤12 Naureen Mahmood†1 Javier Romero†1 Gerard Pons-Moll†1 Michael J. Black†1\n1Max Planck Institute for Intelligent Systems, T¨ubingen, Germany\n2Industrial Light and Magic, San Francisco, CA\nFigure 1: SMPL is a realistic learned model of human body shape and pose that is compatible with existing rendering engines, allows\nanimator control, and is available for research purposes. (left) SMPL model (orange) ﬁt to ground truth 3D meshes (gray). (right) Unity 5.0\ngame engine screenshot showing bodies from the CAESAR dataset animated in real time.\nAbstract\nWe present a learned model of human body shape and pose-\ndependent shape variation that is more accurate than previou

## Setting up the Model and System Prompt

In [22]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent

@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    last_query = request.state["messages"][-1].text
    retrieved_docs = vector_store.similarity_search(last_query)

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    system_message = (
        '''
        You are a research assistant specialized in analyzing scientific papers.

        You must only use the retrieved context to answer. If the answer cannot be fully supported by the provided context, say “Not enough information in the retrieved documents.”

        When answering:
        -Cite passages using the citation format used by the RAG system (e.g., [source_3]).
        -Do not hallucinate definitions, math, or claims not present in the retrieved text.
        -Use clear, structured, academic language.
        -When comparing or summarizing, reference which retrieved chunks support each point.
        -If the user asks for opinions, base them strictly on retrieved evidence.

        Your task: provide precise, context-grounded, verifiable answers.
        '''
        f"\n\n{docs_content}"
    )

    return system_message


agent = create_agent(model, tools=[], middleware=[prompt_with_context])

## Prompting

In [25]:
query = "what is smpl model, what does it do?"
for step in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


what is smpl model, what does it do?

The SMPL (Skinned Multi-Person Linear) model is a statistical body model that represents a person's shape and pose [source_1, source_13]. It is an additive model in vertex space [source_1]. The model is a function of joint angles and shape parameters [source_2].

The SMPL model learns blend shapes from a large set of training meshes to represent various poses [source_1]. It learns a simplified function that relates pose to blend-shape weights, which is linear in the elements of part rotation matrices [source_1]. This allows the model to generalize to different poses and is efficient for animation in game engines [source_1]. The model can be animated using conventional animation methods in software like Maya and Unity [source_4].
