# 📘 Agentic RAG Core Notebook
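This notebook implements a LangGraph-based retrieval-augmented generation (RAG) pipeline built from modular agents: a PDF is extracted and chunked, the query is embedded, relevant chunks are retrieved, and a QA agent produces the answer.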

In [1]:
# Set base path for project.
# The project root is taken to be the kernel's current working directory, and
# <root>/src is added to sys.path (presumably where the project-local `utils`
# and `agents` packages live — confirm against the repository layout).
import os
import sys

# os.getcwd() already returns an absolute path; abspath only normalises it.
base_path = os.path.abspath(os.getcwd())

# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
src_path = os.path.join(base_path, "src")
if src_path not in sys.path:
    sys.path.append(src_path)

In [2]:
# Install dependencies
%pip install langgraph faiss-cpu sentence-transformers pdfplumber openai



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Pin the OpenAI client library to release 0.28.
%pip install openai==0.28


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
# python-dotenv provides load_dotenv, which this notebook uses to read a .env file.
%pip install python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [5]:
import warnings

from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# os.getenv returns None when the variable is not set anywhere.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Surface a missing key here, where the cause is obvious, instead of letting it
# show up later as a failure in whichever step first needs the key
# (presumably the QA agent — confirm in agents/qa_agent).
if not OPENAI_API_KEY:
    warnings.warn(
        "OPENAI_API_KEY is not set; add it to .env or export it in the "
        "environment before running the QA step."
    )

In [6]:
from utils.pdf_utils import extract_text_from_pdf, clean_text, chunk_text
from agents.embedder_agent import EmbedderAgent
from agents.retriever_agent import RetrieverAgent
from agents.qa_agent import QAAgent
from langgraph.graph import StateGraph

  from .autonotebook import tqdm as notebook_tqdm


## Step 1: Load and Chunk PDF

In [7]:
# Locate the source PDF under <base_path>/data. os.path.join keeps the path
# portable instead of hard-coding "/" separators in an f-string.
file_path = os.path.join(base_path, "data", "pharmacology_review.pdf")

# Extract -> clean -> chunk. Each intermediate result keeps its own name so any
# stage can be inspected from a later cell.
raw_text = extract_text_from_pdf(file_path)
cleaned = clean_text(raw_text)
chunks = chunk_text(cleaned)


## Step 2: Initialize Agents

In [8]:
import os

# Sanity check: report the resolved project root and what it contains.
print(f"Current base_path: {base_path}")
print(f"Contents of {base_path}:")
try:
    entries = os.listdir(base_path)
except FileNotFoundError:
    # The directory itself is missing; report it instead of raising.
    print(f"Error: Directory not found at {base_path}")
else:
    # One entry per line, in the order os.listdir returned them.
    for entry in entries:
        print(entry)

Current base_path: /Users/samo13loaner05/Desktop/PharmAGen
Contents of /Users/samo13loaner05/Desktop/PharmAGen:
.DS_Store
requirements.txt
02_agentic_rag_core.ipynb
.gitignore
.env
venv
.git
.vscode
data
outputs
notebooks
src


In [9]:
# Instantiate the three pipeline agents; their .run methods are later
# registered as the nodes of the graph.
embedder = EmbedderAgent()
# The retriever receives the PDF chunks and the embedder instance
# (presumably to embed and index the chunks — TODO confirm in agents/retriever_agent).
retriever = RetrieverAgent(chunks, embedder)
qa_agent = QAAgent()


  return forward_call(*args, **kwargs)


## Step 3: Build LangGraph

In [10]:
from langgraph.graph import StateGraph
from typing import TypedDict, List

In [12]:
class RAGState(TypedDict):
    """State dictionary shared by the nodes of the RAG graph.

    Passed to ``StateGraph`` as the graph's state schema.
    """

    # The user's question; the only key supplied when the graph is invoked.
    query: str
    # Embedding of the query. NOTE(review): the recorded notebook output shows a
    # NumPy array stored under this key rather than a list of floats — confirm
    # which type is intended.
    query_embedding: List[float]
    # Presumably the chunks selected by the retriever node — TODO confirm.
    retrieved_chunks: List[str]
    # Final answer text; read from the response after the graph is invoked.
    answer: str

# Wire the three agents into a linear graph: embedder -> retriever -> qa.
builder = StateGraph(RAGState)

# Each node is the corresponding agent's bound .run method.
builder.add_node("embedder", embedder.run)
builder.add_node("retriever", retriever.run)
builder.add_node("qa", qa_agent.run)

builder.set_entry_point("embedder")
builder.add_edge("embedder", "retriever")
builder.add_edge("retriever", "qa")
# Declare "qa" as the terminal node explicitly (adds the qa -> END edge) so the
# graph's end is stated rather than implied by a node with no outgoing edge.
builder.set_finish_point("qa")

graph = builder.compile()

## Step 4: Query the Graph

In [None]:
# Run the compiled graph on a single question. Only the "query" key is seeded;
# the remaining state keys are expected to be filled in by the nodes.
query = "What is the half-life of Semaglutide?"
initial_state = {"query": query}
response = graph.invoke(initial_state)


{'query': 'What is the half-life of Semaglutide?', 'query_embedding': array([-2.45255344e-02,  1.97728965e-02, -2.34427652e-03,  6.53356761e-02,
       -4.13661003e-02, -9.75445732e-02, -7.67604560e-02,  1.07920542e-01,
        1.23784868e-02, -1.37841394e-02, -1.74341928e-02, -4.72524911e-02,
        1.10178266e-03, -3.14479321e-02, -3.39602008e-02,  3.70279737e-02,
        5.87818101e-02, -9.42537412e-02, -1.10493936e-02,  1.75741557e-02,
        6.49396237e-03, -2.39969045e-02, -1.57541726e-02,  3.60591933e-02,
        5.54192513e-02,  1.90579332e-02, -9.91290584e-02, -3.53672206e-02,
       -4.42136079e-02, -2.35726796e-02,  1.81931071e-02, -2.72807840e-04,
        4.53015156e-02, -1.64316036e-02,  9.81429871e-03,  3.47209051e-02,
       -3.01048364e-02, -1.90009177e-02,  5.66143123e-03,  5.46779223e-02,
       -5.58175594e-02, -4.56829071e-02, -1.01273127e-01,  2.21272539e-02,
       -1.74234305e-02, -1.11801349e-01, -3.46443579e-02,  1.79802421e-02,
        4.41783294e-03,  1.643

In [14]:
# Show only the generated answer taken from the returned state.
print(response["answer"])

The half-life of semaglutide is approximately 149 to 168 hours, which is about 6 to 7 days, depending on the study and administration method.
