# 📘 Agentic RAG Core Notebook
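This notebook builds a LangGraph-based retrieval-augmented generation (RAG) pipeline from modular agents: it loads and chunks a PDF, initializes the embedder, retriever, and QA agents, wires them into a graph, and runs a sample query.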

In [1]:
# Mount Google Drive on the Colab filesystem; the project paths used by the
# following cells all live under this mount point.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Set base path for project
import sys

# Root of the project on the mounted Google Drive; later cells build their
# data and source paths from this value.
base_path = '/content/drive/MyDrive/PharmaAGen'

# Make the project's `src` directory importable. The path is derived from
# base_path (instead of repeating the literal) and is appended only when it is
# not already present, so re-running this cell does not accumulate duplicate
# sys.path entries.
src_path = base_path + '/src'
if src_path not in sys.path:
    sys.path.append(src_path)

In [7]:
# Write every module search-path entry to stdout, one entry per line.
sys.stdout.writelines(f"{entry}\n" for entry in sys.path)

/content
/env/python
/usr/lib/python311.zip
/usr/lib/python3.11
/usr/lib/python3.11/lib-dynload

/usr/local/lib/python3.11/dist-packages
/usr/lib/python3/dist-packages
/usr/local/lib/python3.11/dist-packages/IPython/extensions
/root/.ipython
/content/drive/MyDrive/PharmaAGen/src
/content/drive/MyDrive/PharmaAGen/src


In [2]:
# Install dependencies
!pip install langgraph faiss-cpu sentence-transformers pdfplumber


Collecting langgraph
  Downloading langgraph-0.6.2-py3-none-any.whl.metadata (6.8 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting langgraph-checkpoint<3.0.0,>=2.1.0 (from langgraph)
  Downloading langgraph_checkpoint-2.1.1-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt<0.7.0,>=0.6.0 (from langgraph)
  Downloading langgraph_prebuilt-0.6.2-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk<0.3.0,>=0.2.0 (from langgraph)
  Downloading langgraph_sdk-0.2.0-py3-none-any.whl.metadata (1.5 kB)
Collecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
 

In [32]:
!ls /content/drive/MyDrive/PharmaAGen/src/utils/

ls: cannot access '/content/drive/MyDrive/PharmaAGen/src/utils/': No such file or directory


In [6]:
# Imports and setup
import sys

# Ensure the project's `src` directory (derived from base_path) is on the
# module search path. The membership check keeps this cell idempotent: the
# entry is added at most once, however often the cell is re-run.
project_src = base_path + '/src'
if project_src not in sys.path:
    sys.path.append(project_src)

In [8]:
from utils.pdf_utils import extract_text_from_pdf, clean_text, chunk_text
from agents.embedder_agent import EmbedderAgent
from agents.retriever_agent import RetrieverAgent
from agents.qa_agent import QAAgent
from langgraph.graph import StateGraph

ModuleNotFoundError: No module named 'utils'

## Step 1: Load and Chunk PDF

In [None]:
import os

# Sample document for the demo, resolved relative to the project root.
file_path = f"{base_path}/data/sample_docs/semaglutide_review.pdf"

# Fail early with an actionable message when the PDF is not at the expected
# location, rather than surfacing an error from inside the extraction helper.
if not os.path.isfile(file_path):
    raise FileNotFoundError(
        f"Sample PDF not found at {file_path}; "
        "check that Google Drive is mounted and base_path is correct."
    )

# Run the three pdf_utils helpers in sequence. Each intermediate result keeps
# its own name so it can be inspected from later cells.
raw_text = extract_text_from_pdf(file_path)
cleaned = clean_text(raw_text)
chunks = chunk_text(cleaned)


## Step 2: Initialize Agents

In [8]:
import os

# Report the configured project root, then try to list what is inside it.
print(f"Current base_path: {base_path}")
print(f"Contents of {base_path}:")
try:
    entries = os.listdir(base_path)
except FileNotFoundError:
    # base_path does not exist on this filesystem.
    print(f"Error: Directory not found at {base_path}")
else:
    for entry in entries:
        print(entry)

Current base_path: /content/drive/MyDrive/PharmaAGen
Contents of /content/drive/MyDrive/PharmaAGen:
Error: Directory not found at /content/drive/MyDrive/PharmaAGen


In [None]:
# Create the three pipeline agents; the classes come from the project's
# `agents` package imported earlier in the notebook.
embedder = EmbedderAgent()
# The retriever receives the document chunks and the embedder instance.
# NOTE(review): presumably it embeds/indexes `chunks` using `embedder` —
# confirm against RetrieverAgent's implementation.
retriever = RetrieverAgent(chunks, embedder)
qa_agent = QAAgent()


## Step 3: Build LangGraph

In [None]:
from typing import TypedDict


class RAGState(TypedDict, total=False):
    """State dictionary handed from node to node in the graph.

    `query` is the only key visible in this notebook (it is what gets passed
    to graph.invoke). NOTE(review): `context` and `answer` are placeholders
    for whatever keys RetrieverAgent.run and QAAgent.run actually return —
    rename them to match the agents, since keys that are not declared here are
    not part of the graph state.
    """

    query: str
    context: list
    answer: str


# StateGraph has to be constructed with a state schema; calling it with no
# arguments fails before any node can be added.
builder = StateGraph(RAGState)

# Only the nodes that take part in the flow are registered. The embedder is
# already handed to the retriever when the retriever is constructed; as a
# graph node it had no incoming edge and was not the entry point, so it could
# never run.
builder.add_node("retriever", retriever.run)
builder.add_node("qa", qa_agent.run)

# Flow: entry -> retriever -> qa -> finish. The finish point is declared
# explicitly so the graph has a defined end after the QA step.
builder.set_entry_point("retriever")
builder.add_edge("retriever", "qa")
builder.set_finish_point("qa")

graph = builder.compile()


## Step 4: Query the Graph

In [None]:
# Run a single question through the compiled graph and show the returned state.
query = "What is the half-life of Semaglutide?"
graph_input = dict(query=query)
response = graph.invoke(graph_input)
print(response)
