In [27]:
import os
from dotenv import load_dotenv

from notion_client import Client

load_dotenv()

True

In [28]:
NOTION_API_KEY = os.getenv("NOTION_API_KEY")

In [30]:
notion = Client(auth=NOTION_API_KEY)

In [4]:
def get_all_databases(notion):
    databases = []
    cursor = None
    while True:
        response = notion.search(filter={"property": "object", "value": "database"}, start_cursor=cursor)
        databases.extend(response["results"])
        cursor = response.get("next_cursor")
        if not cursor:
            break
    return databases

In [5]:
def get_pages_from_database(notion, database_id):
    pages = []
    cursor = None
    while True:
        response = notion.databases.query(database_id=database_id, start_cursor=cursor)
        pages.extend(response["results"])
        cursor = response.get("next_cursor")
        if not cursor:
            break
    return pages

In [6]:
def find_child_pages(notion, parent_id):
    pages = []
    children = notion.blocks.children.list(parent_id)["results"]

    for block in children:
        if block["type"] == "child_page":
            pages.append(block)
        if block.get("has_children"):
            pages.extend(find_child_pages(notion, block["id"]))

    return pages

In [None]:


databases = get_all_databases(notion)

all_pages = []

# Step 2: Get pages from databases
for db in databases:
    db_id = db["id"]
    pages = get_pages_from_database(notion, db_id)
    all_pages.extend(pages)

# Step 3: Find nested child pages
for page in all_pages:
    child_pages = find_child_pages(notion, page["id"])
    all_pages.extend(child_pages)

# Step 4: Print or process
print(f"Found {len(all_pages)} pages.")
for page in all_pages:
    print(page["id"])

KeyboardInterrupt: 

In [8]:
def get_all_blocks(notion, block_id):
    """Recursively fetch all blocks inside a page or block"""
    all_blocks = []

    cursor = None
    while True:
        response = notion.blocks.children.list(block_id=block_id, start_cursor=cursor)
        results = response["results"]
        all_blocks.extend(results)

        for block in results:
            if block.get("has_children"):
                child_blocks = get_all_blocks(notion, block["id"])
                all_blocks.extend(child_blocks)

        cursor = response.get("next_cursor")
        if not cursor:
            break

    return all_blocks


In [9]:
def extract_text_from_block(block):
    block_type = block["type"]
    data = block.get(block_type, {})

    text = ""

    if "rich_text" in data:
        text = "".join([t["plain_text"] for t in data["rich_text"]])

    elif block_type == "child_page":
        text = f"[Child Page] {data['title']}"

    elif block_type == "image":
        text = f"[Image] {data.get('type')}: {data.get('external', {}).get('url', '')}"

    elif block_type == "code":
        text = f"[Code ({data.get('language', 'unknown')})]\n" + "".join([t["plain_text"] for t in data["rich_text"]])

    # Add more block types as needed (equation, video, file, etc.)

    return text


In [10]:
def extract_page_text(notion, page_id):
    blocks = get_all_blocks(notion, page_id)
    lines = []

    for block in blocks:
        text = extract_text_from_block(block)
        if text:
            lines.append(text)

    return "\n".join(lines)


In [15]:
page_id = "21615fcdd071807889b4c29a7f8a54b6"  # copy from URL or Notion client
text = extract_page_text(notion, page_id)
print(text)


[Child Page] Agenda
[Child Page] Notes
[Child Page] All tasks
[Child Page] LangChain for LLM Application Development Notes
[Child Page] LangChain: Chat with Your Data Notes
Summary: Basics of LangChain
[Child Page] Lesson 1: Models, Prompts and parsers
[Child Page] Lesson 2: Memory
[Child Page] Lesson 3: Chains
[Child Page] Lesson 4: Q&A over Documents
[Child Page] Lesson 5: Evaluation
[Child Page] Lesson 6: Agents
🚗 Prompt templates
🤔 Why: prevent prompt injection, reuse prompts.
📝 Important libraries: 
from langchain.chat_models import ChatOpenAI  # (clickable link to all providers available)
from langchain.prompts import ChatPromptTemplate
👩‍💻 Code example:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

template_string = """Translate the text \
that is delimited by triple backticks \
into a style that is {style}. \
text: ```{text}```
"""

prompt_template = ChatPromptTemplate.from_template(template_string)

print(prompt_template.message

In [16]:
def get_block_children(notion, block_id):
    children = []
    cursor = None
    while True:
        response = notion.blocks.children.list(block_id=block_id, start_cursor=cursor)
        children.extend(response["results"])
        cursor = response.get("next_cursor")
        if not cursor:
            break
    return children


In [17]:
def extract_text_from_block(block):
    block_type = block["type"]
    text = ""

    if block_type in ["paragraph", "heading_1", "heading_2", "heading_3",
                      "bulleted_list_item", "numbered_list_item", "quote", "callout", "to_do"]:
        for rt in block[block_type].get("rich_text", []):
            text += rt.get("plain_text", "")
    elif block_type == "child_page":
        text += f"[Child Page] {block['child_page']['title']}"
    elif block_type == "child_database":
        text += f"[Database] {block['child_database']['title']}"
    return text.strip()


In [36]:
def traverse_page(notion, block_id, depth=0):
    content = []
    blocks = get_block_children(notion, block_id)

    for block in blocks:
        prefix = "  " * depth
        # text = extract_text_from_block(block)
        # if text:
        #     content.append(f"{prefix}- {text}")

        # Recursively handle nested blocks
        if block.get("has_children"):
            content.extend(traverse_page(notion, block["id"], depth + 1))

        # Special case: child_database – query it
        if block["type"] == "child_database":
            db_id = block["id"]
            try:
                db_pages = get_pages_from_database(notion, db_id)
                for p in db_pages:
                    title = extract_page_title(p)
                    content.append(f"{prefix}  - 🗂️ Database Page: {title}")
                    content.extend(traverse_page(notion, p["id"], depth + 2))
            except Exception as e:
                print(str(e))
    
    return content


In [19]:
def get_pages_from_database(notion, database_id):
    pages = []
    cursor = None
    while True:
        response = notion.databases.query(database_id=database_id, start_cursor=cursor)
        pages.extend(response["results"])
        cursor = response.get("next_cursor")
        if not cursor:
            break
    return pages

def extract_page_title(page):
    props = page.get("properties", {})
    for val in props.values():
        if val["type"] == "title":
            return "".join([t["plain_text"] for t in val["title"]])
    return "[No Title]"


In [37]:
page_id = "21615fcdd071807889b4c29a7f8a54b6"

content = traverse_page(notion, page_id)
print("\n".join(content))


Could not find database with ID: 22a15fcd-d071-808c-ab19-cc42fad5a7a5. Make sure the relevant pages and databases are shared with your integration.
      - 🗂️ Database Page: Finnish  LangChain: Chat with Your Data 
      - 🗂️ Database Page: Try to find better interview preparation materials
      - 🗂️ Database Page: Collect all comments for movie-success-predictor
      - 🗂️ Database Page: Finnish at least 3 lessons in  LangChain: Chat with Your Data 
      - 🗂️ Database Page: Day 1 of interview-question-data-science 
      - 🗂️ Database Page: Finnish LangChain for LLM Application Development 
      - 🗂️ Database Page: Start running data collection for movie-success-predictor
  - 🗂️ Database Page: Data Science Interview Questions and Answers
  - 🗂️ Database Page: LangChain: Chat with Your Data
  - 🗂️ Database Page: LangChain for LLM Application Development
  - 🗂️ Database Page: Prompt Engineering for Vision Models
  - 🗂️ Database Page: ChatGPT Prompt Engineering for Developers
  - 🗂️ D

In [33]:
notion.databases.query(database_id="22a15fcdd071808cab19cc42fad5a7a5")

APIResponseError: Could not find database with ID: 22a15fcd-d071-808c-ab19-cc42fad5a7a5. Make sure the relevant pages and databases are shared with your integration.