In [20]:
from pathlib import Path

# Load the markdown report that later cells pass to the graph as the `report` input.
# The path is relative, so it is resolved against the kernel's current working directory.
md_text = Path("files/coffee_shops_sf.md").read_text(encoding="utf-8")
print(md_text[:500])  # preview only the first 500 characters, not the whole file

# San Francisco Coffee Shops Research

## Top Coffee Shops in San Francisco

### Blue Bottle Coffee
- **Founded**: 2002 in Oakland, expanded to SF
- **Specialty**: Single-origin coffee, pour-over methods
- **Notable**: Known for freshly roasted beans, minimalist aesthetic
- **Locations**: Multiple SF locations including Ferry Building

### Philz Coffee
- **Founded**: 2003 in San Francisco
- **Specialty**: Custom blended coffee, personalized service
- **Notable**: Founded by Phil Jaber, known for


In [21]:
import uuid
from typing import TypedDict, List
from langgraph.graph import StateGraph, START, END
from pydantic import BaseModel, Field
from langchain.chat_models import init_chat_model


In [29]:

# Initialize Model
# Single chat model instance; both structured-output wrappers created further down
# in this cell are derived from it.
# NOTE(review): a later cell rebinds `model` to a different Gemini identifier.
model = init_chat_model("google_genai:models/gemini-2.5-flash-lite")

# --- 1. TypedDicts (The Internal Memory) ---
class Checkpoint(TypedDict):
    """One learning checkpoint in plain-dict form, as stored in the graph state."""
    id: str  # assigned in the node via str(uuid.uuid4()); not produced by the LLM
    name: str  # mirrors CheckpointItem.name
    objective: str  # mirrors CheckpointItem.objective
    study_material: str  # same field name/type as CheckpointContent.study_material
    quiz_questions: list[str]  # same field name as CheckpointContent.quiz_questions

class State(TypedDict):
    """State schema passed to StateGraph; nodes receive it and return partial updates."""
    report: str  # report text supplied by the caller in graph.invoke(...)
    checkpoints: list[Checkpoint]  # returned by the `generate` node

# --- 2. Pydantic Models (The LLM Interface) ---
# Schema for a single checkpoint as the LLM is asked to return it.
# Field names must stay in sync with the `name` / `objective` keys of Checkpoint.
class CheckpointItem(BaseModel):
    name: str = Field(
        description="The name of the checkpoint",
    )
    objective: str = Field(
        description="the specific objective of the checkpoint",
    )

# Top-level structured-output schema: the complete list of extracted checkpoints.
class CheckpointResponse(BaseModel):
    checkpoints: list[CheckpointItem]

# Structured-output schema for the generated content of one checkpoint.
# The length / count hints live only in the descriptions; nothing here validates them.
class CheckpointContent(BaseModel):
    study_material: str = Field(
        description="Brief study material (approx 100 words)",
    )
    quiz_questions: list[str] = Field(
        description="Exactly 3 assessment questions",
    )

# Bind each response schema to the shared chat model.
content_gen = model.with_structured_output(CheckpointContent)
model_with_structured_output = model.with_structured_output(CheckpointResponse)


In [23]:
# --- 3. The Node ---
def generate(state: State):
    """Break the report held in ``state`` into learning checkpoints.

    The report text is sent to ``model_with_structured_output``; every item in
    the response's ``checkpoints`` list is converted into a plain dict.

    Args:
        state: Graph state. Only ``state['report']`` is read.

    Returns:
        A partial state update ``{"checkpoints": [...]}``. Each entry holds the
        model-provided fields from ``item.model_dump()``, a fresh
        ``str(uuid.uuid4())`` under ``id``, and empty ``study_material`` /
        ``quiz_questions`` placeholders so every key declared on the
        ``Checkpoint`` TypedDict is present.
    """
    report = state['report']
    response = model_with_structured_output.invoke(
        f"extract and breakdown this report {report} into learning checkpoints"
    )

    checkpoints = [
        {
            **item.model_dump(),
            # The id is assigned locally; the LLM schema has no id field.
            'id': str(uuid.uuid4()),
            # Placeholders keep the dict shape aligned with Checkpoint.
            'study_material': "",
            'quiz_questions': [],
        }
        for item in response.checkpoints
    ]

    # The key must match the State definition exactly ("checkpoints").
    return {"checkpoints": checkpoints}

# --- 4. Build Graph ---
# Linear graph: START -> generate -> END, with State as the state schema.
builder = StateGraph(State)
builder.add_node("generate", generate)
builder.add_edge(START, "generate")
builder.add_edge("generate", END)

graph = builder.compile()

# --- 5. Run ---
# Only `report` is supplied as input; it comes from `md_text`, loaded in the first cell.
# `checkpoints` is returned by the `generate` node.
result = graph.invoke({"report": md_text})

# Prints the whole returned state, including the full report text.
print(result)

{'report': '# San Francisco Coffee Shops Research\n\n## Top Coffee Shops in San Francisco\n\n### Blue Bottle Coffee\n- **Founded**: 2002 in Oakland, expanded to SF\n- **Specialty**: Single-origin coffee, pour-over methods\n- **Notable**: Known for freshly roasted beans, minimalist aesthetic\n- **Locations**: Multiple SF locations including Ferry Building\n\n### Philz Coffee\n- **Founded**: 2003 in San Francisco\n- **Specialty**: Custom blended coffee, personalized service\n- **Notable**: Founded by Phil Jaber, known for unique blends like "Tesora"\n- **Locations**: Original location in Mission District, now citywide\n\n### Sightglass Coffee\n- **Founded**: 2009\n- **Specialty**: Third-wave coffee, roasting transparency\n- **Notable**: Large roastery with viewing windows, industrial aesthetic\n- **Location**: SOMA district flagship location\n\n### Ritual Coffee Roasters\n- **Founded**: 2005\n- **Specialty**: Direct trade relationships, seasonal offerings\n- **Notable**: Pioneer in San F

In [24]:
# Bare expression: displays the state dict assigned to `result` by the graph.invoke call above.
result

{'report': '# San Francisco Coffee Shops Research\n\n## Top Coffee Shops in San Francisco\n\n### Blue Bottle Coffee\n- **Founded**: 2002 in Oakland, expanded to SF\n- **Specialty**: Single-origin coffee, pour-over methods\n- **Notable**: Known for freshly roasted beans, minimalist aesthetic\n- **Locations**: Multiple SF locations including Ferry Building\n\n### Philz Coffee\n- **Founded**: 2003 in San Francisco\n- **Specialty**: Custom blended coffee, personalized service\n- **Notable**: Founded by Phil Jaber, known for unique blends like "Tesora"\n- **Locations**: Original location in Mission District, now citywide\n\n### Sightglass Coffee\n- **Founded**: 2009\n- **Specialty**: Third-wave coffee, roasting transparency\n- **Notable**: Large roastery with viewing windows, industrial aesthetic\n- **Location**: SOMA district flagship location\n\n### Ritual Coffee Roasters\n- **Founded**: 2005\n- **Specialty**: Direct trade relationships, seasonal offerings\n- **Notable**: Pioneer in San F

content="Parrots have colorful feathers for a variety of reasons, and it's a fascinating interplay of **evolution, communication, and survival**. Here's a breakdown of the main drivers behind their vibrant plumage:\n\n**1. Communication and Social Signaling:**\n\n*   **Mate Attraction:** This is arguably the most significant reason. Brighter, more elaborate, and healthier-looking feathers can signal a parrot's fitness and desirability to potential mates. Males, in particular, often display more vibrant colors to attract females. Think of it like a peacock's tail – a visual advertisement of genetic quality.\n*   **Species Recognition:** In dense rainforests where visibility can be limited, distinct color patterns help parrots identify members of their own species. This is crucial for flocking, mating, and avoiding hybridization. Imagine trying to find your family in a crowded marketplace without distinct clothing – colors serve a similar purpose.\n*   **Social Status and Dominance:** Wi

In [33]:
import uuid
from typing import TypedDict, List
from langgraph.graph import StateGraph, START, END
from pydantic import BaseModel, Field
from langchain.chat_models import init_chat_model

# Initialize Model
# Rebinds `model` (an earlier cell pointed it at "gemini-2.5-flash-lite");
# the structured-output wrappers created later in this cell use this instance.
model = init_chat_model("google_genai:models/gemini-flash-latest")

# --- 1. STATE DEFINITIONS ---
class Checkpoint(TypedDict):
    """One learning checkpoint in plain-dict form, as stored in the graph state."""
    id: str  # assigned via str(uuid.uuid4()) in generate_structure
    name: str  # from CheckpointItem.name; used as the "Topic" in create_content's prompt
    objective: str  # from CheckpointItem.objective; also interpolated into the prompt
    study_material: str  # "" after generate_structure, filled by create_content
    quiz_questions: list[str]  # [] after generate_structure, filled by create_content

class State(TypedDict):
    """State schema passed to StateGraph for the two-node pipeline."""
    report: str  # report text supplied at invoke time; read by both nodes
    user_request: str  # learner's goal supplied at invoke time; read by create_content
    checkpoints: list[Checkpoint]  # returned by generate_structure, then by create_content

# --- 2. PYDANTIC MODELS ---

# For Node 1
# Schema for a single checkpoint as the LLM is asked to return it (node 1).
class CheckpointItem(BaseModel):
    name: str = Field(
        description="Name of the checkpoint",
    )
    objective: str = Field(
        description="Objective of the checkpoint",
    )

# Wrapper schema: the full list of checkpoints extracted from the report.
class CheckpointResponse(BaseModel):
    checkpoints: list[CheckpointItem]

# For Node 2
# Schema for the generated content of one checkpoint (node 2).
# The length / count hints live only in the descriptions; nothing here validates them.
class CheckpointContent(BaseModel):
    study_material: str = Field(
        description="Brief study material (approx 300 words)",
    )
    quiz_questions: list[str] = Field(
        description="Exactly 3 assessment questions",
    )

# One structured-output wrapper per node, both derived from the shared model.
content_gen = model.with_structured_output(CheckpointContent)      # used by create_content
structure_gen = model.with_structured_output(CheckpointResponse)   # used by generate_structure


# --- 3. NODE 1: GENERATE STRUCTURE ---
def generate_structure(state: State):
    """Node 1: ask the LLM for the checkpoint outline of the report.

    Args:
        state: Graph state. Only ``state['report']`` is read.

    Returns:
        ``{"checkpoints": [...]}`` where each entry is the dumped LLM item plus
        a new ``str(uuid.uuid4())`` ``id`` and empty ``study_material`` /
        ``quiz_questions`` placeholders.
    """
    extraction = structure_gen.invoke(f"Extract checkpoints from: {state['report']}")

    def as_checkpoint(item):
        # Empty content fields are placeholders for the content-creation node.
        return {
            **item.model_dump(),
            'id': str(uuid.uuid4()),
            'study_material': "",
            'quiz_questions': [],
        }

    return {"checkpoints": [as_checkpoint(item) for item in extraction.checkpoints]}


# --- 4. NODE 2: CREATE CONTENT (UPDATED WITH BATCH) ---
def create_content(state: State):
    """Node 2: generate study material and quiz questions for every checkpoint.

    One prompt is built per checkpoint and all prompts are submitted in a single
    ``content_gen.batch`` call. Each result is merged into a *copy* of its
    checkpoint, so the dicts held in the incoming ``state`` are never modified;
    the node communicates only through its return value.

    Args:
        state: Graph state. Reads ``report``, ``user_request`` and
            ``checkpoints`` (each checkpoint must have ``name`` and
            ``objective``).

    Returns:
        ``{"checkpoints": [...]}`` containing new checkpoint dicts with
        ``study_material`` and ``quiz_questions`` taken from the batch results.
    """
    report = state['report']
    user_req = state['user_request']
    checkpoints = state['checkpoints']

    # A. Build one prompt per checkpoint, in checkpoint order.
    batch_prompts = []
    for cp in checkpoints:
        prompt = f"""
        Context: {report}
        User Goal: {user_req}
        
        Task: Create content for this checkpoint:
        - Topic: {cp['name']}
        - Objective: {cp['objective']}
        """
        batch_prompts.append(prompt)

    # B. Submit every prompt in one batch call.
    # The pairing below assumes results come back in the same order as the prompts.
    batch_results = content_gen.batch(batch_prompts)

    # C. Merge each result into a fresh dict instead of mutating the input
    #    checkpoint, so the caller's state object is left untouched.
    updated_checkpoints = [
        {
            **cp,
            'study_material': content.study_material,
            'quiz_questions': content.quiz_questions,
        }
        for cp, content in zip(checkpoints, batch_results)
    ]

    return {"checkpoints": updated_checkpoints}


# --- 5. GRAPH SETUP ---
# Two-node linear pipeline: START -> generate_structure -> create_content -> END.
builder = StateGraph(State)
builder.add_node("generate_structure", generate_structure)
builder.add_node("create_content", create_content)

builder.add_edge(START, "generate_structure")
builder.add_edge("generate_structure", "create_content")
builder.add_edge("create_content", END)

graph = builder.compile()

# --- 6. EXECUTION ---
# `checkpoints` is not part of the input; the generate_structure node returns it.
inputs = {
    "report": md_text,
    "user_request": "I want to learn best coffee shops in San Francisco."
}

result = graph.invoke(inputs)

# Verify Output
# Only the checkpoints are printed (as indented JSON), not the full report text.
# NOTE(review): this import sits mid-cell; consider moving it to the import block at the top.
import json
print(json.dumps(result["checkpoints"], indent=2))

[
  {
    "name": "Blue Bottle Coffee",
    "objective": "Focus on single-origin coffee and pour-over methods, known for freshly roasted beans and minimalist aesthetic.",
    "id": "6f4511ab-0cfa-402d-96f1-40e173666c14",
    "study_material": "Blue Bottle Coffee, though founded in Oakland in 2002, quickly became a cornerstone of San Francisco's specialty coffee landscape. The brand distinguishes itself through a strong commitment to sourcing and preparation precision. A central focus for Blue Bottle is the use of **single-origin coffee**, meaning their beans are typically sourced from a specific farm or region to highlight unique flavor profiles and the coffee's terroir. To ensure these unique characteristics are fully expressed, Blue Bottle heavily relies on meticulous brewing techniques, most notably **pour-over methods**. This manual process allows for precise control over water temperature and flow, resulting in a clean and defined cup.\r\rThe company is celebrated for its dedicati

In [31]:
# Bare expression: displays whatever `result` currently holds in the kernel.
# NOTE(review): the saved output below shows a different report ("Python uses indentation...")
# and this cell's execution count (31) is lower than the cell above (33), so the output
# predates that run. Re-run after the cell above to refresh it.
result

{'report': 'Python uses indentation. Lists are mutable. Dictionaries are key-value pairs.',
 'user_request': 'I want to learn python basics',
 'checkpoints': [{'name': 'Python Syntax',
   'objective': 'Understand that Python uses indentation for code structure.',
   'id': '34561e89-8f15-484d-bf77-d0c4214b63e9',
   'study_material': "Unlike languages that use curly braces ({}) to define blocks of code, Python uses indentation. Indentation is mandatory and defines the logical structure of the program, marking which lines belong to conditional statements (like 'if'), loops ('for', 'while'), or functions. Standard Python convention dictates using exactly four spaces for each level of indentation. Mixing tabs and spaces or using inconsistent indentation levels within the same block will result in an 'IndentationError' or 'TabError'. This strict requirement enforces clean, readable code, making indentation a critical part of Python's syntax.",
   'quiz_questions': ['What is the primary role 