In [11]:
import os
from typing import Any, Dict, List, TypedDict

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableConfig
from langchain_openai import ChatOpenAI
from langgraph.constants import Send
from langgraph.graph import END, START, StateGraph

# Initialize the language model
llm = ChatOpenAI(model="gpt-4o-mini")

# Define the state structure
class State(TypedDict):
    pdf_path: str
    text: str
    content_table: List[str]
    word_counts: Dict[str, int]
    chapters: Dict[str, str]
    final_document: str

# Node 1: Extract Text from PDF
def extract_text_from_pdf(state: State, config: RunnableConfig) -> Dict[str, Any]:
    import fitz  # PyMuPDF
    doc = fitz.open(state["pdf_path"])
    text = "".join(page.get_text() for page in doc)
    return {"text": text.strip()}

# Node 2: Generate a Content Table
async def generate_content_table(state: State, config: RunnableConfig) -> Dict[str, Any]:
    prompt = ChatPromptTemplate([
        ("system", "You are an expert in creating structured content tables for educational materials."),
        ("user", "Based on the following text, create a content table with chapters and subtopics for a teacher-friendly transcript:\n\n{context}")
    ])
    content_table_chain = prompt | llm | StrOutputParser()
    content_table = await content_table_chain.ainvoke({"context": state["text"]}, config)
    return {"content_table": content_table.split("\n")}

# Node 3: Assign Word Counts
def assign_word_counts(state: State, config: RunnableConfig) -> Dict[str, Any]:
    total_words = 3900
    num_chapters = len(state["content_table"])
    words_per_chapter = total_words // num_chapters
    word_counts = {chapter: words_per_chapter for chapter in state["content_table"]}
    return {"word_counts": word_counts}

# Node 4: Fill Each Chapter
async def fill_each_chapter(state: State, config: RunnableConfig) -> Dict[str, Any]:
    chapters = {}
    for chapter in state["content_table"]:
        prompt = ChatPromptTemplate([
            ("system", f"You are writing detailed content for the chapter '{chapter}' in an educational transcript."),
            ("user", f"Based on the following text, write content for the chapter '{chapter}' with approximately {state['word_counts'][chapter]} words:\n\n{{context}}")
        ])
        fill_chapter_chain = prompt | llm | StrOutputParser()
        chapter_content = await fill_chapter_chain.ainvoke({"context": state["text"]}, config)
        chapters[chapter] = chapter_content
    return {"chapters": chapters}

# Node 5: Assemble the Final Document
def assemble_final_document(state: State, config: RunnableConfig) -> Dict[str, Any]:
    final_document = "\n\n".join(f"## {chapter}\n\n{content}" for chapter, content in state["chapters"].items())
    return {"final_document": final_document}

# Define the graph
graph = StateGraph(State)
graph.add_node("extract_text_from_pdf", extract_text_from_pdf)
graph.add_node("generate_content_table", generate_content_table)
graph.add_node("assign_word_counts", assign_word_counts)
graph.add_node("fill_each_chapter", fill_each_chapter)
graph.add_node("assemble_final_document", assemble_final_document)

# Define the workflow
graph.add_edge(START, "extract_text_from_pdf")
graph.add_edge("extract_text_from_pdf", "generate_content_table")
graph.add_edge("generate_content_table", "assign_word_counts")
graph.add_edge("assign_word_counts", "fill_each_chapter")
graph.add_edge("fill_each_chapter", "assemble_final_document")
graph.add_edge("assemble_final_document", END)

# Compile the graph into an application
app = graph.compile()

# Execute the graph
initial_state = {"pdf_path": "example_data/Practical Test v2.pdf"}
result = app.invoke(initial_state)
print(result["final_document"])


TypeError: No synchronous function provided to "generate_content_table".
Either initialize with a synchronous function or invoke via the async API (ainvoke, astream, etc.)