In [1]:
!pip install langchain==0.3.0 langchain-openai==0.2.0 langgraph==0.2.22 httpx==0.27.2

Collecting langchain==0.3.0
  Downloading langchain-0.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-openai==0.2.0
  Downloading langchain_openai-0.2.0-py3-none-any.whl.metadata (2.6 kB)
Collecting langgraph==0.2.22
  Downloading langgraph-0.2.22-py3-none-any.whl.metadata (13 kB)
Collecting httpx==0.27.2
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain==0.3.0)
  Downloading sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting langchain-core<0.4.0,>=0.3.0 (from langchain==0.3.0)
  Downloading langchain_core-0.3.60-py3-none-any.whl.metadata (5.8 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain==0.3.0)
  Downloading langchain_text_splitters-0.3.8-py3-none-any.whl.metadata (1.9 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain==0.3.0)
  Downloading langsmith-0.1.147-py3-none-any.whl.metadata (14 kB)
Collecting numpy<2,>=1 (from langchain==0

# 日本語

In [2]:
import operator
from typing import Annotated, Any, Optional

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
from pydantic import BaseModel, Field

import os
from google.colab import userdata

# Environment setup (this sample assumes it runs on Google Colab).
# API keys are read through google.colab.userdata instead of being hard-coded.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
# LangSmith tracing: enable it and point at the hosted endpoint.
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_ENDPOINT="https://api.smith.langchain.com",
)
os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"] = "agent-book"

In [4]:
import operator
from typing import Annotated, Any, Optional

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
from pydantic import BaseModel, Field

import os
from google.colab import userdata

# Environment setup (this sample assumes it runs on Google Colab).
# API keys are read through google.colab.userdata instead of being hard-coded.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
# LangSmith tracing: enable it and point at the hosted endpoint.
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_ENDPOINT="https://api.smith.langchain.com",
)
os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"] = "agent-book"


# --------------------------------------------------------------------------------
# 1. データモデル
# --------------------------------------------------------------------------------

# Data model describing a single reader persona.
class Persona(BaseModel):
    # Required attributes.
    name: str = Field(description="ペルソナの名前")
    background: str = Field(description="ペルソナの持つ背景")
    # Optional extras (e.g. search intent, knowledge level, device used);
    # both default to None when not supplied.
    knowledge_level: Optional[str] = Field(
        default=None, description="トピックに対する知識レベル"
    )
    search_motivation: Optional[str] = Field(
        default=None, description="検索の主目的・動機"
    )

# Data model wrapping a list of personas (empty list by default).
class Personas(BaseModel):
    personas: list[Persona] = Field(description="ペルソナのリスト", default_factory=list)

# Data model for one interview exchange: a single question/answer pair
# together with the persona that answered it. All three fields are required.
class Interview(BaseModel):
    persona: Persona = Field(description="インタビュー対象のペルソナ")
    question: str = Field(description="インタビューでの質問")
    answer: str = Field(description="インタビューでの回答")

# Container for a list of interview results (empty list by default).
class InterviewResult(BaseModel):
    interviews: list[Interview] = Field(
        description="インタビュー結果のリスト", default_factory=list
    )

# Example data model gathering the SEO-related information.
class SEOData(BaseModel):
    # Keyword lists start out empty.
    main_keywords: list[str] = Field(default_factory=list)
    sub_keywords: list[str] = Field(default_factory=list)
    # Free-text analysis of the search intent; empty string by default.
    search_intent_analysis: str = Field(
        default="", description="検索意図のまとめや考察など"
    )

# State object passed to StateGraph as the agent's schema.
class InterviewState(BaseModel):
    user_request: str = Field(description="ユーザーからのリクエスト")
    # The list fields carry operator.add in their Annotated metadata, which
    # LangGraph uses as the reducer: node results are concatenated onto the
    # existing list rather than replacing it.
    personas: Annotated[list[Persona], operator.add] = Field(
        description="生成されたペルソナのリスト", default_factory=list
    )
    interviews: Annotated[list[Interview], operator.add] = Field(
        description="実施されたインタビューのリスト", default_factory=list
    )
    # Holds the interviews after summarization / de-duplication.
    summarized_interviews: Annotated[list[Interview], operator.add] = Field(
        description="重複を排除・要約したインタビュー内容", default_factory=list
    )
    seo_data: Optional[SEOData] = Field(
        description="自動生成されたSEO関連情報", default=None
    )
    requirements_doc: str = Field(
        description="生成された要件定義(記事作成指示書)", default=""
    )
    iteration: int = Field(
        description="ペルソナ生成とインタビューの反復回数", default=0
    )


# --------------------------------------------------------------------------------
# 2. 各ステップのクラス
# --------------------------------------------------------------------------------

# (A) Persona generation step
class PersonaGenerator:
    """Asks the chat model for ``k`` personas and returns them as ``Personas``."""

    def __init__(self, llm: ChatOpenAI, k: int = 5):
        """
        :param llm: chat model; it is bound to the ``Personas`` schema
        :param k: number of personas requested in the prompt
        """
        self.k = k
        self.llm = llm.with_structured_output(Personas)

    def run(self, user_request: str) -> Personas:
        """Generate personas for the blog topic given in ``user_request``."""
        # System role: persona expert that also considers search intent,
        # knowledge level, device and similar attributes.
        system_message = (
            "system",
            "あなたはブログ記事のターゲットユーザーのペルソナを作成する専門家です。"
            "複数の属性（検索意図、知識レベル、利用デバイスなど）も考慮してください。",
        )
        # Only the first fragment is an f-string (it bakes in k); the remaining
        # fragments keep {user_request} as a template placeholder.
        human_message = (
            "human",
            f"以下のブログ記事のトピックに関するインタビュー用に、{self.k}人の多様なペルソナを生成してください。\n\n"
            "トピック: {user_request}\n\n"
            "各読者ペルソナには以下を含めてください:\n"
            "- 名前\n"
            "- 簡単な背景（年齢、性別、職業など）\n"
            "- トピックに対する知識レベル\n"
            "- 検索の主目的や動機\n"
            "年齢・職業・検索意図・知識レベルのバリエーションを確保してください。",
        )
        persona_prompt = ChatPromptTemplate.from_messages([system_message, human_message])
        return (persona_prompt | self.llm).invoke({"user_request": user_request})


# (B) Interview step (supports several questions per persona)
class InterviewConductor:
    """Interviews each persona: generates questions, then role-plays the answers."""

    def __init__(self, llm: ChatOpenAI, n_questions: int = 3):
        """
        :param llm: chat model used for both question and answer generation
        :param n_questions: number of questions to generate for each persona
        """
        self.llm = llm
        self.n_questions = n_questions

    def run(self, user_request: str, personas: list[Persona]) -> InterviewResult:
        """Interview every persona and return all question/answer pairs.

        :param user_request: the blog topic
        :param personas: personas to interview, processed in order
        :return: one ``Interview`` per generated question
        """
        interviews = []
        for persona in personas:
            # Create the questions for this persona, then answer each of them.
            questions = self._generate_questions(user_request, persona, self.n_questions)
            answers = self._generate_answers(persona, questions)
            interviews.extend(
                Interview(persona=persona, question=q, answer=a)
                for q, a in zip(questions, answers)
            )
        return InterviewResult(interviews=interviews)

    @staticmethod
    def _persona_variables(persona: Persona) -> dict[str, str]:
        """Template variables describing the persona, shared by both prompts."""
        return {
            "persona_name": persona.name,
            "persona_background": persona.background,
            "knowledge_level": persona.knowledge_level or "不明",
            "search_motivation": persona.search_motivation or "不明",
        }

    @staticmethod
    def _parse_questions(questions_text: str, n: int) -> list[str]:
        """Extract at most ``n`` questions from the model's free-text reply.

        Only a *leading* list marker ("1.", "(2)", "-", "・", ...) is removed, so
        digits or periods that belong to the question itself are preserved.
        When the reply contains list items, lines outside the list (preamble,
        closing remarks) are ignored; a line that directly follows an item is
        treated as its continuation. When no list markers are present, every
        non-empty line counts as a question.
        """
        import re  # local import: `re` is not among the notebook's imports

        marker = re.compile(r"^(?:\(?\d+\s*[.)．）:：、]|[-*•]\s+|・)\s*")
        listed: list[str] = []  # questions introduced by a list marker
        plain: list[str] = []   # every non-empty line (fallback)
        in_item = False
        for raw_line in questions_text.splitlines():
            text = raw_line.strip()
            if not text:
                # A blank line ends the current list item.
                in_item = False
                continue
            plain.append(text)
            found = marker.match(text)
            if found:
                listed.append(text[found.end():].strip())
                in_item = True
            elif in_item:
                listed[-1] = f"{listed[-1]} {text}".strip()
        questions = [q for q in listed if q] or plain
        # Every question costs one more LLM call, so never exceed the request.
        return questions[: max(n, 0)]

    def _generate_questions(self, user_request: str, persona: Persona, n: int) -> list[str]:
        """
        Generate ``n`` questions for ``persona`` with a single model call.

        :return: the parsed questions (at most ``n``)
        """
        question_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "あなたは優秀なインタビュアーです。ペルソナが抱える悩みや課題を深堀りする質問を作成します。"
                ),
                (
                    "human",
                    "以下の読者ペルソナが、ブログ記事トピック({user_request})に関して抱える悩みや課題を引き出す、"
                    f"オープンな質問を{n}個作成してください。\n\n"
                    "読者ペルソナ:\n"
                    "名前: {persona_name}\n"
                    "背景: {persona_background}\n"
                    "知識レベル: {knowledge_level}\n"
                    "検索の主目的: {search_motivation}\n"
                    "それぞれシンプルかつ深堀りできる内容にしてください。"
                ),
            ]
        )
        question_chain = question_prompt | self.llm | StrOutputParser()
        questions_text = question_chain.invoke(
            {"user_request": user_request, **self._persona_variables(persona)}
        )
        return self._parse_questions(questions_text, n)

    def _generate_answers(self, persona: Persona, questions: list[str]) -> list[str]:
        """Have the model answer each question while role-playing ``persona``.

        :return: answers in the same order as ``questions``
        """
        # The prompt and chain do not depend on the question, so build them once.
        answer_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "あなたは以下の読者ペルソナになりきっています。"
                    "インタビュアーの質問に対して、具体的な悩みや課題、期待する解決策をできるだけ詳細に教えてください。"
                ),
                (
                    "human",
                    "ペルソナ:\n"
                    "名前: {persona_name}\n"
                    "背景: {persona_background}\n"
                    "知識レベル: {knowledge_level}\n"
                    "検索の主目的: {search_motivation}\n"
                    "質問: {question}"
                ),
            ]
        )
        answer_chain = answer_prompt | self.llm | StrOutputParser()
        persona_variables = self._persona_variables(persona)
        return [
            answer_chain.invoke({**persona_variables, "question": q})
            for q in questions
        ]


# (C) Answer summarization / de-duplication step
class InterviewSummarizer:
    """Summarizes each persona's answers, merging duplicated or similar content."""

    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def run(self, interviews: list[Interview]) -> list[Interview]:
        """
        Summarize the interviews persona by persona (grouped by persona name).

        :param interviews: raw question/answer pairs
        :return: summarized pairs; if the model's reply for a persona cannot be
            parsed into any pair, that persona's original interviews are kept
            so the information is not silently lost
        """
        persona_buckets: dict[str, list[Interview]] = {}
        for iv in interviews:
            persona_buckets.setdefault(iv.persona.name, []).append(iv)

        # The prompt is identical for every persona, so build the chain once.
        summary_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "あなたはインタビュー回答の要約をする専門家です。重複表現や同じ趣旨の回答をまとめ、より分かりやすく整理してください。"
                ),
                (
                    "human",
                    "以下は同一ペルソナから得られた複数の質問・回答です。内容が重複する場合はまとめて要約してください。\n\n"
                    "{combined_text}\n\n"
                    "要約・再編後のフォーマット:\n"
                    "1) 質問\n   回答\n"
                    "2) 質問\n   回答\n"
                    "…"
                ),
            ]
        )
        summary_chain = summary_prompt | self.llm | StrOutputParser()

        summarized_interviews: list[Interview] = []
        for iv_list in persona_buckets.values():
            combined_text = "".join(
                f"- 質問: {iv.question}\n  回答: {iv.answer}\n" for iv in iv_list
            )
            summary_text = summary_chain.invoke({"combined_text": combined_text})
            # Every interview in the bucket shares the same persona.
            parsed = self._parse_summary(summary_text, iv_list[0].persona)
            summarized_interviews.extend(parsed or iv_list)

        return summarized_interviews

    @staticmethod
    def _parse_summary(summary_text: str, persona: Persona) -> list[Interview]:
        """Parse "質問 … / 回答 …" lines from the model reply into interviews.

        A label is recognised even behind list markers or markdown emphasis
        ("1) 質問: …", "- 回答: …", "**質問:** …"). Only the leading label is
        removed, so the words 質問/回答 inside the text itself are preserved.
        Unlabelled lines continue the most recent question or answer.
        """
        import re  # local import: `re` is not among the notebook's imports

        label_line = re.compile(
            r"^(?:[-•・*#\s]|\(?\d+\s*[.)．）、])*"      # list markers / markdown
            r"(質問|回答)(?=[\s\d*:：]|$)"               # label as a separate token
            r"\d*\s*\**\s*[:：]?\s*\**\s*"               # optional index / colon
            r"(.*)$"
        )
        entries: list[dict] = []
        current = None  # entry currently being filled, if any
        for raw_line in summary_text.splitlines():
            text = raw_line.strip()
            if not text:
                continue
            found = label_line.match(text)
            if found and found.group(1) == "質問":
                current = {"question": found.group(2), "answer": None}
                entries.append(current)
            elif found:
                # An answer only makes sense after a question has been seen.
                if current is not None:
                    current["answer"] = found.group(2)
            elif current is not None:
                target = "question" if current["answer"] is None else "answer"
                current[target] = f"{current[target]} {text}"

        parsed: list[Interview] = []
        for entry in entries:
            question = entry["question"].strip()
            answer = (entry["answer"] or "").strip()
            # Keep only complete pairs.
            if question and answer:
                parsed.append(Interview(persona=persona, question=question, answer=answer))
        return parsed


# (D) SEO information generation step
class SEODataGenerator:
    """Asks the model for main/sub keywords and a search-intent analysis."""

    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def run(self, user_request: str) -> SEOData:
        """
        Generate SEO data for the blog topic.

        :param user_request: the blog topic
        :return: keywords and search-intent text parsed from the model's reply
        """
        seo_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "あなたはSEOの専門家です。ユーザーが検索エンジンで入力するキーワードを分析し、主要キーワードと関連キーワードを提案してください。"
                ),
                (
                    "human",
                    "以下のブログ記事のトピックに関して、想定される主キーワードとサブキーワードを挙げ、それぞれの検索意図も簡潔に整理してください。\n\n"
                    "トピック: {user_request}\n\n"
                    "フォーマット例:\n"
                    "メインキーワード:\n- ...\n\n"
                    "サブキーワード:\n- ...\n\n"
                    "検索意図:\n..."
                ),
            ]
        )
        chain = seo_prompt | self.llm | StrOutputParser()
        seo_text = chain.invoke({"user_request": user_request})
        return self._parse_seo_text(seo_text)

    @staticmethod
    def _parse_seo_text(seo_text: str) -> SEOData:
        """Split the reply into main keywords, sub keywords and intent text.

        A line is a section header only when it *starts* with the section label
        (optionally behind markdown/number markers); a bullet or sentence that
        merely mentions "検索意図" or "メインキーワード" stays ordinary content.
        Each section is opened once: a later line that looks like an
        already-seen header (e.g. a sub-heading inside the intent analysis) is
        kept as content. Text on the same line as a header is kept too.
        """
        import re  # local import: `re` is not among the notebook's imports

        section_header = re.compile(
            r"^(?:[*#\s]|\d+[.)])*"
            r"(メインキーワード|サブキーワード|検索意図)"
            r"(?:[^:：「『]{0,6}?[:：]\s*\**\s*(.*)|\**\s*)$"
        )
        bullet_item = re.compile(r"^(?:[-•・]\s*|\*\s+|\d+[.)]\s*)(.*)$")
        bucket_for_label = {
            "メインキーワード": "main",
            "サブキーワード": "sub",
            "検索意図": "intent",
        }

        keywords: dict[str, list[str]] = {"main": [], "sub": []}
        intent_lines: list[str] = []
        opened: set[str] = set()
        bucket = None
        for line in seo_text.splitlines():
            text = line.strip()
            header = section_header.match(text)
            if header and header.group(1) not in opened:
                opened.add(header.group(1))
                bucket = bucket_for_label[header.group(1)]
                inline = (header.group(2) or "").strip()
                if inline and bucket == "intent":
                    intent_lines.append(inline)
                elif inline:
                    # e.g. "メインキーワード: A、B" lists the keywords inline.
                    keywords[bucket].extend(
                        part.strip() for part in re.split(r"[,、，]", inline) if part.strip()
                    )
                continue

            if bucket in keywords:
                item = bullet_item.match(text)
                if item and item.group(1).strip():
                    keywords[bucket].append(item.group(1).strip())
            elif bucket == "intent":
                intent_lines.append(text)

        return SEOData(
            main_keywords=keywords["main"],
            sub_keywords=keywords["sub"],
            search_intent_analysis="\n".join(intent_lines).strip(),
        )


# (E) 自己評価ステップ【削除】


# (F) Article-brief generation step
class RequirementsDocumentGenerator:
    """Builds the final article-writing brief from interviews and SEO data."""

    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def run(
        self,
        user_request: str,
        interviews: list[Interview],
        seo_data: SEOData
    ) -> str:
        """
        Generate the brief as plain text.

        :param user_request: the blog topic
        :param interviews: interview pairs to include in the prompt
        :param seo_data: keywords and search-intent analysis
        :return: the model's reply as a string
        """
        brief_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "あなたは収集した情報に基づいてブログ記事作成の指示書を作成する専門家です。"
                ),
                (
                    "human",
                    "以下のブログ記事のトピックと複数の読者ペルソナからのインタビュー結果、さらにSEO情報に基づいて、"
                    "網羅的な記事作成の指示書を作成してください。\n\n"
                    "トピック: {user_request}\n\n"
                    "インタビュー結果:\n{interview_results}\n"
                    "SEO情報:\n"
                    "メインキーワード: {main_kw}\n"
                    "サブキーワード: {sub_kw}\n"
                    "検索意図分析: {intent_analysis}\n\n"
                    "記事作成の指示書には以下のセクションを含めてください:\n"
                    "1. 記事の目的\n"
                    "2. ターゲット読者\n"
                    "3. 読者の悩み\n"
                    "4. SEOのターゲットキーワードとトピック\n"
                    "5. 記事の構成案(見出し案含む)\n"
                    "6. 注意事項\n\n"
                    "出力は日本語でお願いします。\n\n"
                    "記事作成の指示書:"
                ),
            ]
        )

        # Render one text block per interview; blocks are joined by newlines.
        interview_blocks = []
        for item in interviews:
            interview_blocks.append(
                f"ペルソナ: {item.persona.name} - {item.persona.background}\n"
                f"質問: {item.question}\n回答: {item.answer}\n"
            )

        template_inputs = {
            "user_request": user_request,
            "interview_results": "\n".join(interview_blocks),
            "main_kw": ", ".join(seo_data.main_keywords),
            "sub_kw": ", ".join(seo_data.sub_keywords),
            "intent_analysis": seo_data.search_intent_analysis,
        }
        return (brief_prompt | self.llm | StrOutputParser()).invoke(template_inputs)


# --------------------------------------------------------------------------------
# 3. エージェント実行クラス (StateGraph)
# --------------------------------------------------------------------------------

class DocumentationAgent:
    """Runs the whole pipeline as a linear LangGraph workflow.

    Node order: generate_personas -> conduct_interviews -> summarize_answers
    -> generate_seo_data -> generate_requirements.
    """

    def __init__(self, llm: ChatOpenAI, k: Optional[int] = None):
        """
        :param llm: chat model shared by every step
        :param k: number of personas to generate; 3 is used when None or 0
        """
        # Remember the persona count so the interview step can honour it.
        self.k = k or 3

        # Instantiate each step.
        self.persona_generator = PersonaGenerator(llm=llm, k=self.k)
        self.interview_conductor = InterviewConductor(llm=llm, n_questions=3)
        self.interview_summarizer = InterviewSummarizer(llm=llm)
        self.seo_data_generator = SEODataGenerator(llm=llm)
        self.requirements_generator = RequirementsDocumentGenerator(llm=llm)

        self.graph = self._create_graph()

    def _create_graph(self) -> Any:
        """Build the workflow and return the result of ``workflow.compile()``.

        The return annotation is ``Any`` because the value is the compiled
        graph, not the ``StateGraph`` builder itself.
        """
        workflow = StateGraph(InterviewState)

        # Register the nodes.
        workflow.add_node("generate_personas", self._generate_personas)
        workflow.add_node("conduct_interviews", self._conduct_interviews)
        workflow.add_node("summarize_answers", self._summarize_answers)
        workflow.add_node("generate_seo_data", self._generate_seo_data)
        workflow.add_node("generate_requirements", self._generate_requirements)

        # Entry point.
        workflow.set_entry_point("generate_personas")

        # Transitions.
        workflow.add_edge("generate_personas", "conduct_interviews")
        workflow.add_edge("conduct_interviews", "summarize_answers")
        workflow.add_edge("summarize_answers", "generate_seo_data")
        # The self-evaluation step was removed, so go straight to the last step.
        workflow.add_edge("generate_seo_data", "generate_requirements")

        # Final node.
        workflow.add_edge("generate_requirements", END)

        return workflow.compile()

    def _generate_personas(self, state: InterviewState) -> dict[str, Any]:
        """Node: generate personas and bump the iteration counter."""
        new_personas: Personas = self.persona_generator.run(state.user_request)
        return {
            "personas": new_personas.personas,
            "iteration": state.iteration + 1,
        }

    def _conduct_interviews(self, state: InterviewState) -> dict[str, Any]:
        """Node: interview the most recently generated personas."""
        # Take the latest batch of personas. The batch size follows the
        # configured k rather than a fixed 5, so requesting more than five
        # personas no longer silently drops the earlier ones.
        new_personas = state.personas[-max(self.k, 1):]
        new_interviews = self.interview_conductor.run(state.user_request, new_personas)
        return {"interviews": new_interviews.interviews}

    def _summarize_answers(self, state: InterviewState) -> dict[str, Any]:
        """Node: summarize / de-duplicate the collected interviews."""
        summarized = self.interview_summarizer.run(state.interviews)
        return {"summarized_interviews": summarized}

    def _generate_seo_data(self, state: InterviewState) -> dict[str, Any]:
        """Node: generate SEO keywords and search-intent analysis."""
        seo_data = self.seo_data_generator.run(state.user_request)
        return {"seo_data": seo_data}

    def _generate_requirements(self, state: InterviewState) -> dict[str, Any]:
        """Node: produce the final article-writing brief."""
        final_doc = self.requirements_generator.run(
            user_request=state.user_request,
            # Prefer the summarized interviews, but fall back to the raw ones
            # if summarization produced nothing, so the brief still has input.
            interviews=state.summarized_interviews or state.interviews,
            # seo_data is Optional in the state; use an empty SEOData if unset.
            seo_data=state.seo_data or SEOData(),
        )
        return {"requirements_doc": final_doc}

    def run(self, user_request: str) -> str:
        """Execute the graph for ``user_request`` and return the generated brief."""
        initial_state = InterviewState(user_request=user_request)
        final_state = self.graph.invoke(initial_state)
        return final_state["requirements_doc"]


# --------------------------------------------------------------------------------
# 4. メイン実行部
# --------------------------------------------------------------------------------

def main():
    """Prompt for a blog topic, run the agent, and print the generated brief."""
    user_request = input("ブログ記事のトピックを記載してください: ").strip()
    if not user_request:
        # A blank topic would still trigger every LLM call in the pipeline,
        # so stop before spending any API calls on it.
        print("トピックが入力されていないため、処理を中止します。")
        return

    k = 3  # number of personas

    llm = ChatOpenAI(model_name="gpt-4.1-mini-2025-04-14", temperature=0.3)
    # Alternative (larger) model:
    # llm = ChatOpenAI(model_name="gpt-4.1-2025-04-14", temperature=0.3)

    agent = DocumentationAgent(llm=llm, k=k)
    final_output = agent.run(user_request=user_request)

    print("\n===== 最終的な記事作成指示書 =====")
    print(final_output)


# Run the interactive pipeline when this cell/module is executed directly.
if __name__ == "__main__":
    main()

ブログ記事のトピックを記載してください: ディスプレイ広告

===== 最終的な記事作成指示書 =====
# 記事作成の指示書

## 1. 記事の目的
この記事の目的は、ディスプレイ広告に関する包括的な情報を提供し、読者がこの広告手法を理解し、実際に運用するための知識を得ることです。特に、ディスプレイ広告の効果、種類、作成方法、費用、ターゲティング、成功事例、リスティング広告との違いについて詳しく解説します。

## 2. ターゲット読者
- **マーケティング担当者**: 広告戦略を考える際にディスプレイ広告を検討している。
- **中小企業の経営者**: 限られた予算で効果的な広告手法を模索している。
- **広告代理店のスタッフ**: クライアントに対してディスプレイ広告の提案を行う必要がある。
- **広告運用初心者**: ディスプレイ広告の基礎を学びたいと考えている。

## 3. 読者の悩み
- ディスプレイ広告の基本的な理解が不足している。
- 効果的な広告戦略を構築するための具体的なデータや事例が欲しい。
- 実際にディスプレイ広告を作成する方法や費用についての情報が不足している。
- ターゲティングの方法や成功事例を知りたい。
- ディスプレイ広告とリスティング広告の違いを理解し、適切な広告手法を選びたい。

## 4. SEOのターゲットキーワードとトピック
- **メインキーワード**: ディスプレイ広告
- **サブキーワード**: 
  - ディスプレイ広告とは
  - ディスプレイ広告の効果
  - ディスプレイ広告の種類
  - ディスプレイ広告の作成方法
  - ディスプレイ広告の費用
  - ディスプレイ広告のターゲティング
  - ディスプレイ広告の成功事例
  - ディスプレイ広告とリスティング広告の違い

## 5. 記事の構成案(見出し案含む)
### はじめに
- ディスプレイ広告の重要性と目的

### 1. ディスプレイ広告とは
- 定義と基本的な仕組み

### 2. ディスプレイ広告の効果
- どのように効果を測定するか
- 具体的な効果のデータ

### 3. ディスプレイ広告の種類
- バナー広告
- 動画広告
- リッチメディア広告
- ネイティブ広告

### 4. ディスプレイ広告の作成方法
- デ

# 英語

In [None]:
import operator
from typing import Annotated, Any, Optional

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.graph import END, StateGraph
from pydantic import BaseModel, Field

import os
from google.colab import userdata

# Environment setup (this sample assumes it runs on Google Colab).
# API keys are read through google.colab.userdata instead of being hard-coded.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
# LangSmith tracing: enable it and point at the hosted endpoint.
os.environ.update(
    LANGCHAIN_TRACING_V2="true",
    LANGCHAIN_ENDPOINT="https://api.smith.langchain.com",
)
os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"] = "agent-book"


# --------------------------------------------------------------------------------
# 1. Data Models
# --------------------------------------------------------------------------------

class Persona(BaseModel):
    """Represents a persona for the blog's target audience."""
    # Required attributes.
    name: str = Field(description="Persona's name")
    background: str = Field(description="Background of this persona")
    # Optional attributes; both default to None when not supplied.
    knowledge_level: Optional[str] = Field(
        default=None, description="Knowledge level about the topic"
    )
    search_motivation: Optional[str] = Field(
        default=None, description="Main motivation or reason for searching"
    )


class Personas(BaseModel):
    """Holds a list of Persona objects."""
    # Starts out as an empty list.
    personas: list[Persona] = Field(
        description="List of personas", default_factory=list
    )


class Interview(BaseModel):
    """Represents a single interview Q&A with a persona."""
    # All three fields are required.
    persona: Persona = Field(description="The persona being interviewed")
    question: str = Field(description="The interview question asked")
    answer: str = Field(description="The persona's answer to the interview question")


class InterviewResult(BaseModel):
    """Represents the results from multiple interview Q&As."""
    # Starts out as an empty list.
    interviews: list[Interview] = Field(
        description="List of interview Q&A results", default_factory=list
    )


class SEOData(BaseModel):
    """Represents SEO-related data, such as keywords and user search intent."""
    # Keyword lists start out empty.
    main_keywords: list[str] = Field(default_factory=list)
    sub_keywords: list[str] = Field(default_factory=list)
    # Free-text analysis; empty string by default.
    search_intent_analysis: str = Field(
        default="", description="Summary or analysis of the search intent"
    )


class InterviewState(BaseModel):
    """
    State object that keeps track of:
    - The user's request (blog topic)
    - The generated personas
    - The interviews (Q&A pairs)
    - Summarized interview results
    - Generated SEO data
    - Final requirements document for article creation
    - Iteration count if needed
    """
    user_request: str = Field(description="The request from the user (blog topic)")
    # The list fields carry operator.add in their Annotated metadata, which
    # LangGraph uses as the reducer: node results are concatenated onto the
    # existing list rather than replacing it.
    personas: Annotated[list[Persona], operator.add] = Field(
        description="List of generated personas", default_factory=list
    )
    interviews: Annotated[list[Interview], operator.add] = Field(
        description="Conducted interviews", default_factory=list
    )
    summarized_interviews: Annotated[list[Interview], operator.add] = Field(
        description="Interviews after deduplication/summary", default_factory=list
    )
    seo_data: Optional[SEOData] = Field(
        description="Automatically generated SEO data", default=None
    )
    requirements_doc: str = Field(
        description="Generated instructions for article creation", default=""
    )
    iteration: int = Field(
        description="Number of iterations of persona generation and interviews", default=0
    )


# --------------------------------------------------------------------------------
# 2. Classes for Each Step
# --------------------------------------------------------------------------------

class PersonaGenerator:
    """
    Asks the chat model for ``k`` personas and returns them as ``Personas``.
    """
    def __init__(self, llm: ChatOpenAI, k: int = 5):
        """
        :param llm: chat model; it is bound to the ``Personas`` schema
        :param k: number of personas requested in the prompt
        """
        self.k = k
        self.llm = llm.with_structured_output(Personas)

    def run(self, user_request: str) -> Personas:
        """Generate personas for the blog topic given in ``user_request``."""
        system_message = (
            "system",
            "You are an expert at creating diverse personas for a blog's target audience, "
            "including attributes like search intent and knowledge level.",
        )
        # Only the first fragment is an f-string (it bakes in k); the remaining
        # fragments keep {user_request} as a template placeholder.
        human_message = (
            "human",
            f"Please generate {self.k} diverse personas for interviews related to the following blog topic.\n\n"
            "Topic: {user_request}\n\n"
            "For each persona, include:\n"
            "- Name\n"
            "- Brief background (e.g., age, gender, occupation)\n"
            "- Knowledge level regarding the topic\n"
            "- Main motivation or reason for searching\n"
            "Ensure variety in age, occupation, search intent, and knowledge level.",
        )
        persona_prompt = ChatPromptTemplate.from_messages([system_message, human_message])
        return (persona_prompt | self.llm).invoke({"user_request": user_request})


class InterviewConductor:
    """
    Conducts multiple interviews by generating questions and collecting answers from each persona.
    """
    def __init__(self, llm: ChatOpenAI, n_questions: int = 3):
        """
        :param llm: chat model used for both question and answer generation
        :param n_questions: Number of questions to generate for each persona
        """
        self.llm = llm
        self.n_questions = n_questions

    def run(self, user_request: str, personas: list[Persona]) -> InterviewResult:
        """Interview every persona and return all question/answer pairs.

        :param user_request: the blog topic
        :param personas: personas to interview, processed in order
        :return: one ``Interview`` per generated question
        """
        interviews = []
        for persona in personas:
            # Create the questions for this persona, then answer each of them.
            questions = self._generate_questions(user_request, persona, self.n_questions)
            answers = self._generate_answers(persona, questions)
            interviews.extend(
                Interview(persona=persona, question=q, answer=a)
                for q, a in zip(questions, answers)
            )
        return InterviewResult(interviews=interviews)

    @staticmethod
    def _persona_variables(persona: Persona) -> dict[str, str]:
        """Template variables describing the persona, shared by both prompts."""
        return {
            "persona_name": persona.name,
            "persona_background": persona.background,
            "knowledge_level": persona.knowledge_level or "Unknown",
            "search_motivation": persona.search_motivation or "Unknown",
        }

    @staticmethod
    def _parse_questions(questions_text: str, n: int) -> list[str]:
        """Extract at most ``n`` questions from the model's free-text reply.

        Only a *leading* list marker ("1.", "(2)", "-", ...) is removed, so
        digits or periods that belong to the question itself are preserved.
        When the reply contains list items, lines outside the list (preamble,
        closing remarks) are ignored; a line that directly follows an item is
        treated as its continuation. When no list markers are present, every
        non-empty line counts as a question.
        """
        import re  # local import: `re` is not among the notebook's imports

        marker = re.compile(r"^(?:\(?\d+\s*[.)．）:：、]|[-*•]\s+|・)\s*")
        listed: list[str] = []  # questions introduced by a list marker
        plain: list[str] = []   # every non-empty line (fallback)
        in_item = False
        for raw_line in questions_text.splitlines():
            text = raw_line.strip()
            if not text:
                # A blank line ends the current list item.
                in_item = False
                continue
            plain.append(text)
            found = marker.match(text)
            if found:
                listed.append(text[found.end():].strip())
                in_item = True
            elif in_item:
                listed[-1] = f"{listed[-1]} {text}".strip()
        questions = [q for q in listed if q] or plain
        # Every question costs one more LLM call, so never exceed the request.
        return questions[: max(n, 0)]

    def _generate_questions(self, user_request: str, persona: Persona, n: int) -> list[str]:
        """
        Generates n open-ended questions to draw out challenges or concerns
        that the persona may have regarding the blog topic.

        :return: the parsed questions (at most ``n``)
        """
        question_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a skilled interviewer who creates questions to explore the persona's concerns or challenges."
                ),
                (
                    "human",
                    "Below is a persona who may have concerns or issues related to the blog topic ({user_request}).\n"
                    f"Please create {n} open-ended questions to uncover this persona's perspective.\n\n"
                    "Persona:\n"
                    "Name: {persona_name}\n"
                    "Background: {persona_background}\n"
                    "Knowledge Level: {knowledge_level}\n"
                    "Main Motivation: {search_motivation}\n"
                    "Keep the questions simple yet probing enough to elicit detailed issues."
                ),
            ]
        )
        question_chain = question_prompt | self.llm | StrOutputParser()
        questions_text = question_chain.invoke(
            {"user_request": user_request, **self._persona_variables(persona)}
        )
        return self._parse_questions(questions_text, n)

    def _generate_answers(self, persona: Persona, questions: list[str]) -> list[str]:
        """
        Simulates persona answers to the generated questions.

        :return: answers in the same order as ``questions``
        """
        # The prompt and chain do not depend on the question, so build them once.
        answer_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are now taking on the role of the following persona. "
                    "Please provide detailed concerns, challenges, and any potential solutions you imagine."
                ),
                (
                    "human",
                    "Persona:\n"
                    "Name: {persona_name}\n"
                    "Background: {persona_background}\n"
                    "Knowledge Level: {knowledge_level}\n"
                    "Main Motivation: {search_motivation}\n"
                    "Question: {question}"
                ),
            ]
        )
        answer_chain = answer_prompt | self.llm | StrOutputParser()
        persona_variables = self._persona_variables(persona)
        return [
            answer_chain.invoke({**persona_variables, "question": q})
            for q in questions
        ]


class InterviewSummarizer:
    """
    Summarizes and removes redundant or overlapping answers for the same persona.

    Interviews are grouped by persona name, each group is sent to the LLM for
    consolidation, and the free-text reply is parsed back into ``Interview``
    objects.
    """
    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def run(self, interviews: list[Interview]) -> list[Interview]:
        """
        For each persona, merges similar answers and returns a summarized list of Q&A pairs.

        Args:
            interviews: Raw interviews. They are grouped by ``persona.name``, so
                personas that share a name are summarized together and the
                result carries the persona object of the first interview in
                the group.

        Returns:
            One ``Interview`` per Q&A pair parsed from the LLM summary. If no
            pair can be parsed for a persona, that persona's original
            interviews are returned unchanged instead of being dropped.
        """
        persona_buckets: dict[str, list[Interview]] = {}
        for iv in interviews:
            persona_buckets.setdefault(iv.persona.name, []).append(iv)

        if not persona_buckets:
            return []

        # The prompt does not depend on the persona, so build the chain once.
        summary_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You specialize in summarizing interview answers, merging redundant or similar points into a concise format."
                ),
                (
                    "human",
                    "Below are multiple Q&A pairs from the same persona. If there's repetition, combine and summarize them.\n\n"
                    "{combined_text}\n\n"
                    "Use this format after summarizing:\n"
                    "1) Question\n   Answer\n"
                    "2) Question\n   Answer\n"
                    "..."
                ),
            ]
        )
        summary_chain = summary_prompt | self.llm | StrOutputParser()

        summarized_interviews: list[Interview] = []
        for iv_list in persona_buckets.values():
            combined_text = "".join(
                f"- Question: {iv.question}\n  Answer: {iv.answer}\n" for iv in iv_list
            )
            summary_text = summary_chain.invoke({"combined_text": combined_text})

            qa_pairs = self._parse_qa_pairs(summary_text)
            if not qa_pairs:
                # The reply did not follow a recognisable format. Keep the
                # unsummarized interviews rather than silently losing this
                # persona's input.
                summarized_interviews.extend(iv_list)
                continue

            persona = iv_list[0].persona  # same persona for the whole bucket
            summarized_interviews.extend(
                Interview(persona=persona, question=question, answer=answer)
                for question, answer in qa_pairs
            )

        return summarized_interviews

    @staticmethod
    def _match_label(line: str, label: str) -> Optional[str]:
        """
        Returns the text that follows ``label`` when ``line`` is a labelled line, else ``None``.

        Bullets, markdown emphasis and list numbering such as "1)" or "2." in
        front of the label are ignored, so "1) Question: foo",
        "**Question:** foo" and "Question: foo" all yield "foo".
        """
        candidate = line.lstrip("-*#• \t")
        unnumbered = candidate.lstrip("0123456789")
        # Only treat leading digits as list numbering when they are followed by
        # ")" or "."; a line like "2024 Question ..." is ordinary text.
        if len(unnumbered) < len(candidate) and unnumbered[:1] in (")", "."):
            candidate = unnumbered[1:].lstrip("* \t")

        if not candidate.startswith(label):
            return None
        remainder = candidate[len(label):]
        # "Questionnaire ..." / "Answering ..." merely begin with the label text.
        if remainder[:1].isalpha():
            return None
        # Drop only the label and its separator; occurrences of the label word
        # later in the text are left untouched.
        return remainder.strip(":：* \t")

    @staticmethod
    def _parse_qa_pairs(summary_text: str) -> list[tuple[str, str]]:
        """
        Parses the LLM summary into ``(question, answer)`` tuples.

        A "Question" line starts a new pair, an "Answer" line starts its answer,
        and any other non-blank line continues whichever part is currently
        open. Pairs with an empty question or answer are discarded.
        """
        pairs: list[tuple[str, str]] = []
        current_q: Optional[str] = None
        current_a: Optional[str] = None

        for raw_line in summary_text.split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            question_text = InterviewSummarizer._match_label(line, "Question")
            if question_text is not None:
                # Save the previous Q&A before starting a new one
                if current_q and current_a:
                    pairs.append((current_q, current_a))
                current_q = question_text
                current_a = None
                continue

            answer_text = InterviewSummarizer._match_label(line, "Answer")
            if answer_text is not None:
                current_a = answer_text
            elif current_a is not None:
                current_a = f"{current_a} {line}".strip()
            elif current_q is not None:
                current_q = f"{current_q} {line}".strip()

        # Save any leftover Q&A
        if current_q and current_a:
            pairs.append((current_q, current_a))
        return pairs


class SEODataGenerator:
    """
    Generates SEO-related information such as main keywords, sub keywords, and a short analysis of user search intent.
    """
    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def run(self, user_request: str) -> SEOData:
        """
        Asks the LLM for SEO information about a topic and parses the reply.

        Args:
            user_request: The blog topic.

        Returns:
            An ``SEOData`` holding the parsed main keywords, sub keywords and
            the search-intent text. Sections the reply does not contain come
            back empty.
        """
        seo_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are an SEO expert. Analyze what users might search for in search engines, "
                    "and propose main keywords, sub-keywords, and a brief summary of their search intentions."
                ),
                (
                    "human",
                    "For the following blog topic, list the main keywords and sub-keywords you expect users to search for, "
                    "and briefly outline their search intent.\n\n"
                    "Topic: {user_request}\n\n"
                    "Example format:\n"
                    "Main Keywords:\n- ...\n\n"
                    "Sub Keywords:\n- ...\n\n"
                    "Search Intent:\n..."
                ),
            ]
        )
        chain = seo_prompt | self.llm | StrOutputParser()
        seo_text = chain.invoke({"user_request": user_request})

        main_keywords, sub_keywords, search_intent = self._parse_seo_text(seo_text)
        return SEOData(
            main_keywords=main_keywords,
            sub_keywords=sub_keywords,
            search_intent_analysis=search_intent,
        )

    @staticmethod
    def _parse_seo_text(seo_text: str) -> tuple[list[str], list[str], str]:
        """
        Splits the LLM reply into ``(main_keywords, sub_keywords, search_intent)``.

        A line containing one of the section headers switches the current
        section. Text after the header's colon on the same line is kept
        (comma-separated for keyword sections). Inside keyword sections only
        bullet lines are collected; inside the intent section every line is.
        """
        # The prompt text itself spells "sub-keywords" with a hyphen, so accept
        # that variant in addition to the header used in the example format.
        section_headers = (
            ("main", ("Main Keywords",)),
            ("sub", ("Sub Keywords", "Sub-Keywords", "Sub-keywords")),
            ("intent", ("Search Intent",)),
        )
        keywords: dict[str, list[str]] = {"main": [], "sub": []}
        intent_lines: list[str] = []

        bucket = None
        for line in seo_text.split("\n"):
            line_strip = line.strip()

            header_bucket = next(
                (
                    name
                    for name, phrases in section_headers
                    if any(phrase in line_strip for phrase in phrases)
                ),
                None,
            )
            if header_bucket is not None:
                bucket = header_bucket
                # Keep content written on the header line itself, e.g.
                # "Search Intent: users want a definition".
                inline = line_strip.partition(":")[2].strip("* \t")
                if inline:
                    if bucket == "intent":
                        intent_lines.append(inline)
                    else:
                        keywords[bucket].extend(
                            part.strip() for part in inline.split(",") if part.strip()
                        )
                continue

            if bucket in keywords:
                if line_strip.startswith(("-", "* ", "• ")):
                    # Remove only the leading bullet so hyphens at the end of a
                    # keyword survive; surrounding markdown emphasis is dropped.
                    keyword = line_strip.lstrip("-*• \t").rstrip("* \t")
                    if keyword:
                        keywords[bucket].append(keyword)
            elif bucket == "intent":
                intent_lines.append(line_strip)

        # Blank lines around the intent section carry no information.
        search_intent = "\n".join(intent_lines).strip()
        return keywords["main"], keywords["sub"], search_intent


class RequirementsDocumentGenerator:
    """
    Builds the final instruction document for writing a blog article from the
    topic, the interview results and the SEO data.
    """
    def __init__(self, llm: ChatOpenAI):
        self.llm = llm

    def run(
        self,
        user_request: str,
        interviews: list[Interview],
        seo_data: SEOData
    ) -> str:
        """
        Renders all collected inputs into one prompt and returns the LLM's reply.

        Args:
            user_request: The blog topic.
            interviews: Interviews whose persona, question and answer are
                embedded in the prompt.
            seo_data: Keywords (joined with ", ") and the search-intent text.

        Returns:
            The instruction document produced by the LLM, as a string.
        """
        # One text block per interview; the blocks are newline-joined below.
        interview_blocks = []
        for interview in interviews:
            persona = interview.persona
            interview_blocks.append(
                f"Persona: {persona.name} - {persona.background}\n"
                f"Question: {interview.question}\nAnswer: {interview.answer}\n"
            )
        rendered_interviews = "\n".join(interview_blocks)

        system_message = (
            "system",
            "You are an expert who creates comprehensive article instructions based on collected information."
        )
        human_message = (
            "human",
            "Using the following blog topic, multiple interview results from various personas, "
            "and the SEO information provided, please create a thorough instruction document for writing the blog article.\n\n"
            "Topic: {user_request}\n\n"
            "Interview Results:\n{interview_results}\n"
            "SEO Information:\n"
            "Main Keywords: {main_kw}\n"
            "Sub Keywords: {sub_kw}\n"
            "Search Intent Analysis: {intent_analysis}\n\n"
            "Please include the following sections in the instruction document:\n"
            "1. Purpose of the Article\n"
            "2. Target Readers\n"
            "3. Readers' Challenges or Pain Points\n"
            "4. SEO Target Keywords and Topics\n"
            "5. Proposed Article Structure (including headings)\n"
            "6. Important Notes\n\n"
            "The output must be in English.\n\n"
            "Article Creation Instructions:"
        )
        template = ChatPromptTemplate.from_messages([system_message, human_message])
        pipeline = template | self.llm | StrOutputParser()

        prompt_values = {
            "user_request": user_request,
            "interview_results": rendered_interviews,
            "main_kw": ", ".join(seo_data.main_keywords),
            "sub_kw": ", ".join(seo_data.sub_keywords),
            "intent_analysis": seo_data.search_intent_analysis,
        }
        return pipeline.invoke(prompt_values)


# --------------------------------------------------------------------------------
# 3. The Agent Class with StateGraph
# --------------------------------------------------------------------------------

class DocumentationAgent:
    """
    Orchestrates the entire flow:
      1) Generate personas
      2) Conduct interviews
      3) Summarize answers
      4) Generate SEO data
      5) Produce final requirements doc
    """
    def __init__(
        self,
        llm: ChatOpenAI,
        k: Optional[int] = None,
        n_questions: int = 3,
        max_personas: int = 5,
    ):
        """
        Args:
            llm: Chat model shared by every step of the flow.
            k: Number of personas to generate; ``None`` (or 0) falls back to 3.
            n_questions: Number of questions passed to the interview conductor.
            max_personas: Upper bound on how many personas are interviewed; the
                most recently added ones are taken. Must be at least 1.

        Raises:
            ValueError: If ``max_personas`` is smaller than 1.
        """
        if max_personas < 1:
            # personas[-0:] would select every persona instead of none.
            raise ValueError("max_personas must be at least 1")
        self.max_personas = max_personas

        self.persona_generator = PersonaGenerator(llm=llm, k=k or 3)
        self.interview_conductor = InterviewConductor(llm=llm, n_questions=n_questions)
        self.interview_summarizer = InterviewSummarizer(llm=llm)
        self.seo_data_generator = SEODataGenerator(llm=llm)
        self.requirements_generator = RequirementsDocumentGenerator(llm=llm)

        self.graph = self._create_graph()

    def _create_graph(self) -> Any:
        """
        Wires the five steps into a linear graph and compiles it.

        Returns:
            The result of ``StateGraph.compile()``, i.e. the runnable graph
            rather than the ``StateGraph`` builder itself.
        """
        workflow = StateGraph(InterviewState)

        # Add nodes
        workflow.add_node("generate_personas", self._generate_personas)
        workflow.add_node("conduct_interviews", self._conduct_interviews)
        workflow.add_node("summarize_answers", self._summarize_answers)
        workflow.add_node("generate_seo_data", self._generate_seo_data)
        workflow.add_node("generate_requirements", self._generate_requirements)

        # Entry point
        workflow.set_entry_point("generate_personas")

        # Edges
        workflow.add_edge("generate_personas", "conduct_interviews")
        workflow.add_edge("conduct_interviews", "summarize_answers")
        workflow.add_edge("summarize_answers", "generate_seo_data")
        workflow.add_edge("generate_seo_data", "generate_requirements")
        workflow.add_edge("generate_requirements", END)

        return workflow.compile()

    def _generate_personas(self, state: InterviewState) -> dict[str, Any]:
        """Generates personas for the topic and increments the iteration counter."""
        new_personas: Personas = self.persona_generator.run(state.user_request)
        return {
            "personas": new_personas.personas,
            "iteration": state.iteration + 1,
        }

    def _conduct_interviews(self, state: InterviewState) -> dict[str, Any]:
        """Interviews at most ``max_personas`` personas, taken from the end of the list."""
        new_personas = state.personas[-self.max_personas:]
        new_interviews = self.interview_conductor.run(state.user_request, new_personas)
        return {"interviews": new_interviews.interviews}

    def _summarize_answers(self, state: InterviewState) -> dict[str, Any]:
        """Condenses the collected interviews per persona."""
        summarized = self.interview_summarizer.run(state.interviews)
        return {"summarized_interviews": summarized}

    def _generate_seo_data(self, state: InterviewState) -> dict[str, Any]:
        """Derives SEO keywords and search intent from the topic alone."""
        seo_data = self.seo_data_generator.run(state.user_request)
        return {"seo_data": seo_data}

    def _generate_requirements(self, state: InterviewState) -> dict[str, Any]:
        """Produces the final document from the topic, summarized interviews and SEO data."""
        final_doc = self.requirements_generator.run(
            user_request=state.user_request,
            interviews=state.summarized_interviews,
            seo_data=state.seo_data
        )
        return {"requirements_doc": final_doc}

    def run(self, user_request: str) -> str:
        """
        Runs the whole flow for one topic.

        Args:
            user_request: The blog topic.

        Returns:
            The ``requirements_doc`` entry of the final graph state.
        """
        # Start with the initial state and run through the flow
        initial_state = InterviewState(user_request=user_request)
        final_state = self.graph.invoke(initial_state)
        return final_state["requirements_doc"]


# --------------------------------------------------------------------------------
# 4. Main Execution
# --------------------------------------------------------------------------------

def main():
    """
    Interactive entry point: reads a blog topic from stdin, runs the
    documentation agent on it and prints the resulting instructions.

    Returns without calling the LLM when the entered topic is empty or
    consists only of whitespace.
    """
    user_request = input("Please enter your blog topic: ").strip()
    if not user_request:
        # An empty topic would still trigger every LLM call in the flow and
        # could only yield a meaningless document.
        print("No blog topic entered; nothing to generate.")
        return

    k = 3  # Number of personas

    llm = ChatOpenAI(model_name="gpt-4.1-mini-2025-04-14", temperature=0.3)
    # llm = ChatOpenAI(model_name="gpt-4.1-2025-04-14", temperature=0.3)

    agent = DocumentationAgent(llm=llm, k=k)
    final_output = agent.run(user_request=user_request)

    print("\n===== Final Article Creation Instructions =====")
    print(final_output)


# Start the interactive flow only when this code is executed directly
# (running it as a notebook cell also satisfies this check), not when imported.
if __name__ == "__main__":
    main()

Please enter your blog topic: what is ai agent

===== Final Article Creation Instructions =====
# Article Creation Instructions: "What is an AI Agent"

## 1. Purpose of the Article
The purpose of this article is to provide a comprehensive overview of AI agents, including their definition, functionality, applications, and advantages over traditional software. The article aims to educate readers about the significance of AI agents in the realm of artificial intelligence and technology, while also addressing their future potential. By the end of the article, readers should have a clear understanding of what AI agents are, how they work, and their relevance in various sectors.

## 2. Target Readers
The target audience for this article includes:
- Technology enthusiasts seeking to understand AI concepts.
- Professionals in the tech industry looking to implement AI solutions.
- Students and researchers studying artificial intelligence.
- Business leaders interested in leveraging AI agents fo