# Storm Research Assistant

Reference
https://github.com/langchain-ai/langgraph/blob/main/examples/storm/storm.ipynb


In [1]:
## Prereqs

In [2]:

# %pip install -U langchain_community langchain_openai langgraph wikipedia  scikit-learn  langchain_fireworks
# # We use one or the other search engine below
# %pip install -U tavily-python
# %pip install -U duckduckgo-search
# # ! apt-get install graphviz graphviz-dev
# %pip install pygraphviz



In [3]:

example_topic = "Impact of million-plus token context window language models on RAG"



In [4]:
from storm import *

fast_llm, long_context_llm = get_openai_llms(regular_model="gpt-3.5-turbo", long_context_model="gpt-3.5-turbo-0125")

### Generate Initial Outline


In [5]:
outline_system_prompt = prompts.outline_system_wiki_writer
outline_human_prompt = prompts.outline_user_topic_formatinstructions
direct_gen_outline_prompt = get_chat_prompt_from_prompt_templates([outline_system_prompt, outline_human_prompt])

outline_parser = get_pydantic_parser(pydantic_object=Outline)
generate_outline_direct = get_chain_with_outputparser(direct_gen_outline_prompt, fast_llm, outline_parser)

In [6]:

initial_outline = generate_outline_direct.invoke({"topic": example_topic})
logger.info(initial_outline.as_str)

### Expand Topics


In [7]:
related_subjects_prompt = get_chat_prompt_from_prompt_templates([prompts.related_subjects_human_wiki_writer])
related_topics_parser = get_pydantic_parser(RelatedSubjects)
expand_chain = get_chain_with_outputparser(related_subjects_prompt, fast_llm, related_topics_parser)

In [8]:
related_subjects = await expand_chain.ainvoke({"topic": example_topic})
related_subjects

RelatedSubjects(topics=['Relevance-Aware Generation (RAG) models', 'Natural language processing (NLP) models', 'BERT (Bidirectional Encoder Representations from Transformers)', 'GPT (Generative Pre-trained Transformer)', 'Transformer (machine learning model architecture)', 'Context window in language models', 'Impact of large language models on information retrieval'])

## Generate Perspectives

From these related subjects, we can select representative Wiki editors as "subject matter experts" with distinct backgrounds and affiliations. These will help distribute the search process to encourage a more well-rounded final report.


In [9]:


gen_perspectives_prompt = get_chat_prompt_from_prompt_templates([prompts.perspective_system_generator, prompts.outline_user_topic_formatinstructions])
perspectives_parser = get_pydantic_parser(pydantic_object=Perspectives)

gen_perspectives_chain = gen_perspectives_prompt.partial(format_instructions=perspectives_parser.get_format_instructions()) | fast_llm | perspectives_parser



@as_runnable
async def survey_subjects(topic: str)-> Perspectives:
    logger.info(f"Survey Subjects for Topic: {topic}")
    related_subjects = await expand_chain.ainvoke({"topic": topic})
    retrieved_docs = await wikipedia_retriever.abatch(
        related_subjects.topics, return_exceptions=True
    )
    all_docs = []
    for docs in retrieved_docs:
        if isinstance(docs, BaseException):
            continue
        all_docs.extend(docs)
    logger.info(f"Retrieved {len(all_docs)} docs for Topic: {topic}")
    
    formatted = format_docs(all_docs)
    return await gen_perspectives_chain.ainvoke({"examples": formatted, "topic": topic})


In [10]:
perspectives = await survey_subjects.ainvoke(example_topic)
perspectives.dict()

{'editors': [{'affiliation': 'Academic Institution',
   'name': 'Dr. Linguistics',
   'role': 'Language Model Expert',
   'description': 'Dr. Linguistics is a renowned expert in language models, focusing on the impact of million-plus token context window models on the RAG framework. Their research involves analyzing the effectiveness and limitations of these models in relation to the RAG paradigm.'},
  {'affiliation': 'Tech Company',
   'name': 'AI Developer',
   'role': 'Software Engineer',
   'description': 'AI Developer is a software engineer working at a tech company specializing in large language models. They are interested in practical implementations and optimizations of million-plus token context window models for RAG applications, aiming to enhance performance and efficiency.'},
  {'affiliation': 'OpenAI',
   'name': 'Research Scientist',
   'role': 'AI Researcher',
   'description': 'Research Scientist at OpenAI dedicated to exploring cutting-edge advancements in large langua

## Expert Dialog

Each wikipedia writer is primed to role-play using the perspectives presented above. It will ask a series of questions of a second "domain expert" with access to a search engine. This generate content to generate a refined outline as well as an updated index of reference documents.

### Interview State

The conversation is cyclic, so we will construct it within its own graph. The State will contain messages, the reference docs, and the editor (with its own "persona") to make it easy to parallelize these conversations.


# Dialog Roles

The graph will have two participants: the wikipedia editor (generate_question), who asks questions based on its assigned role, and a domain expert (`gen_answer_chain), who uses a search engine to answer the questions as accurately as possible.


In [11]:
gen_qn_prompt = get_chat_prompt_from_prompt_templates([prompts.gen_question_system_generator, prompts.generate_messages_placeholder()])


def swap_roles(state: InterviewState, name: str) -> InterviewState:

    # Normalize name
    name = cleanup_name(name)

    logger.info(f'Swapping roles for {name}')

    converted = []
    for message in state["messages"]:
        if isinstance(message, AIMessage) and message.name != name:
            message = HumanMessage(**message.dict(exclude={"type"}))
        converted.append(message)
    
    state['messages'] = converted
    
    logger.info(f'Converted messages for {name} while swapping roles: {len(converted)} messages')
    return state


@as_runnable
async def generate_question(state: InterviewState) -> InterviewState:
    editor: Editor = state["editor"]

    name = cleanup_name(editor.name)


    logger.info(f'Generating question for {name}')

    gn_chain = (
        RunnableLambda(swap_roles).bind(name=name)
        | gen_qn_prompt.partial(persona=editor.persona)
        | fast_llm
        | RunnableLambda(tag_with_name).bind(name=name)
    )
    result:AIMessage = await gn_chain.ainvoke(state)
    state["messages"] = ([result])

    logger.info(f'Generated question for {name}')
    return state

In [12]:
intial_messages = [prompts.initial_question]
initial_state: InterviewState = {
    "editor": perspectives.editors[0],
    "messages": intial_messages,
    "references": None
}

question = await generate_question.ainvoke(initial_state)

question["messages"][0]

AIMessage(content='Can you explain how million-plus token context window models impact the RAG framework in your research?', response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 224, 'total_tokens': 243}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, name='DrLinguistics')

### Answer questions

The `gen_answer_chain` first generates queries (query expansion) to answer the editor's question, then responds with citations.


In [13]:




gen_queries_prompt = get_chat_prompt_from_prompt_templates([prompts.gen_queries_system_generator, prompts.generate_messages_placeholder()])
queries_parser = get_pydantic_parser(Queries)

gen_queries_chain = get_chain_with_outputparser(gen_queries_prompt, fast_llm, queries_parser)
# gen_queries_prompt.partial(format_instructions=queries_parser.get_format_instructions()) | fast_llm | queries_parser

In [14]:

queries = await gen_queries_chain.ainvoke(
    {"messages": [HumanMessage(content=question["messages"][0].content)]}
)

queries

Queries(queries=['How do million-plus token context window models impact the RAG framework in research?'])

In [15]:
gen_answer_prompt = get_chat_prompt_from_prompt_templates([prompts.generate_answer_system_generator, prompts.generate_messages_placeholder()])
ac_parser = get_pydantic_parser(pydantic_object=AnswerWithCitations)

gen_answer_chain = get_chain_with_outputparser(gen_answer_prompt, fast_llm, ac_parser)\
    .with_config(run_name="GenerateAnswer")\
    # .with_structured_output(AnswerWithCitations, include_raw=True)                                  

In [26]:

async def gen_answer(
    state: InterviewState,
    config: Optional[RunnableConfig] = None,
    name: str = "SubjectMatterExpert",
    max_str_len: int = 15000,
):
    name = cleanup_name(name)

    logger.info(f'START - Generate answers for [{name}]')

    swapped_state = swap_roles(state, name)  # Convert all other AI messages
    
    # Generate search engine queries
    queries:Queries = await gen_queries_chain.ainvoke(swapped_state)

    logger.info(f"Got {len(queries.queries)} search engine queries for [{name}] -\n\t {queries.queries}")

    # Run search engine
    query_results = await search_engine.abatch(
        queries.queries, config, return_exceptions=True
    )
    successful_results = [
        res for res in query_results if not isinstance(res, Exception)
    ]

    all_query_results = {
        res["url"]: res["content"] for results in successful_results for res in results
    }
    
    logger.info(f"Got {len(successful_results)} search engine results for [{name}] - \n\t {all_query_results}")

    # We could be more precise about handling max token length if we wanted to here
    dumped_successful_results = json.dumps(all_query_results)[:max_str_len]
    
    logger.info(f"Dumped {len(dumped_successful_results)} characters for [{name}] - \n\t {dumped_successful_results}")
    
    # Append Questions from Wikipedia and Answers from the search engine
    ai_message_for_queries: AIMessage = get_ai_message(json.dumps(queries.as_dict()))
    
    tool_results_message = generate_human_message(dumped_successful_results)
    
    logger.debug(f"Got {ai_message_for_queries} for [{name}]")
    
    # tool_call = queries["raw"].additional_kwargs["tool_calls"][0]
    # tool_id = tool_call["id"]

    # tool_message = ToolMessage(tool_call_id=tool_id, content=dumped)
    

    swapped_state["messages"].extend([ai_message_for_queries, tool_results_message])
    
    # Only update the shared state with the final answer to avoid
    # polluting the dialogue history with intermediate messages
    try:
        generated: AnswerWithCitations = await gen_answer_chain.ainvoke(swapped_state)
        
        logger.info(f"Genreted final answer {generated} for [{name}] - \n\t {generated.as_str}")

    except Exception as e:
        logger.error(f"Error generating answer for [{name}] - {e}")
        generated = AnswerWithCitations(answer="", cited_urls=[])
    
    cited_urls = set(generated.cited_urls)
    
    # Save the retrieved information to a the shared state for future reference
    cited_references = {k: v for k, v in all_query_results.items() if k in cited_urls}
    
    formatted_message = AIMessage(name=name, content=generated.as_str)
    # Add message to shared state
    # state["messages"].append(formatted_message)
    state["messages"] = add_messages(state["messages"], [formatted_message])
    
    # Update references with cited references
    state["references"] = update_references(state["references"], cited_references)

    logger.info(f'END - generate answer for [{name}]')
    
    return state
    

In [27]:
intial_messages = [prompts.initial_question, generate_human_message(question["messages"][0].content)]

initial_state: InterviewState = {
    "editor": perspectives.editors[0],
    "messages": intial_messages,
    "references": {}
}

example_answer = await gen_answer(initial_state)
example_answer["messages"][-1].content

Searching DuckDuckGo for [Writing an article on {example_topic}]
Got search engine results: 5 for [Writing an article on {example_topic}]
Searching DuckDuckGo for [Impact of million-plus token context window models on the RAG framework in research]
Got search engine results: 5 for [Impact of million-plus token context window models on the RAG framework in research]


'The introduction of large-context models with million-plus token context windows, such as Gemini 1.5 and Claude, has sparked discussions in the AI community about their impact on the Retrieval-Augmented Generation (RAG) framework. These models claim the ability to effectively process a 1 million token context window, which is a significant increase compared to previous models like GPT-4 that supported 128,000 tokens. Some experts predict potential challenges for RAG due to these new large-context models, as they enable handling much larger amounts of information in a single pass, potentially reducing the need for retrieval mechanisms in RAG. This breakthrough in context window size could revolutionize how AI processes and understands vast amounts of data, leading to discussions about the future relevance of frameworks like RAG in the face of such advancements.\n\nCitations:\n\n[1]: https://medium.com/enterprise-rag/why-gemini-1-5-and-other-large-context-models-are-bullish-for-rag-ce32

In [28]:
example_answer["messages"]

[HumanMessage(content='So you said you were writing an article on {example_topic}?'),
 HumanMessage(content='Can you explain how million-plus token context window models impact the RAG framework in your research?'),
 AIMessage(content='{"queries": ["Writing an article on {example_topic}", "Impact of million-plus token context window models on the RAG framework in research"]}', name='AI'),
 HumanMessage(content='{"https://seowind.io/how-to-write-an-article/": "Step 2: Select a Topic and an Attractive Heading. Having understood your audience, select a relevant topic based on their interests and questions. Be sure it\'s one you can competently discuss. When deciding how to start writing an article, ensure it begins with a captivating title.", "https://www.semrush.com/blog/article-writing/": "An article is a piece of writing that provides information, presents ideas, or discusses a topic in a structured manner. You\'ll find articles in newspapers, magazines, blogs, websites, and other publ

# Construct the Interview Graph

Now that we've defined the editor and domain expert, we can compose them in a graph.


In [30]:
max_num_turns = 7

def route_messages(state: InterviewState, name: str = "SubjectMatterExpert"):

    name = cleanup_name(name)

    logger.info(f'Routing messages for [{name}]')

    messages = state["messages"]
    num_responses = len(
        [m for m in messages if isinstance(m, AIMessage) and m.name == name]
    )

    if num_responses >= MAX_INTERVIEW_QUESTIONS:
        return END
    
    last_question = messages[-2]
    if last_question.content.endswith("Thank you so much for your help!"):
        return END
    
    logger.info(f'Continue asking question for [{name}] as this is not the last end of the conversation')
    return "ask_question"

In [31]:
builder = StateGraph(InterviewState)

builder.add_node("ask_question", generate_question)
builder.add_node("answer_question", gen_answer)
builder.add_conditional_edges("answer_question", route_messages)
builder.add_edge("ask_question", "answer_question")

builder.set_entry_point("ask_question")
interview_graph = builder.compile().with_config(run_name="Conduct Interviews")

In [None]:
from IPython.display import Image

# comment out if you have not installed pygraphviz
# Image(interview_graph.get_graph().draw_png())

In [32]:

final_step = None

initial_state = {
    "editor": perspectives.editors[0],
    "messages": [
        AIMessage(
            content=f"So you said you were writing an article on {example_topic}?",
            name="SubjectMatterExpert",
        )
    ],
}
async for step in interview_graph.astream(initial_state):
    name = next(iter(step))
    logger.info(f"Processing step: {name}")
    logger.debug("-- ", str(step[name]["messages"])[:300])
    if END in step:
        final_step = step

Searching DuckDuckGo for [Advantages of using million-plus token context window language models within the RAG framework]
Got search engine results: 5 for [Advantages of using million-plus token context window language models within the RAG framework]
Searching DuckDuckGo for [Disadvantages of using million-plus token context window language models within the RAG framework]
Got search engine results: 5 for [Disadvantages of using million-plus token context window language models within the RAG framework]
Searching DuckDuckGo for [Impact of million-plus token context window language models on the RAG framework]
Got search engine results: 5 for [Impact of million-plus token context window language models on the RAG framework]


2024-03-30 22:57:40,492 [MainThread  ] [ERROR]  Error generating answer for [SubjectMatterExpert] - Invalid json output: The impact of million-plus token context window language models on the RAG framework has sparked discussions in the AI community. While some predict a negative impact on Retrieval-Augmented Generation (RAG) due to the introduction of models like Gemini 1.5 and Anthropic's Claude boasting a 1 million token context window, there are contrasting views on the benefits of retrieval for long context Language Learning Models (LLMs). The cost implications of using RAG with different context window sizes have also been analyzed, with findings indicating varying costs associated with different context window sizes, such as $0.0004/1k tokens for a 128k context window. Additionally, there have been advancements in context window sizes, with models capable of handling up to 10 million tokens, a significant increase compared to previous models like GPT-4 that supported 128,000 tok

In [33]:
final_state = next(iter(final_step.values()))


In [34]:
final_state

{'messages': [AIMessage(content='So you said you were writing an article on Impact of million-plus token context window language models on RAG?', name='SubjectMatterExpert'),
  AIMessage(content='Can you explain the specific advantages and disadvantages of using million-plus token context window language models within the RAG framework?', response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 238, 'total_tokens': 261}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3bc1b5746c', 'finish_reason': 'stop', 'logprobs': None}, name='DrLinguistics'),
  AIMessage(content='So you said you were writing an article on Impact of million-plus token context window language models on RAG?', name='SubjectMatterExpert'),
  HumanMessage(content='Can you explain the specific advantages and disadvantages of using million-plus token context window language models within the RAG framework?', response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 238, 'tot

## Refine Outline

At this point in STORM, we've conducted a large amount of research from different perspectives. It's time to refine the original outline based on these investigations. Below, create a chain using the LLM with a long context window to update the original outline.


In [None]:
refine_outline_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are a Wikipedia writer. You have gathered information from experts and search engines. Now, you are refining the outline of the Wikipedia page. \
You need to make sure that the outline is comprehensive and specific. \
Topic you are writing about: {topic} 

Old outline:

{old_outline}
""",
        ),
        (
            "user",
            "Refine the outline based on your conversations with subject-matter experts:\n\nConversations:\n\n{conversations}\n\n{format_instructions}\n\nWrite the refined Wikipedia outline:",
        ),
    ]
)


# Using turbo preview since the context can get quite long
refine_outline_chain = refine_outline_prompt.partial(format_instructions=outline_parser.get_format_instructions()) | long_context_llm | outline_parser

In [None]:
refined_outline = refine_outline_chain.invoke(
    {
        "topic": example_topic,
        "old_outline": initial_outline.as_str,
        "conversations": "\n\n".join(
            f"### {m.name}\n\n{m.content}" for m in final_state["messages"]
        ),
    }
)

In [None]:
logger.info(refined_outline.as_str)

In [None]:
## Generate Article

In [None]:
from langchain_core.documents import Document

from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
reference_docs = [
    Document(page_content=v, metadata={"source": k})
    for k, v in final_state["references"].items()
]

logger.info(f"Number of references: {len(reference_docs)}")

# This really doesn't need to be a vectorstore for this size of data.
# It could just be a numpy matrix. Or you could store documents
# across requests if you want.
vectorstore = SKLearnVectorStore.from_documents(
    reference_docs,
    embedding=embeddings,
)
retriever = vectorstore.as_retriever(k=10)

In [None]:
retriever.invoke("What's a long context LLM anyway?")

#### Generate Sections

Now you can generate the sections using the indexed docs.


In [None]:
class SubSection(BaseModel):
    subsection_title: str = Field(..., title="Title of the subsection")
    content: str = Field(
        ...,
        title="Full content of the subsection. Include [#] citations to the cited sources where relevant.",
    )

    @property
    def as_str(self) -> str:
        return f"### {self.subsection_title}\n\n{self.content}".strip()


class WikiSection(BaseModel):
    section_title: str = Field(..., title="Title of the section")
    content: str = Field(..., title="Full content of the section")
    subsections: Optional[List[Subsection]] = Field(
        default=None,
        title="Titles and descriptions for each subsection of the Wikipedia page.",
    )
    citations: List[str] = Field(default_factory=list)

    @property
    def as_str(self) -> str:
        subsections = "\n\n".join(
            subsection.as_str for subsection in self.subsections or []
        )
        citations = "\n".join([f" [{i}] {cit}" for i, cit in enumerate(self.citations)])
        return (
            f"## {self.section_title}\n\n{self.content}\n\n{subsections}".strip()
            + f"\n\n{citations}".strip()
        )


section_writer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert Wikipedia writer. Complete your assigned WikiSection from the following outline:\n\n"
            "{outline}\n\nCite your sources, using the following references:\n\n<Documents>\n{docs}\n<Documents>",
        ),
        ("user", "Write the full WikiSection for the {section} section.\n{format_instructions}"),
    ]
)


async def retrieve(inputs: dict):
    docs = await retriever.ainvoke(inputs["topic"] + ": " + inputs["section"])
    formatted = "\n".join(
        [
            f'<Document href="{doc.metadata["source"]}"/>\n{doc.page_content}\n</Document>'
            for doc in docs
        ]
    )
    return {"docs": formatted, **inputs}

wiki_parser = PydanticOutputParser(pydantic_object=WikiSection)

section_writer = (
    retrieve
    | section_writer_prompt.partial(format_instructions=wiki_parser.get_format_instructions())
    | long_context_llm
    | wiki_parser
)

In [None]:
section = await section_writer.ainvoke(
    {
        "outline": refined_outline.as_str,
        "section": refined_outline.sections[1].section_title,
        "topic": example_topic,
    }
)
print(section.as_str)

#### Generate final article

Now we can rewrite the draft to appropriately group all the citations and maintain a consistent voice.


In [None]:
from langchain_core.output_parsers import StrOutputParser

writer_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert Wikipedia author. Write the complete wiki article on {topic} using the following section drafts:\n\n"
            "{draft}\n\nStrictly follow Wikipedia format guidelines.",
        ),
        (
            "user",
            'Write the complete Wiki article using markdown format. Organize citations using footnotes like "[1]","" avoiding duplicates in the footer. Include URLs in the footer.',
        ),
    ]
)

writer = writer_prompt | long_context_llm | StrOutputParser()

In [None]:
for tok in writer.stream({"topic": example_topic, "draft": section.as_str}):
    print(tok, end="")

## Final Flow

Now it's time to string everything together. We will have 6 main stages in sequence:
.

1. Generate the initial outline + perspectives
2. Batch converse with each perspective to expand the content for the article
3. Refine the outline based on the conversations
4. Index the reference docs from the conversations
5. Write the individual sections of the article
6. Write the final wiki

The state tracks the outputs of each stage.


In [None]:
class ResearchState(TypedDict):
    topic: str
    outline: Outline
    editors: List[Editor]
    interview_results: List[InterviewState]
    # The final sections output
    sections: List[WikiSection]
    article: str

In [None]:
import asyncio


async def initialize_research(state: ResearchState):
    topic = state["topic"]
    coros = (
        generate_outline_direct.ainvoke({"topic": topic}),
        survey_subjects.ainvoke(topic),
    )
    results = await asyncio.gather(*coros)
    return {
        **state,
        "outline": results[0],
        "editors": results[1].editors,
    }


async def conduct_interviews(state: ResearchState):
    topic = state["topic"]
    initial_states = [
        {
            "editor": editor,
            "messages": [
                AIMessage(
                    content=f"So you said you were writing an article on {topic}?",
                    name="SubjectMatterExpert",
                )
            ],
        }
        for editor in state["editors"]
    ]
    # We call in to the sub-graph here to parallelize the interviews
    interview_results = await interview_graph.abatch(initial_states)

    return {
        **state,
        "interview_results": interview_results,
    }


def format_conversation(interview_state):
    messages = interview_state["messages"]
    convo = "\n".join(f"{m.name}: {m.content}" for m in messages)
    return f'Conversation with {interview_state["editor"].name}\n\n' + convo


async def refine_outline(state: ResearchState):
    convos = "\n\n".join(
        [
            format_conversation(interview_state)
            for interview_state in state["interview_results"]
        ]
    )

    updated_outline = await refine_outline_chain.ainvoke(
        {
            "topic": state["topic"],
            "old_outline": state["outline"].as_str,
            "conversations": convos,
        }
    )
    return {**state, "outline": updated_outline}


async def index_references(state: ResearchState):
    all_docs = []
    for interview_state in state["interview_results"]:
        reference_docs = [
            Document(page_content=v, metadata={"source": k})
            for k, v in interview_state["references"].items()
        ]
        all_docs.extend(reference_docs)
    await vectorstore.aadd_documents(all_docs)
    return state


async def write_sections(state: ResearchState):
    outline = state["outline"]
    sections = await section_writer.abatch(
        [
            {
                "outline": refined_outline.as_str,
                "section": section.section_title,
                "topic": state["topic"],
            }
            for section in outline.sections
        ]
    )
    return {
        **state,
        "sections": sections,
    }


async def write_article(state: ResearchState):
    topic = state["topic"]
    sections = state["sections"]
    draft = "\n\n".join([section.as_str for section in sections])
    article = await writer.ainvoke({"topic": topic, "draft": draft})
    return {
        **state,
        "article": article,
    }

#### Create the graph


In [None]:
builder_of_storm = StateGraph(ResearchState)

nodes = [
    ("init_research", initialize_research),
    ("conduct_interviews", conduct_interviews),
    ("refine_outline", refine_outline),
    ("index_references", index_references),
    ("write_sections", write_sections),
    ("write_article", write_article),
]
for i in range(len(nodes)):
    name, node = nodes[i]
    builder_of_storm.add_node(name, node)
    if i > 0:
        builder_of_storm.add_edge(nodes[i - 1][0], name)

builder_of_storm.set_entry_point(nodes[0][0])
builder_of_storm.set_finish_point(nodes[-1][0])
storm = builder_of_storm.compile()

In [None]:
async for step in storm.astream(
    {
        "topic": "Building better slack bots using LLMs",
    }
):
    name = next(iter(step))
    print(name)
    logger.info("-- ", str(step[name])[:300])
    if END in step:
        results = step

In [None]:
article = results[END]["article"]

## Render the Wiki

Now we can render the final wiki page!


In [None]:
from IPython.display import Markdown

# We will down-header the sections to create less confusion in this notebook
Markdown(article.replace("\n#", "\n##"))