In [1]:
# ==== Standard Library ====
from typing import List, Optional, Annotated
from typing_extensions import TypedDict
import json
import getpass
import os
from IPython.core.display import display, HTML

# ==== Third-party Libraries ====
from pydantic import BaseModel, Field, field_validator

# ==== LangChain Core ====
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda, RunnableConfig
from langchain_core.runnables import chain as as_runnable
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage, AnyMessage
from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain_core.output_parsers import StrOutputParser


# ==== LangChain Community ====
from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain_community.retrievers import WikipediaRetriever
from langchain_community.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders import ArxivLoader
from langchain_community.tools import TavilySearchResults

# ==== LangChain OpenAI ====
from langchain_openai import OpenAIEmbeddings

# ==== LangGraph ====
from langgraph.graph import START, END, StateGraph
from langgraph.pregel import RetryPolicy

# ==== Local Modules ====
from Utils  import *
from Setup import *
from Promt_tem import *
from Gen_Initial_Outline import *
from Expand_Topics import * 
from Gen_Perspectives import *
from Interview_State import InterviewState
from Dialog_Roles import *
from search_engine import *

  from IPython.core.display import display, HTML


In [2]:
####### Input #######
import getpass
import os

# Credentials are never written into the notebook. A key that is already
# exported in the environment is kept as-is; a missing (or empty) one is asked
# for interactively so it does not end up in the saved file or its outputs.
for _key_name in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass.getpass(f"{_key_name}: ")

# Seed topic for the article; later cells read (and may rewrite) `_input`.
_input = "How People Are Really Using Gen AI in 2025"

# Two models are configured: a fast one for most calls and a long-context one
# for the steps that receive long prompts.
selector = LLMSelector(
    fast_model_name="gpt-4o",
    long_context_model_name="gpt-4.5-preview-2025-02-27",
)
use_long_context = False
fast_llm, long_context_llm = selector.get_llms()

# Run 

In [3]:
from typing import List
from pydantic import BaseModel, Field
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser


# Schema handed to `convert_to_openai_function` to describe the function the
# model is forced to call when rewriting the topic.
# NOTE(review): the docstring and the Field description are presumably forwarded
# to the model as part of that function schema, so treat them as runtime text
# rather than as ordinary documentation — confirm before rewording them.
class Tagging(BaseModel):
    """Tag the piece of text with particular info."""
    # The rewritten headline; read back from the parsed result under the "Topic" key.
    Topic: str = Field(description="Head line Topic ")


# Prompt asking the model to turn the raw topic into a more engaging headline.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a world-class writer, your job is to take a topic and create a new interesting topic that gets read.",
        ),
        ("user", "{input}"),
    ]
)

# Bind the `Tagging` function and force the model to call it, so the reply
# comes back as structured arguments instead of free text.
model_with_functions = fast_llm.bind(
    functions=[convert_to_openai_function(Tagging)],
    function_call={"name": "Tagging"},
)

# prompt -> forced function call -> arguments parsed into a plain dict
tagging_chain = prompt | model_with_functions | JsonOutputFunctionsParser()

# NOTE: `_input` is replaced by the generated headline, so running this cell a
# second time rewrites the already rewritten topic.
aa = tagging_chain.invoke({"input": _input})
_input = aa["Topic"]

In [4]:
## Generate Initial Outline
gen = GenInitialOutline(fast_llm, long_context_llm)

# outline
initial_outline = gen.generate_outline(example_topic= _input , use_long_context=use_long_context)
generate_outline_direct = direct_gen_outline_prompt | fast_llm.with_structured_output(
    Outline
)

initial_outline = generate_outline_direct.invoke({"topic": _input})
# initial_outline.as_str

related_subjects, expand_chain = await expand_topics(_input, fast_llm)

gen = GenPerspectives(fast_llm=fast_llm, expand_chain=expand_chain)
perspectives = await gen.survey_subjects(_input)
#perspectives.model_dump()
#gen_perspectives_chain




  lis = BeautifulSoup(html).find_all('li')


In [5]:
@as_runnable
async def generate_question(state: InterviewState):
    """Produce the editor's next interview question.

    Reads the editor from ``state["editor"]``, runs the whole state through a
    chain built for that editor (role swap, question prompt filled with the
    editor's persona, fast model, name tagging) and returns the single
    resulting message as ``{"messages": [message]}``.
    """
    editor = state["editor"]

    # Each stage is named separately; they are composed in the order listed.
    swap_for_editor = RunnableLambda(swap_roles).bind(name=editor.name)
    persona_prompt = gen_qn_prompt.partial(persona=editor.persona)
    tag_as_editor = RunnableLambda(tag_with_name).bind(name=editor.name)
    gn_chain = swap_for_editor | persona_prompt | fast_llm | tag_as_editor

    result = await gn_chain.ainvoke(state)
    return {"messages": [result]}
    
messages = [
    HumanMessage(f"So you said you were writing an article on {_input}?")
]
question = await generate_question.ainvoke(
    {
        "editor": perspectives.editors[0],
        "messages": messages,
    }
)

# question["messages"][0].content
#### Answer questions
gen_queries_chain = gen_queries_prompt | fast_llm.with_structured_output(
    Queries, include_raw=True, method="function_calling"
)

In [6]:
queries = await gen_queries_chain.ainvoke(
    {"messages": [HumanMessage(content=question["messages"][0].content)]}
)
# queries["parsed"].queries

In [7]:
# The run name is attached to the structured-output model only (the method call
# binds tighter than `|`), not to the whole prompt | model chain.
answer_llm = fast_llm.with_structured_output(
    AnswerWithCitations, include_raw=True
).with_config(run_name="GenerateAnswer")
gen_answer_chain = gen_answer_prompt | answer_llm

In [8]:
# Tavily is typically a better search engine, but your free queries are limited.
# (DuckDuckGoSearchAPIWrapper, imported at the top of the notebook, is an
# alternative backend if no Tavily key is available.)
#
# Only this client is configured here; the `search_engine` tool defined right
# after it in this cell is what the rest of the notebook calls.
tavily_search = TavilySearchResults(
    max_results=20,
    include_answer=True,
    include_raw_content=True,
    include_images=True,
    # search_depth="advanced",
    # include_domains = []
    # exclude_domains = []
)
# Async tool wrapper around the Tavily client.
# Takes one query string and returns a list of {"content", "url"} dicts, one
# per search hit. The docstring below is kept verbatim because `@tool` uses it
# as the tool description.
@tool
async def search_engine(query: str):
    """Search engine to the internet."""
    # Await the async entry point: a synchronous `.invoke` here would block the
    # event loop and make batched searches run one after another.
    results = await tavily_search.ainvoke(query)
    return [{"content": r["content"], "url": r["url"]} for r in results]


In [9]:
async def gen_answer(
    state: InterviewState,
    config: Optional[RunnableConfig] = None,
    name: str = "Subject_Matter_Expert",
    max_str_len: int = 15000,
):
    """Answer the latest interview question with the help of web search.

    Args:
        state: Current interview state; passed through ``swap_roles`` first.
        config: Runnable config forwarded to the batched search call.
        name: Speaker name used for the role swap and for the returned message.
        max_str_len: Maximum number of characters of serialized search results
            placed in the tool message.

    Returns:
        A dict with ``"messages"`` (a one-element list holding the answer as an
        ``AIMessage``) and ``"references"`` (``url -> content`` for the URLs
        the answer cites).
    """
    swapped_state = swap_roles(state, name)
    queries = await gen_queries_chain.ainvoke(swapped_state)

    # Run all searches as one batch; a failed search comes back as an exception
    # object instead of raising, and is skipped below.
    query_results = await search_engine.abatch(
        queries["parsed"].queries, config, return_exceptions=True
    )
    all_query_results = {}
    for results in query_results:
        if isinstance(results, Exception):
            continue
        for res in results:
            all_query_results[res["url"]] = res["content"]

    # Crude size cap: the serialized results are cut by character count, not by tokens.
    dumped = json.dumps(all_query_results)[:max_str_len]

    # Replay the query-generating message and pair its first tool call with the
    # search results so the answer chain sees them as a tool response.
    ai_message: AIMessage = queries["raw"]
    first_call_id = ai_message.tool_calls[0]["id"]
    tool_message = ToolMessage(tool_call_id=first_call_id, content=dumped)
    swapped_state["messages"].extend([ai_message, tool_message])

    generated = await gen_answer_chain.ainvoke(swapped_state)
    answer = generated["parsed"]

    # Only the formatted answer is returned as a message; the intermediate
    # query / tool messages above are not part of the returned update.
    cited_urls = set(answer.cited_urls)
    cited_references = {
        url: content
        for url, content in all_query_results.items()
        if url in cited_urls
    }
    formatted_message = AIMessage(name=name, content=answer.as_str)
    return {"messages": [formatted_message], "references": cited_references}


# Interview loop: START -> ask_question -> answer_question -> (route_messages decides).
builder = StateGraph(InterviewState)

# Both nodes get the same retry budget of five attempts.
node_retry = RetryPolicy(max_attempts=5)
builder.add_node("ask_question", generate_question, retry=node_retry)
builder.add_node("answer_question", gen_answer, retry=node_retry)

builder.add_edge(START, "ask_question")
builder.add_edge("ask_question", "answer_question")
builder.add_conditional_edges("answer_question", route_messages)

# Compile without a checkpointer and label runs of this graph.
interview_graph = builder.compile(checkpointer=False)
interview_graph = interview_graph.with_config(run_name="Conduct Interviews")

In [None]:
final_step = None

initial_state = {
    "editor": perspectives.editors[0],
    "messages": [
        AIMessage(
            content=f"So you said you were writing an article on {_input}?",
            name="Subject_Matter_Expert",
        )
    ],
}
async for step in interview_graph.astream(initial_state):
    name = next(iter(step))
    print(name)
    print("-- ", str(step[name]["messages"])[:300])
final_step = step

final_state = next(iter(final_step.values()))

ask_question
--  [AIMessage(content="Yes, that's correct! As an AI Ethics Researcher, I'm particularly focused on exploring the ethical considerations and social implications of generative AI on daily life. To begin, could you share your thoughts on the potential privacy concerns that might arise as generative AI be


In [None]:
## Refine Outline
# The interview transcript is part of the prompt, so this step uses the
# long-context model.
refine_outline_chain = refine_outline_prompt | long_context_llm.with_structured_output(
    Outline
)

# One "### <speaker>" block per interview message, separated by blank lines.
refine_inputs = {
    "topic": _input,
    "old_outline": initial_outline.as_str,
    "conversations": "\n\n".join(
        [f"### {m.name}\n\n{m.content}" for m in final_state["messages"]]
    ),
}
refined_outline = refine_outline_chain.invoke(refine_inputs)


In [None]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# One document per cited reference: the page text is the content, the URL is
# stored as the "source" metadata used when formatting citations.
reference_docs = [
    Document(page_content=v, metadata={"source": k})
    for k, v in final_state["references"].items()
]
# This really doesn't need to be a vectorstore for this size of data.
# It could just be a numpy matrix. Or you could store documents
# across requests if you want.
vectorstore = InMemoryVectorStore.from_documents(
    reference_docs,
    embedding=embeddings,
)
# The number of documents to return has to be passed through `search_kwargs`;
# a bare `k=3` keyword is not applied to the search.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})



In [None]:
async def retrieve(inputs: dict):
    """Fetch reference documents for a section and add them to the inputs.

    Queries the module-level ``retriever`` with ``"<topic>: <section>"`` and
    renders every hit as a ``<Document href=...>`` snippet. Returns a new dict
    holding the joined snippets under ``"docs"`` followed by all original
    ``inputs`` entries (an existing ``"docs"`` key in ``inputs`` wins).
    """
    query = ": ".join((inputs["topic"], inputs["section"]))
    docs = await retriever.ainvoke(query)

    snippets = []
    for doc in docs:
        source = doc.metadata["source"]
        snippets.append(f'<Document href="{source}"/>\n{doc.page_content}\n</Document>')

    return {"docs": "\n".join(snippets), **inputs}



In [None]:
section_writer = (
    retrieve
    | section_writer_prompt
    | long_context_llm.with_structured_output(WikiSection)
)

section = await section_writer.ainvoke(
    {
        "outline": refined_outline.as_str,
        "section": refined_outline.sections[1].section_title,
        "topic": _input,
    }
)


writer = writer_prompt | long_context_llm | StrOutputParser()
result = writer.invoke({"topic": _input, "draft": section.as_str})
#print(result)


# Output

In [None]:
# List every section of the refined outline: title, description, then a blank line.
for outline_section in refined_outline.sections:
    print(outline_section.section_title)
    print(outline_section.description)
    print()


In [None]:
from IPython.display import Markdown

# Render the generated article text as formatted Markdown in the cell output.
Markdown(result)

In [None]:
# Show the raw (unrendered) text held in `result`.
result

In [None]:
# Show the raw (unrendered) text held in `result`.
result

In [None]:
from IPython.display import Markdown

# Render the generated article text as formatted Markdown in the cell output.
Markdown(result)

In [None]:
# Show the raw (unrendered) text held in `result`.
result