<a href="https://colab.research.google.com/github/timothyow/news_search_summary_agent/blob/main/News_search_n_summary_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langgraph langchain-openai tavily-python python-docx python-dateutil langchain_community

Collecting langgraph
  Downloading langgraph-0.5.1-py3-none-any.whl.metadata (6.7 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.27-py3-none-any.whl.metadata (2.3 kB)
Collecting tavily-python
  Downloading tavily_python-0.7.9-py3-none-any.whl.metadata (7.5 kB)
Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.1.0 (from langgraph)
  Downloading langgraph_checkpoint-2.1.0-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt<0.6.0,>=0.5.0 (from langgraph)
  Downloading langgraph_prebuilt-0.5.2-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk<0.2.0,>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.72-py3-none-any.whl.metadata (1.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.

In [None]:
# news_search_n_summary_agent.py

import os
import operator
from typing import TypedDict, Annotated
from langgraph.graph import StateGraph, END, START
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain_community.utilities.tavily_search import TavilySearchAPIWrapper
from docx import Document

# === Define state type ===
class State(TypedDict):
    """Graph state shared by every node.

    Holds a single key, ``state``: a list of
    ``(country, summary, articles)`` tuples.
    """

    # The operator.add metadata makes each node's returned list get
    # concatenated onto the existing one rather than replacing it.
    state: Annotated[
        list[tuple[str, str, list]],
        operator.add,
    ]

# === LLM and search init ===
# Both clients are created once at import time and reused by every country
# node built in run_news_agent.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
)
search = TavilySearchAPIWrapper()

# === Static country list ===
# run_news_agent adds one graph node per entry, named after the country.
COUNTRIES = [
    "Thailand",
    "Indonesia",
    "Malaysia",
    "Philippines",
    "Vietnam",
]

# === .docx writer ===
def write_summary_to_docx(
    country: str,
    summary: str,
    articles: list,
    date_range: str,
    max_articles: int = 10,
) -> str:
    """Write one country's dairy news summary to a Word document.

    The file is saved as ``news_summaries/<country>_<date_range>.docx``, with
    spaces in ``date_range`` replaced by underscores. The ``news_summaries``
    directory is created if it does not exist.

    Args:
        country: Country name, used in the document title and the file name.
        summary: Summary text; every non-blank line becomes one bullet.
        articles: Article dicts. The ``title`` and ``url`` keys are read when
            present; an article without a title is listed by its URL.
        date_range: Date range shown in the document and used in the file name.
        max_articles: Maximum number of articles listed under the
            "Top N Articles" heading. Defaults to 10.

    Returns:
        The path of the saved document.
    """
    os.makedirs("news_summaries", exist_ok=True)
    file_path = f"news_summaries/{country}_{date_range.replace(' ', '_')}.docx"

    doc = Document()
    doc.add_heading(f"{country} — Dairy News Summary", level=1)
    doc.add_paragraph(f"Date Range: {date_range}")

    doc.add_heading("Key Summary", level=2)
    for line in summary.splitlines():
        text = line.strip()
        if text:
            # Styles are addressed by display name ("List Bullet"); lookup by
            # style id ("ListBullet") is deprecated in python-docx.
            doc.add_paragraph(text, style="List Bullet")

    doc.add_heading(f"Top {max_articles} Articles", level=2)
    # Cap the list so it matches the heading even when more hits are passed in.
    for article in (articles or [])[:max_articles]:
        url = article.get("url", "")
        title = article.get("title") or url or "Untitled article"
        doc.add_paragraph(title, style="List Number")
        if url:
            doc.add_paragraph(url, style="Normal")

    doc.save(file_path)
    return file_path

# === Main callable function ===
def run_news_agent(start_date: str, end_date: str) -> list[dict]:
    """Search, summarize and export dairy news for every country in COUNTRIES.

    Builds a LangGraph graph with one node per country. Each node runs a
    search, asks the LLM for a summary and appends a
    ``(country, summary, articles)`` tuple to the shared state. A final
    ``combine`` node writes one .docx file per country through
    ``write_summary_to_docx``.

    Args:
        start_date: Start of the reporting period, inserted verbatim into the
            search query, the prompt and the output file name.
        end_date: End of the reporting period, used the same way.

    Returns:
        One dict per country with the keys ``country``, ``summary`` and
        ``file_path`` (the path of the document written for that country).
    """
    date_range = f"{start_date} to {end_date}"

    # Paths of the documents written by the combine node, keyed by country.
    file_paths: dict[str, str] = {}

    def _format_article(result: dict) -> str:
        # Include the search snippet so the model has text to summarize,
        # not only a title and a link.
        return (
            f"Title: {result.get('title', 'Untitled')}\n"
            f"URL: {result.get('url', '')}\n"
            f"Content: {result.get('content', '')}\n"
        )

    def country_summary_node(country_name):
        # Factory: binds country_name so each graph node handles one country.
        def _node(_: State) -> State:
            query = f"dairy industry news in {country_name} from {date_range}"
            results = search.results(query=query, max_results=20)

            article_summaries = "\n".join(_format_article(r) for r in results)

            prompt = f"""

You are a BCG analyst. Based on the following articles, ensure the articles are correct to the dairy industry in {country_name} between {date_range},
read the articles and summarize into a paragraph of no more than 200 words.
After the summary, compile the top 5 article titles and URLs relevant to the summary, do not compile similar articles.
Do not repeat similar news articles and do not complie articles from research agency or articles related to Dairy Market Size, Trends and Forecast.

Articles:
{article_summaries}
            """.strip()

            summary = llm.invoke([HumanMessage(content=prompt)]).content
            return {"state": [(country_name, summary, results)]}
        return _node

    def combine_all_nodes(data: State) -> State:
        for country, summary, articles in data["state"]:
            file_paths[country] = write_summary_to_docx(
                country, summary, articles, date_range
            )
        # Return an empty update: `state` is merged with operator.add, so
        # returning `data` itself would append every entry a second time.
        return {"state": []}

    builder = StateGraph(State)
    builder.add_node("start", lambda _: {"state": []})
    builder.add_node("combine", combine_all_nodes)

    # Fan out from "start" to one node per country, then fan in to "combine".
    for country in COUNTRIES:
        builder.add_node(country, country_summary_node(country))
        builder.add_edge("start", country)
        builder.add_edge(country, "combine")

    builder.add_edge("combine", END)
    builder.set_entry_point("start")

    graph = builder.compile()
    # Seed with an explicit empty list so the input always writes `state`.
    final_state = graph.invoke({"state": []})

    return [
        {
            "country": country,
            "summary": summary,
            "file_path": file_paths[country],
        }
        for country, summary, _articles in final_state["state"]
    ]
