In [4]:
import os
from io import BytesIO
import markdown2
import requests
from langgraph.constants import Send
from langgraph.graph import END, StateGraph
from LLM_get_folder import get_local_folder
from PIL import Image
from utilities.wordpress_tools import (
    get_news_urls,
    insert_keyword_url,
    post_wordpress_file,
    post_wordpress_post,
    set_news_url_flag,
    tags_to_IDs,
    tags_to_IDs_en,
    update_summary_qa,
    image_insert_fuc,
)
from schemas.schemas_publish import (
    OutlinesList,
    GraphState,
    SummaryOutput,
    MorePoints,
    MetaFormat,
)
from utilities.llm_wrapper import llm_wrapper_raw, llm_image_wrapper
from utilities.web_search_wrapper import web_search_wrapper
from utilities.web_loader_wrapper import web_loader_wrapper
from langfuse.decorators import observe, langfuse_context
import random

# Configure the Langfuse decorator context (imported alongside `observe`).
# Credentials and host are read from environment variables; os.getenv returns
# None for any variable that is not set.
langfuse_context.configure(
    secret_key=os.getenv("LANGFUSE_SECRET_KEY_NEWS"),
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY_NEWS"),
    host=os.getenv("LANGFUSE_HOST"),
    enabled=True,
)
# Cap on how many generated topics topics_to_search fans out to web_search.
MAX_WEB_URL = 5
# Cap on how many successfully summarized results web_search keeps per query.
MAX_QUERY_RESULT = 3
# Chinese writer personas, each a Markdown snippet (bold persona name, a
# description, then a bullet list of habits). write_article_cn picks one at
# random per article and places it at the top of the section-writing system
# prompt. These strings are prompt text sent to the LLM, so edits change output.
styles_markdown_cn = [
    "**幽默吐槽型**  \n你喜欢用幽默、犀利又不失风趣的方式解读新闻，善于抓住热点话题中的槽点，以夸张搞笑的表达方式吐槽，引发读者共鸣的同时，也能深入浅出地表达独特的见解。\n\n- 喜欢称自己为「小编」或「吐槽担当」\n- 经常使用表情包语言，比如「黑人问号脸」\n- 热衷用「等等，这事靠谱吗？」、「画风突变」表达惊讶\n- 习惯反问和夸张修辞强调槽点",
    "**文艺青年型**  \n你具有敏锐的感性思维，钟情于文学、电影和艺术，评论时倾向于使用细腻而富有诗意的语言，经常引经据典，用温柔的笔触赋予新闻事件更多的人文色彩与意境。\n\n- 喜欢称自己为「本小编」或「文艺小编」\n- 习惯用「岁月静好」、「云淡风轻」等文艺表达\n- 热衷引用经典文学或电影台词\n- 评论开头或结尾常用诗句或名言",
    "**知性干练型**  \n你语言简洁清晰、逻辑严密，擅长用干练、理性的风格解析新闻热点，偶尔穿插精准到位的点评，既展现出专业素养，又不失趣味性，给读者干净利落的阅读体验。\n\n- 喜欢称自己为「笔者」或「本小编」\n- 常用短语「不妨一看」、「细想起来」、「简言之」\n- 喜欢分条陈述，如「首先」、「其次」、「最后」",
    "**邻家大哥/大姐型**  \n你风格亲切、温和随性，善于将自己海外求学或生活经验融入评论，以邻家般轻松、自然的语气分享观点与故事，让读者感受到亲切感和轻松的氛围。\n\n- 喜欢称自己为「小编」或「你们的大哥/大姐」\n- 常用口语表达如「小伙伴们」、「聊聊看」\n- 偶尔分享个人故事：「记得当年留学时……」",
    "**酷炫潮流型**  \n你时尚敏锐、紧跟潮流，擅长运用当下年轻人流行的网络热词、表情包或流行梗，文字风格活泼而新潮，能迅速引起年轻读者的注意和共鸣。\n\n- 喜欢称自己为「潮流小编」或「时尚担当」\n- 喜欢插入「skr」、「YYDS」、「emo」等网络热词\n- 常用感叹语如「绝绝子」、「太上头了」",
    "**学霸高知型**  \n你知识丰富、涉猎广泛，经常引用权威观点或数据来支撑自己的论点，擅长用通俗易懂却又知识感满满的语言，给读者一种跟随你一起“涨知识”的愉悦感。\n\n- 喜欢称自己为「小编」或「知识搬运工」\n- 常用「研究显示」、「数据显示」等权威表达\n- 喜欢提醒读者「敲黑板，划重点」",
    "**犀利批判型**  \n你评论风格锐利、观点鲜明，勇于质疑主流观点，言语犀利却不失逻辑，擅长直击问题核心，能够有效地刺激读者的思考，引发热烈讨论。\n\n- 喜欢称自己为「本小编」或「吐槽大王」\n- 常用「说到底」、「不得不说」强调观点\n- 喜欢使用如「扎心了」、「灵魂拷问」增加尖锐感",
    "**佛系淡然型**  \n你擅长用淡然平和的语气看待热点新闻，既不激烈也不过分渲染，评论风格随性、平静，偶尔还会调侃一下人生智慧，让读者在繁忙之中体会难得的平静。\n\n- 喜欢称自己为「佛系小编」\n- 常用「随缘」、「不强求」、「淡定」表达\n- 喜欢插入养生或感悟，如「少生气，多喝水」",
    "**幽默老干部型**  \n你风格沉稳严谨却又时常一本正经地开玩笑，喜欢以稳重的语言反差性地表达幽默观点，打造一种“老干部”式的亲切感和独特的趣味体验。\n\n- 喜欢称自己为「本编」或「老干部小编」\n- 常用老派表达「同志们」、「划重点」\n- 偶尔一本正经吐槽：「此事甚为荒唐」",
    "**热血励志型**  \n你语言充满激情与正能量，喜欢讲述激励人心的故事，善于用热血澎湃的语气激励读者，评论中充满鼓励、启发与对未来积极的期望，感染读者的同时鼓舞人心。\n\n- 喜欢称自己为「奋斗小编」或「正能量担当」\n- 常用鼓励语「加油呀」、「别放弃」、「小伙伴们冲啊」",
    "**潮酷玩家型**  \n你熟悉潮流文化、游戏和科技领域，语言风格新潮酷炫，习惯以年轻玩家或科技发烧友的视角点评新闻，常引用相关领域的专属术语或梗，体现个性与专业。\n\n- 喜欢称自己为「玩家小编」或「潮酷担当」\n- 常用游戏术语「打怪升级」、「满级操作」",
    "**冷幽默型**  \n你表面文字严肃克制，实际常用隐晦幽默的冷笑话进行“偷袭”，语言风格幽默而隐忍，经常在严肃叙述中突然抛出冷段子，令读者猝不及防地笑出声来。\n\n- 喜欢称自己为「低调小编」\n- 常用「一本正经胡说八道」、「你品，你细品」",
    "**精英职场型**  \n你擅长以职场精英视角进行评论，语言专业且精炼，善于将校园新闻与职场热点结合，用充满洞察力的语言表达见解，给人职场达人般干练且专业的感觉。\n\n- 喜欢称自己为「职场小编」或「职场老司机」\n- 常用表达「从职场角度看」、「职场人表示」\n- 喜欢分享「职场干货」、「职场小技能get」",
    "**生活家型**  \n你热爱生活，善于从日常小事出发，分享美食、旅行或休闲生活方式，将严肃新闻与生活趣味巧妙连接，语言自然随性，散发出惬意舒适的生活氛围。\n\n- 喜欢称自己为「生活达人」或「居家小编」\n- 常分享「美食好去处」、「旅行推荐」\n- 喜欢用语「生活嘛，就该轻松点」、「认真生活，慢慢变好」",
    "**中二热血型**  \n你言语夸张而戏剧化，带有明显的“中二”风格，喜欢使用动漫式的热血表达，文字中带有一种充满激情与幽默的夸张感，能迅速引起年轻群体的共鸣。\n\n- 喜欢称自己为「燃系小编」或「热血担当」\n- 常用动漫表达「吾辈」、「觉醒吧少年」\n- 习惯性加入热血感叹「简直燃爆了！」、「青春无敌！」",
    "**科技极客型**  \n你热爱前沿科技、数码产品，习惯用科技圈的语言和术语表达观点，专业而不失趣味，经常融入科技发展的前沿观点，让读者感受到科技爱好者的热情。\n\n- 喜欢称自己为「科技小编」或「极客达人」\n- 常用术语「黑科技」、「极客必备」、「开箱测评」\n- 喜欢调侃自己「又要剁手买新品了」",
    "**雅痞随性型**  \n你风格潇洒、轻松不羁，善于用随意却又不失深度的语言点评新闻，有种独特的雅痞气质，文字自带一种慵懒而又睿智的魅力，十分耐读。\n\n- 喜欢称自己为「雅痞小编」\n- 常用随意表达「随便聊聊」、「讲真」\n- 偶尔使用调侃语气，「哎，说多了都是泪」、「不说了，上咖啡」",
    "**情感治愈型**  \n你文字风格温暖细腻，善于捕捉事件中的情绪细节，评论往往触及读者内心，给予读者温柔的安慰与治愈，仿佛是一场贴心而又温暖的对话。\n\n- 喜欢称自己为「治愈系小编」或「暖心担当」\n- 常用暖心语言「抱抱你」、「别难过啦」\n- 喜欢在文末安慰读者「一切都会好起来的」、「温暖常在」",
    "**尖锐讽刺型**  \n你语言犀利且锋芒毕露，喜欢用略带夸张的讽刺或黑色幽默揭示事件的本质，文风大胆而尖锐，能引发读者更深层次的思考与共鸣。\n\n- 喜欢称自己为「犀利小编」或「吐槽狂魔」\n- 常用反讽表达「厉害了」、「呵呵」\n- 习惯使用反问句加强语气「这能忍？」、「你认真的吗？」",
    "**探险猎奇型**  \n你好奇心极强，热爱挖掘新闻背后的奇闻趣事或冷门角度，语言生动形象，习惯将看似平凡的新闻事件变成充满新奇与冒险色彩的探索故事，激发读者的兴趣和求知欲。\n\n- 喜欢称自己为「探险小编」或「猎奇达人」\n- 常用表达「真相只有一个」、「你绝对想不到」\n- 喜欢神秘语气开篇：「今天，小编又发现了个不得了的秘密」",
]

# English writer personas, each a Markdown snippet (bold persona name, a
# description, then a bullet list of habits). write_article_en picks one at
# random per article and places it at the top of the section-writing system
# prompt. These strings are prompt text sent to the LLM, so edits change output.
styles_markdown_en = [
    '**Humorous and Sarcastic**  \nYou enjoy using witty humor and sharp sarcasm to interpret news stories. You excel at spotting humorous flaws in trending topics, expressing them in an exaggerated yet insightful manner.\n\n- Often refer to yourself as "Your sarcastic editor" or "The humor department"\n- Frequently use meme-inspired phrases such as "Seriously?" or "Wait, is this for real?"\n- Love to add playful rhetorical questions to emphasize points',
    '**Literary and Artistic**  \nYou have a refined artistic sensibility and a love for literature, cinema, and arts. Your commentaries employ poetic, thoughtful language, frequently drawing from literary references to add depth.\n\n- Refer to yourself as "This artistic editor"\n- Often include famous quotes or literary references\n- Use phrases like "as gentle as a summer breeze" or "paints a vivid picture"',
    '**Intellectual and Concise**  \nYour style is clear, concise, and rational, reflecting a thoughtful approach. You communicate complex ideas effectively with occasional sharp insights.\n\n- Refer to yourself as "The writer" or "This editor"\n- Use logical transitions like "First", "Second", "Finally"\n- Often say "Let\'s consider this carefully" or "Simply put"',
    '**Friendly Neighbor**  \nYour commentary is warm and casual, weaving personal anecdotes or relatable life experiences into your discussions.\n\n- Frequently refer to yourself as "Your friendly editor" or "Big brother/sister here"\n- Use informal expressions like "Hey guys," or "Let\'s chat"\n- Often mention personal experiences: "Back in my university days…"',
    '**Cool and Trendy**  \nYou\'re at the forefront of trends, effortlessly incorporating slang, memes, and internet culture into your commentary.\n\n- Call yourself "Your trendy editor"\n- Regularly use phrases like "That\'s lit," "Can\'t even," or "Vibe check"\n- Frequently use trending acronyms like "SMH," "TBH," and "YOLO"',
    '**Knowledgeable Scholar**  \nYou\'re deeply knowledgeable, often incorporating authoritative sources or intriguing facts into your writing.\n\n- Call yourself "Your scholarly editor"\n- Use phrases like "Studies show" or "According to recent data"\n- Often emphasize points with "Pay attention here!" or "Here\'s an interesting fact"',
    '**Sharp Critic**  \nYour writing is bold and analytical, not afraid to challenge mainstream ideas, dissecting issues with precision.\n\n- Refer to yourself as "Your sharp-tongued editor"\n- Frequently use strong phrases like "Let\'s be honest," "Frankly," or "Face the truth"\n- Occasionally insert provocative questions like "Are we seriously okay with this?"',
    '**Zen and Relaxed**  \nYou write calmly and casually, providing a peaceful perspective without sensationalism.\n\n- Call yourself "Your zen editor"\n- Often use calming expressions like "Take a deep breath," "Relax," or "It\'s all good"\n- Occasionally give friendly advice, "Drink more water, stress less"',
    '**Humorous Traditionalist**  \nYou blend a conservative, steady persona with humorous undertones, often joking within a serious context.\n\n- Refer to yourself as "Your traditional editor"\n- Commonly use expressions like "Mark my words" or "As they say in the old days"\n- Enjoy playfully formal statements: "This indeed is quite the predicament"',
    '**Passionate Motivator**  \nYour language brims with passion and positivity, inspiring readers with enthusiastic insights.\n\n- Often call yourself "Your motivational editor"\n- Use encouraging phrases like "You got this!", "Keep pushing!", or "Dream big!"\n- Frequently end with uplifting messages like "The future is bright!"',
    '**Trendy Gamer**  \nYou\'re deeply engaged with gaming, technology, and pop culture, using relevant jargon and humor.\n\n- Refer to yourself as "Your gamer editor"\n- Frequently use gaming terms like "Level up," "Epic win," or "GG"\n- Enjoy referencing memes from gaming communities, like "That’s a pro move" or "Achievement unlocked!"',
    '**Deadpan Humorist**  \nYour commentary is subtly humorous and often unexpected, delivering jokes in a straight-faced, deadpan manner. Readers are delightfully caught off guard, finding humor in places they least expect.\n\n- Often call yourself "Your low-key editor"\n- Frequently use phrases like "You do the math," or "I\'m just saying"\n- Love surprising readers with unexpected jokes at serious moments',
    '**Corporate Professional**  \nYour writing style reflects a polished professional who blends campus news effortlessly with workplace trends. You use concise, business-savvy language to provide insightful analysis.\n\n- Often introduce yourself as "Your career-minded editor"\n- Use expressions like "From a professional standpoint," or "Office folks will relate"\n- Frequently share tips labeled "Pro tip" or "Career hack"',
    '**Lifestyle Enthusiast**  \nYou embrace life’s pleasures, from food and travel to leisure activities, connecting everyday experiences with news topics. Your easy-going style creates an inviting reading experience.\n\n- Regularly refer to yourself as "Your lifestyle editor"\n- Frequently use expressions like "Here\'s a hidden gem," or "Life’s too short"\n- Often offer friendly advice like "Treat yourself" or "Enjoy the little things"',
    '**Enthusiastic Anime Fan**  \nYour style is exaggerated, vibrant, and full of anime-inspired energy and excitement. You often use playful, dramatic expressions, engaging younger readers who share your passion.\n\n- Often call yourself "Your anime-loving editor"\n- Use anime-inspired expressions like "Unleash your inner hero," or "Power level over 9000!"\n- Enjoy using dramatic phrases such as "Epic moment!" or "This deserves an anime adaptation!"',
    '**Tech Geek**  \nYou’re passionate about cutting-edge technology and gadgets, seamlessly incorporating technical terms and innovative concepts into your commentary. Your informed yet playful approach engages tech-savvy readers.\n\n- Often introduce yourself as "Your tech editor"\n- Frequently use phrases like "This gadget is next-level," or "Geek approved"\n- Occasionally humorously admit, "Time to empty my wallet again"',
    '**Charming Nonconformist**  \nYou write with a laid-back yet insightful flair, blending casual nonchalance with sophisticated wit. Your style exudes charming confidence, attracting thoughtful readers.\n\n- Regularly call yourself "Your laid-back editor"\n- Commonly use relaxed phrases such as "Just thinking out loud," or "No big deal, but..."\n- Enjoy casually humorous remarks like "Another coffee won\'t hurt" or "But hey, who\'s counting?"',
    '**Emotional Healer**  \nYou possess a compassionate and emotionally intelligent writing style, adept at exploring emotions subtly and gently. Your comforting tone resonates deeply, offering readers emotional support.\n\n- Frequently refer to yourself as "Your comforting editor"\n- Regularly use warm phrases like "You\'re not alone," "Sending virtual hugs," or "It\'s okay to feel that way"\n- Often end with reassuring phrases such as "Things will get better" or "Stay strong"',
    '**Biting Satirist**  \nYour commentary is sharp, edgy, and occasionally provocative, frequently using satire and dark humor to highlight deeper truths and societal issues. This bold approach encourages readers to reflect critically.\n\n- Call yourself "Your sharp-tongued editor"\n- Regularly use sarcastic remarks like "Oh, brilliant!", "Sure, that makes perfect sense," or "What could possibly go wrong?"\n- Enjoy posing ironic rhetorical questions, like "Are we really surprised?" or "Who saw this coming?"',
    '**Curious Adventurer**  \nDriven by intense curiosity and a desire to explore the unknown, you enthusiastically uncover fascinating and obscure details behind news stories. Your adventurous storytelling sparks readers’ imagination.\n\n- Often refer to yourself as "Your adventurous editor"\n- Regularly use phrases such as "The plot thickens," or "Buckle up for this one"\n- Love starting pieces mysteriously: "Today, I\'ve stumbled upon something fascinating"',
]

In [5]:
@observe
def summary_from_url(url):
    """Load a web page and have the LLM summarize it in English.

    Args:
        url: Address of the page; passed to ``web_loader_wrapper``.

    Returns:
        On success, the ``parsed`` attribute of the ``llm_wrapper_raw`` result
        (requested with the ``SummaryOutput`` schema; callers read ``.title``
        and ``.summary`` from it). The sentinel string ``"404"`` when a
        ``requests`` HTTP error with status 403 or 404 is raised while loading
        or summarizing. ``None`` for any other failure.
    """
    try:
        content = web_loader_wrapper(url)
        summary_sys_prompt = """
        Task Description: You are a professional news summarizer. Based on the content of the webpage provided, create a news summary of \
            approximately 500 English words.The summary must be written in English, ensuring comprehensive coverage of the information.
        Specific Requirements:
        1. News Summary: Extract the core content of the news, ensuring the information is complete and coherent. \
            The length should be around 500 English words.
        2. Title Extraction: If the webpage already contains a title, extract it. If there is no title, summarize an appropriate title based on the content. \
            The title must be in English.
        3. Date Information: If the webpage includes a publication date, make sure to include this date in the news summary, \
            using a format that includes the year.
        4. Content Related to U.S. Universities: If the webpage mentions U.S. universities (such as Harvard University, Yale University, etc.), \
        ensure that any related information (e.g., connection to the event or the author) is included in the summary.
        """
        summary_user_prompt = f"""content: {content}"""
        return llm_wrapper_raw(
            summary_sys_prompt, summary_user_prompt, SummaryOutput
        ).parsed
    except requests.exceptions.HTTPError as e:
        # HTTPError.response can be None; reading .status_code directly would
        # raise AttributeError from inside this handler and escape the function.
        status_code = getattr(e.response, "status_code", None)
        if status_code in (403, 404):
            return "404"
        return None
    except Exception:
        # Best-effort: any other failure is reported to the caller as None.
        # Deliberately not a bare ``except`` so KeyboardInterrupt/SystemExit
        # still stop the run.
        return None

In [6]:
@observe
def summary(state):
    """Entry node: summarize the article at ``state["url"]``.

    When ``summary_from_url`` reports a failure (``None`` or the ``"404"``
    sentinel), the URL is passed to ``set_news_url_flag`` and an ``Exception``
    is raised so the run for this URL stops.

    Returns:
        A state update with ``summary`` (the summary text) and ``documents``
        (a one-element list describing the original article, whose ``url`` has
        everything from the first ``?`` onwards removed).
    """
    url = state["url"]
    result = summary_from_url(url)

    failure = None
    if result is None:
        failure = "Orginal url None error"
    elif result == "404":
        failure = "Orginal url 403/404 error"
    if failure is not None:
        set_news_url_flag(url)
        raise Exception(failure)

    article_title = result.title
    article_summary = result.summary
    print("Finish Initial Summary: ", url)
    # Drop the query string so the stored reference link is the bare address.
    bare_url = url.partition("?")[0]
    original_document = {
        "topic": "This is the original article summary.",
        "url": bare_url,
        "title": article_title,
        "summary": article_summary,
    }
    return {"summary": article_summary, "documents": [original_document]}

In [7]:
@observe
def more_topics(state):
    """Ask the LLM for follow-up discussion topics based on ``state["summary"]``.

    The reply is requested with the ``MorePoints`` schema.

    Returns:
        A state update ``{"topics": ...}`` holding the reply's ``more`` field.
    """
    topic_system_prompt = """From the original summary provided by the user, identify five topics that are \
        closely related to the content and can stimulate further discussion. For each topic, generate a concise \
            and relevant search query in English to represent the discussion point."""
    article_summary = state["summary"]
    topic_user_prompt = f"summary: {article_summary}"
    parsed = llm_wrapper_raw(topic_system_prompt, topic_user_prompt, MorePoints).parsed
    return {"topics": parsed.more}

In [8]:
def topics_to_search(state):
    """Build one ``Send`` to the ``web_search`` node per topic.

    Only the first ``MAX_WEB_URL`` entries of ``state["topics"]`` are used;
    each is passed to the node as ``{"query": topic}``.
    """
    dispatches = []
    for topic in state["topics"][:MAX_WEB_URL]:
        dispatches.append(Send("web_search", {"query": topic}))
    return dispatches

In [9]:
@observe
def web_search(state):
    """Search the web for ``state["query"]`` and summarize the hits.

    Iterates over the ``"news"`` entries of the search result, summarizing each
    entry's ``"link"`` with ``summary_from_url``. Failed pages are skipped; the
    loop stops once ``MAX_QUERY_RESULT`` pages have been summarized.

    Returns:
        A state update ``{"documents": [...]}`` with one dict per summarized
        page (keys ``topic``, ``url``, ``title``, ``summary``).
    """
    query = state["query"]
    collected = []
    for hit in web_search_wrapper(query)["news"]:
        # Only successful summaries count towards the per-query cap.
        if len(collected) >= MAX_QUERY_RESULT:
            break
        link = hit["link"]
        outcome = summary_from_url(link)
        if outcome is None:
            print(link, " --- none response!")
            continue
        if outcome == "404":
            print(link, " --- 403/404 response")
            continue
        print(link, " --- done!")
        page_summary = outcome.summary
        page_title = outcome.title
        collected.append(
            {"topic": query, "url": link, "title": page_title, "summary": page_summary}
        )
    return {"documents": collected}

In [10]:
@observe
def write_outline_cn(state):
    """Ask the LLM for a Chinese writing outline built from ``state["documents"]``.

    The reply is requested with the ``OutlinesList`` schema.

    Returns:
        A state update ``{"sections": ...}`` holding the reply's ``sections``.
    """
    system_prompt = """
    你是一名专注于美国大学新闻的专业评论员。你将收到新闻标题、链接和新闻摘要，以及相关讨论主题和相关文章链接及其摘要。基于这些信息，\
        你的任务是为一篇3000字的评论文章创建详细的写作大纲。你的任务包括以下要求：
    1. 你不需要撰写完整的评论文章，而是提供一个全面的写作规划大纲。
    2. 将文章分为3到5个部分，每个部分应有一个明确的标题，标题放入`title`字段中。标题只包含标题内容，不要包含字数信息。
    3. 对于每个部分，详细描述该部分要涵盖的具体内容，讨论如何展开，以及信息应如何结构化。尽可能提供清晰且具体的指导。描述放入`description`字段中。
    4. 根据3000字的总目标，估算每个部分的合适字数分配。字数放入`words`字段中。
    5. 在撰写大纲时，确保你仅基于提供的信息，并据此规划文章结构。
    6. 每个部分的顺序应与最终文章中的部分顺序一致。
    7. 所有输出内容都应该是中文。
    """
    materials = state["documents"]
    user_prompt = f"原始新闻及相关讨论主题的材料：{materials}"
    outline = llm_wrapper_raw(system_prompt, user_prompt, OutlinesList).parsed
    return {"sections": outline.sections}

In [11]:
@observe
def write_outline_en(state):
    """Ask the LLM for an English writing outline built from ``state["documents"]``.

    The reply is requested with the ``OutlinesList`` schema.

    Returns:
        A state update ``{"sections_en": ...}`` holding the reply's ``sections``.
    """
    system_prompt = """
    You are a professional commentator specializing in news about U.S. universities. You will be provided with a headline, link, \
        and summary of a piece of original news, along with related discussion topics and links to relevant articles and their summaries. \
            Based on this information, your task is to create a detailed outline for a 3,000-word commentary article.
    Please follow these instructions:
    1. You do not need to write the full article; instead, provide a comprehensive outline.
    2. Divide the article into 3 to 5 sections, each with a clear title (use the `title` field).
    3. For each section, describe the specific content to be covered, explain how the discussion should develop, and outline the \
        structure of the information (use the `description` field).
    4. Distribute the 3,000-word target across the sections, estimating a word count for each section (use the `words` field).
    5. Base the outline solely on the provided materials, and ensure the sections appear in the order they would in the final article.
    6. All outputs should be in English.
    """
    materials = state["documents"]
    user_prompt = f"Original summary and topics and summaries for expanded discussion：{materials}"
    outline = llm_wrapper_raw(system_prompt, user_prompt, OutlinesList).parsed
    return {"sections_en": outline.sections}

In [12]:
@observe
def write_article_cn(state):
    """Write the Chinese article section by section, following the outline.

    Reads ``state["documents"]`` (source materials) and ``state["sections"]``
    (outline entries exposing ``title``, ``description`` and ``words``). One
    persona from ``styles_markdown_cn`` is chosen at random for the whole
    article. Each LLM call receives the materials, the full outline and the
    text written so far, so that sections continue one another.

    Returns:
        A state update ``{"write_sections": [...]}`` with one
        ``{"title", "content"}`` dict per outline section, in outline order.
    """
    documents = state["documents"]
    sections = state["sections"]
    # Chosen once so every section of the article shares the same voice.
    persona = random.choice(styles_markdown_cn)
    article = ""
    write_sections = []
    for section in sections:
        write_system_prompt = f"""{persona}\n
        你将收到新闻标题、链接和新闻摘要，以及相关讨论主题和相关文章的链接及摘要。请使用这些材料，按照提供的写作大纲，**接续已完成的部分，撰写评论文章的下一部分**。
        请确保你的写作符合以下要求：
            - **内容要求**：{section.description}
            - **字数限制**：约{section.words}字
            - **连贯性**：你的内容应紧接已完成的部分，保持上下文的连贯，衔接自然
            - **避免总结性陈述**：除非你要写作的部分是文章的结论部分，否则不要做出任何总结性或概括性的陈述，尤其不要对这一部分进行总结
            - **不含标题**：内容中不应包含任何标题
            - **语言要求**：最终内容请用中文撰写
            - **结构一致性**：参考全文的写作大纲和已完成的部分，确保你的内容与整体结构一致
            - **语言自然性**：注意用词多样性，使文章自然流畅，更贴近人类的表达方式
            - **输出格式**： 输出内容仅包含文字部分，不要包含任何其他信息或格式，例如不要使用代码块（不要使用```）、markdown等
        """

        # Built outside the f-string: reusing double quotes inside a
        # double-quoted f-string is a SyntaxError before Python 3.12.
        finished_part = ("文章已完成的部分：" + article) if article else ""
        write_user_prompt = f"原始新闻及相关讨论主题的材料：{documents}\n\n全文的写作大纲：{sections}\n\n{finished_part}"

        response = llm_wrapper_raw(write_system_prompt, write_user_prompt).text
        write_sections.append({"title": section.title, "content": response})
        article = article + "\n\n" + response
        print(section.title, "--------section done!")
    return {"write_sections": write_sections}

In [13]:
@observe
def write_article_en(state):
    """Write the English article section by section, following the outline.

    Reads ``state["documents"]`` (source materials) and ``state["sections_en"]``
    (outline entries exposing ``title``, ``description`` and ``words``). One
    persona from ``styles_markdown_en`` is chosen at random for the whole
    article. Each LLM call receives the materials, the full outline and the
    text written so far, so that sections continue one another.

    Returns:
        A state update ``{"write_sections_en": [...]}`` with one
        ``{"title", "content"}`` dict per outline section, in outline order.
    """
    documents = state["documents"]
    sections = state["sections_en"]
    # Chosen once so every section of the article shares the same voice.
    persona = random.choice(styles_markdown_en)
    article = ""
    write_sections = []
    for section in sections:
        write_system_prompt = f"""{persona}\n
        You will receive the news title, link, and summary, as well as related discussion topics and links to related articles with their summaries.\
            Please use these materials to write a part of the article according to the provided writing outline.
        Please ensure your writing meets the following requirements:
        - **Content Requirements**: {section.description}
        - **Word Limit**: Approximately {section.words} words
        - **Coherence**: If completed parts are provided, your content should directly follow the completed parts, maintaining contextual coherence and natural transitions
        - **Avoid Summative Statements**: Unless the part you're writing is the conclusion of the article, do not make any summarizing or generalizing statements, especially do not summarize this section
        - **No Titles**: The content should not include any titles
        - **Language Requirements**: The final content should be written in English
        - **Structural Consistency**: Refer to the overall writing outline and the completed parts of the article to ensure your content is consistent with the overall structure
        - **Natural Language**: Pay attention to the diversity of word choice to make the article flow naturally and be closer to human expression
        - **Output Format**: Please ensure the output contains only the main text, without any additional information or formatting, such as code blocks, markdown, etc.
        """

        # Built outside the f-string: reusing double quotes inside a
        # double-quoted f-string is a SyntaxError before Python 3.12.
        finished_part = ("Completed parts of the article:" + article) if article else ""
        write_user_prompt = f"Materials of the original news and related discussion topics: {documents}\n\nFull writing outline: {sections}\n\n{finished_part}"

        response = llm_wrapper_raw(write_system_prompt, write_user_prompt).text
        write_sections.append({"title": section.title, "content": response})
        article = article + "\n\n" + response
        print(section.title, "--------section done!")
    return {"write_sections_en": write_sections}

In [14]:
def _assemble_markdown(outline, written, image_slot):
    """Join written sections into one Markdown string, in outline order.

    Each outline entry is matched to the first written section with the same
    title; entries without a match contribute nothing but still count towards
    the position index. The section at position ``image_slot`` has its content
    passed through ``image_insert_fuc``.
    """
    parts = []
    for position, planned in enumerate(outline):
        match = next((w for w in written if w["title"] == planned.title), None)
        if match is None:
            continue
        body = match["content"]
        if position == image_slot:
            body = image_insert_fuc(body)
        parts.append("###" + match["title"] + "\n\n" + body + "\n\n")
    return "".join(parts)


@observe
def add_reference(state):
    """Assemble both articles as Markdown and append an LLM-built reference list.

    Reads ``documents``, ``sections``/``sections_en`` (outlines) and
    ``write_sections``/``write_sections_en`` (written sections) from ``state``.
    For each language the section at index ``len(written) // 2 - 1`` is passed
    through ``image_insert_fuc`` (presumably inserting the image placeholder
    that the publish steps look for — confirm in utilities.wordpress_tools).
    The LLM is then asked, once per language, to list only the sources the
    article actually cites.

    Returns:
        A state update with ``content`` (Chinese) and ``content_en`` (English),
        each ending with a references heading followed by the LLM's list.
    """
    documents = state["documents"]
    write_sections = state["write_sections"]
    write_sections_en = state["write_sections_en"]

    content_cn = _assemble_markdown(
        state["sections"], write_sections, len(write_sections) // 2 - 1
    )
    content_en = _assemble_markdown(
        state["sections_en"], write_sections_en, len(write_sections_en) // 2 - 1
    )

    rewrite_system_message = """你是一名专注于美国大学新闻的评论员。你将收到新闻标题、链接和新闻摘要，以及相关讨论的主题、相关文章链接及其摘要。\
        用户将提供一篇基于这些材料写好的文章。你的任务是根据提供的材料在文章末尾添加参考文献。确保只包含文章中直接引用过的来源，跳过没有直接引用过的资料来源。\
            最终输出结果的格式为引用的文章标题数字列表，并在标题上加上url超链接。输出内容仅包含参考文献的内容，不包含任何标题（比如`参考资料`）或其他额外内容。"""
    rewrite_user_prompt = f"Original summary and topics and summaries for expanded discussion：{documents} \n\n 用户写的文章: {content_cn}"
    reference_cn = llm_wrapper_raw(rewrite_system_message, rewrite_user_prompt).text

    rewrite_system_message = """You are a commentator specializing in news about American universities. You will receive news titles, \
        links, and summaries, as well as related discussion topics, links to related articles, and their summaries. The user will provide \
            an article written based on these materials. Your task is to add references at the end of the article based on the provided materials. \
                Ensure that only sources directly cited in the article are included. Skip any sources that are not directly cited. \
                    The final output should be a numeric list of the titles of the cited articles, with each title hyperlinked to its URL. \
                        The output should only contain the references, without any headings or additional content."""
    rewrite_user_prompt = f"Original summary and topics and summaries for expanded discussion：{documents} \n\n The written article: {content_en}"
    reference_en = llm_wrapper_raw(rewrite_system_message, rewrite_user_prompt).text

    content_cn = content_cn + "###参考资料：\n\n" + reference_cn
    content_en = content_en + "###Reference: \n\n" + reference_en
    return {"content": content_cn, "content_en": content_en}

In [15]:
@observe
def article_metas(state):
    """Ask the LLM for publishing metadata derived from ``state["content"]``.

    The reply is requested with the ``MetaFormat`` schema. Its ``tags`` and
    ``tags_en`` names are converted with ``tags_to_IDs`` / ``tags_to_IDs_en``.

    Returns:
        A state update with ``title``, ``title_en``, ``image_query``,
        ``image_filename``, ``tags``, ``tags_en``, ``image_alt`` and
        ``image_alt_en``.
    """
    system_prompt = """请完成以下任务：
                1. 根据下面给出的文章内容，为文章取一个合适的标题。标题需要有中文和英文两个版本，中文版标题长度在20到30个中文字，英文标题长度在10到20个英文单词，分别放入title和title_en。
                2. 根据文章内容，生成一份详细的图像生成提示（image generation prompt），提示词应为英文，并注意使用不会违反“安全系统”的安全词汇。图像风格应基于文章内容，放入image_query。
                3. 为上述图像生成提示生成一个英文的图像文件名，但不包含文件类型扩展名，放入image_filename。
                4. 为上述图像生成alt text，中文和英文分别放出image_alt_text和image_alt_text_en。
                5. 生成一些与文章内容相关的标签，标签同样需要有中文和英文两个版本，分别放入tags和tags_en。"""
    article_text = state["content"]
    user_prompt = f"下面是需要处理的文章内容：\n\n{article_text}"
    meta = llm_wrapper_raw(system_prompt, user_prompt, MetaFormat).parsed

    # Convert the tag names returned by the LLM, Chinese first, then English.
    tag_ids_cn = tags_to_IDs(meta.tags)
    tag_ids_en = tags_to_IDs_en(meta.tags_en)

    state_update = {
        "title": meta.title,
        "title_en": meta.title_en,
        "image_query": meta.image_query,
        "image_filename": meta.image_filename,
        "tags": tag_ids_cn,
        "tags_en": tag_ids_en,
        "image_alt": meta.image_alt_text,
        "image_alt_en": meta.image_alt_text_en,
    }
    return state_update

In [16]:
@observe
def generate_image(state):
    """Generate the article image, save it as a JPEG and upload it twice.

    The image is generated from ``state["image_query"]``, saved under
    ``<local folder>/images/<image_filename>.jpg`` and uploaded with
    ``post_wordpress_file`` once with ``lang_type="cn"`` and once with
    ``lang_type="en"``.

    Returns:
        A state update with ``image_ID``, ``image_url``, ``image_ID_en`` and
        ``image_url_en``. If any step fails, the error is printed and the IDs
        are ``-1`` with empty URLs, which the publish steps treat as "use the
        default image".
    """
    image_filename = state["image_filename"]
    image_folder = os.path.join(get_local_folder(), "images")
    jpg_image = os.path.join(image_folder, image_filename + ".jpg")
    image_query = state["image_query"]
    try:
        # Saving fails if the target folder does not exist yet.
        os.makedirs(image_folder, exist_ok=True)
        generated_image = llm_image_wrapper(image_query)
        with Image.open(BytesIO(generated_image.image.image_bytes)) as image:
            # JPEG cannot store alpha or palette modes; without the conversion
            # such images raise on save and the default image is used instead.
            if image.mode in ("RGB", "L", "CMYK"):
                jpeg_ready = image
            else:
                jpeg_ready = image.convert("RGB")
            # The Pillow keyword is ``optimize``; the previous ``optimized``
            # was silently ignored by the JPEG writer.
            jpeg_ready.save(jpg_image, optimize=True, quality=20)
        response = post_wordpress_file(jpg_image, lang_type="cn")
        response = response.json()
        image_ID = int(response.get("id"))
        image_url = response.get("guid").get("rendered")
        response_en = post_wordpress_file(jpg_image, lang_type="en")
        response_en = response_en.json()
        image_ID_en = int(response_en.get("id"))
        image_url_en = response_en.get("guid").get("rendered")
        return {
            "image_ID": image_ID,
            "image_url": image_url,
            "image_ID_en": image_ID_en,
            "image_url_en": image_url_en,
        }
    except Exception as e:
        # Best-effort step: publishing continues with the default image.
        print(e)
        return {"image_ID": -1, "image_url": "", "image_ID_en": -1, "image_url_en": ""}


@observe
def publish_post(state):
    """Publish the Chinese article to WordPress.

    Reads ``title``, ``image_alt``, ``image_ID``, ``image_url``, ``tags`` and
    ``content`` from ``state``. An ``image_ID`` of ``-1`` (image generation
    failed) is replaced by the default image. The ``[image_placeholder]``
    marker in the Markdown is replaced by an ``<img>`` tag, or the tag is put
    on its own line at the top when no marker is present. The Markdown is then
    converted to HTML, passed through ``insert_keyword_url`` (whose returned
    tags are merged into ``tags``) and posted. Finally ``update_summary_qa``
    is called with the new post ID and the original Markdown.

    Returns:
        None.
    """
    import html  # function-scope: only needed to escape the alt text

    title = state["title"]
    image_alt = state["image_alt"]
    image_ID = state["image_ID"]
    image_url = state["image_url"]
    tags = state["tags"]
    if image_ID == -1:
        image_url = "https://www.forwardpathway.com/wp-content/uploads/2024/06/fp_college_news_default.jpg"
        image_ID = 107009
    content = state["content"]
    raw_content = content
    # The alt text is LLM-generated; escape it so quotes or angle brackets
    # cannot break out of the attribute.
    image_tag = """<img src="{}" alt="{}">""".format(
        image_url, html.escape(str(image_alt), quote=True)
    )
    # ``in`` also matches a marker at index 0, which ``find(...) > 0`` missed.
    if "[image_placeholder]" in content:
        content = content.replace("[image_placeholder]", image_tag)
    else:
        # Newline keeps the first "###" heading at the start of its own line
        # (the English publisher already did this).
        content = image_tag + "\n" + content
    content = markdown2.markdown(
        content,
        extras=["tables", "footnotes"],
    )
    (content, new_tags) = insert_keyword_url(content)
    tags = tags | new_tags
    response = post_wordpress_post(
        post_title=title,
        post_body=content,
        featured_media_id=image_ID,
        tags=tags,
        categories=[3627],
        comment_status="closed",
        lang_type="cn",
    )
    response = response.json()
    post_ID = response.get("id")
    update_summary_qa(post_ID, raw_content)
    return


@observe
def publish_post_en(state):
    """Publish the English article to WordPress.

    Reads ``title_en``, ``image_alt_en``, ``image_ID_en``, ``image_url_en``,
    ``tags_en`` and ``content_en`` from ``state``. An image ID of ``-1`` (image
    generation failed) is replaced by the default image. The
    ``[image_placeholder]`` marker in the Markdown is replaced by an ``<img>``
    tag, or the tag is put on its own line at the top when no marker is
    present. The Markdown is then converted to HTML, passed through
    ``insert_keyword_url`` (whose returned tags are merged into the tags) and
    posted.

    Returns:
        None.
    """
    import html  # function-scope: only needed to escape the alt text

    title = state["title_en"]
    image_alt = state["image_alt_en"]
    image_ID = state["image_ID_en"]
    image_url = state["image_url_en"]
    tags = state["tags_en"]
    if image_ID == -1:
        image_url = "https://www.forwardpathway.us/wp-content/uploads/2024/07/fp_college_news_default.jpg"
        image_ID = 15899
    content = state["content_en"]
    # The alt text is LLM-generated; escape it so quotes or angle brackets
    # cannot break out of the attribute.
    image_tag = """<img src="{}" alt="{}">""".format(
        image_url, html.escape(str(image_alt), quote=True)
    )
    # ``in`` also matches a marker at index 0, which ``find(...) > 0`` missed.
    if "[image_placeholder]" in content:
        content = content.replace("[image_placeholder]", image_tag)
    else:
        content = image_tag + "\n" + content
    content = markdown2.markdown(
        content,
        extras=["tables", "footnotes"],
    )
    (content, new_tags) = insert_keyword_url(content, lang_type="en")
    tags = tags | new_tags
    response = post_wordpress_post(
        post_title=title,
        post_body=content,
        featured_media_id=image_ID,
        tags=tags,
        categories=[9],
        comment_status="closed",
        lang_type="en",
    )
    # Result unused; the call is kept so a reply that cannot be decoded still
    # raises here as before (assumes a requests-style response — confirm).
    response.json()
    return

In [17]:
######################## Build LangGraph ####################################
# Graph nodes as (name, callable) pairs, registered in this order.
_NODES = (
    ("summary_node", summary),
    ("more_topics", more_topics),
    ("web_search", web_search),
    ("write_outline_cn", write_outline_cn),
    ("write_outline_en", write_outline_en),
    ("write_article_cn", write_article_cn),
    ("write_article_en", write_article_en),
    ("add_reference", add_reference),
    ("article_metas", article_metas),
    ("generate_image", generate_image),
    ("publish_post", publish_post),
    ("publish_post_en", publish_post_en),
)
# Plain edges added after the conditional fan-out, in this order. The Chinese
# and English branches split after web_search, rejoin at add_reference, and
# split again for publishing.
_EDGES = (
    ("web_search", "write_outline_cn"),
    ("web_search", "write_outline_en"),
    ("write_outline_cn", "write_article_cn"),
    ("write_outline_en", "write_article_en"),
    ("write_article_cn", "add_reference"),
    ("write_article_en", "add_reference"),
    ("add_reference", "article_metas"),
    ("article_metas", "generate_image"),
    ("generate_image", "publish_post"),
    ("generate_image", "publish_post_en"),
    ("publish_post", END),
    ("publish_post_en", END),
)

workflow = StateGraph(GraphState)
for _node_name, _node_fn in _NODES:
    workflow.add_node(_node_name, _node_fn)

workflow.set_entry_point("summary_node")
workflow.add_edge("summary_node", "more_topics")
# topics_to_search returns one Send per topic, all targeting web_search.
workflow.add_conditional_edges("more_topics", topics_to_search, ["web_search"])
for _source, _target in _EDGES:
    workflow.add_edge(_source, _target)
app = workflow.compile()

from IPython.display import Image as IPImage
from IPython.display import display
from langchain_core.runnables.graph import CurveStyle, MermaidDrawMethod, NodeStyles


def _render_mermaid_png(graph):
    """Render ``graph`` to PNG bytes with the colours and draw method used here."""
    return graph.draw_mermaid_png(
        curve_style=CurveStyle.BASIS,
        node_colors=NodeStyles(
            first="fill:#FDFFB6",
            last="fill:#FFADAD",
            default="fill:#CAFFBF,line-height:1",
        ),
        draw_method=MermaidDrawMethod.API,
    )


# Show the graph inline in the notebook.
display(IPImage(_render_mermaid_png(app.get_graph(xray=1)), width=300))

# Also write a rendering of the graph to disk.
img = _render_mermaid_png(app.get_graph())
with open("post_publish_flow_new.png", "wb") as png:
    png.write(img)

In [18]:
@observe
def run_post_publish():
    """
    Run the compiled workflow once for every URL returned by get_news_urls().

    After a successful run the URL is passed to set_news_url_flag (presumably
    marking it as processed — confirm in utilities.wordpress_tools). Any
    exception is printed and the loop moves on to the next URL.
    """
    for pending_url in get_news_urls():
        try:
            app.invoke({"url": pending_url})
            set_news_url_flag(pending_url)
            print(pending_url, "finished")
        except Exception as err:
            print("error for url: ", pending_url)
            print(err)


run_post_publish()

Finish Initial Summary:  https://news.mit.edu/2025/new-model-predicts-chemical-reactions-no-return-point-0423
https://www.businesswire.com/news/home/20250129190397/en/Acellera-and-Psivant-Collaborate-to-Develop-Transformative-Computational-Drug-Discovery-Approaches-Using-AI-and-Quantum-Simulations  --- 403/404 response
https://pubs.acs.org/doi/10.1021/acssuschemeng.4c07930  --- 403/404 response
https://pubs.acs.org/doi/abs/10.1021/acs.inorgchem.5c01314  --- 403/404 response
https://www.genengnews.com/multimedia/webinars/strategies-for-successful-integration-of-computational-and-empirical-data-for-protein-drug-discovery/  --- done!
https://www.nature.com/articles/s44286-024-00165-8  --- done!
https://www.prnewswire.com/news-releases/xtalpi-launches-computational-chemistry-software-for-drug-discovery-xmolgen-and-xfep-302257086.html  --- done!
https://www.nature.com/articles/s43588-025-00790-0  --- done!
https://www.nature.com/articles/s41467-024-52481-5  --- done!
https://www.sciencedire