In [29]:
# !pip install -q requests==2.32.3
# !pip install -q openai==1.41.1
# !pip install -q langchain==0.2.2
# !pip install -q langchain_community==0.2.3
# !pip install -q langchain-openai==0.1.8
# !pip install -q langgraph==0.0.64

In [30]:
# !pip install refinitiv.data

# 1. Package

In [31]:
import os
import time
import warnings
from datetime import datetime,timedelta

import numpy as np
import pandas as pd
import refinitiv.data as rd
from IPython.display import HTML
from refinitiv.data.content import news
warnings.filterwarnings("ignore")

In [32]:
from openai import OpenAI
import json

In [33]:
from pprint import pprint

Open the Refinitiv session

In [None]:
# Open a Refinitiv Data Library session; it must be open before the
# rd.news calls made in get_date can run.
# NOTE(review): no arguments are passed, so the session settings presumably
# come from the library's own configuration — confirm for your environment.
rd.open_session()

Set the OpenAI API key

In [35]:
# Read the key from the OPENAI_API_KEY environment variable so the secret is
# never typed into (and saved with) the notebook. When the variable is unset
# this falls back to the empty string, which was the previous placeholder.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [36]:
# Single OpenAI client shared by all agent functions in this notebook
# (screening, cleaning, summarize, translate).
client = OpenAI(api_key=OPENAI_API_KEY)

# 2. define agent

2.1 screening agent

In [37]:
def screening_agent(prompt, text):
    """Request a JSON-object reply from the chat model and return it parsed.

    Args:
        prompt: Content of the system message.
        text: Content of the user message.

    Returns:
        The result of ``json.loads`` applied to the content of the first
        choice in the completion.
    """
    conversation = [
        dict(role="system", content=prompt),
        dict(role="user", content=text),
    ]
    request = dict(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        temperature=0,
        response_format={"type": "json_object"},
    )
    response = client.chat.completions.create(**request)
    raw_json = response.choices[0].message.content
    return json.loads(raw_json)

2.2 cleaning agent

In [38]:
def cleaning_agent(prompt, text):
    """Send ``text`` to the chat model under ``prompt`` and return the reply text.

    Args:
        prompt: Content of the system message.
        text: Content of the user message.

    Returns:
        The message content of the first choice in the completion
        (requested with temperature 0).
    """
    system_msg = {"role": "system", "content": prompt}
    user_msg = {"role": "user", "content": text}
    reply = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[system_msg, user_msg],
        temperature=0,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content

2.3 summarizer agent

In [39]:
def summarize_agent(prompt, text):
    """Send ``text`` to the chat model under ``prompt`` and return the reply text.

    Args:
        prompt: Content of the system message.
        text: Content of the user message.

    Returns:
        The message content of the first choice in the completion
        (requested with temperature 0.1).
    """
    payload = {
        "model": "gpt-4o-2024-08-06",
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": text},
        ],
        "temperature": 0.1,
    }
    result = client.chat.completions.create(**payload)
    answer = result.choices[0].message
    return answer.content

2.4 translate agent

In [40]:
def translate_agent(prompt, text):
    """Send ``text`` to the chat model under ``prompt`` and return the reply text.

    Args:
        prompt: Content of the system message.
        text: Content of the user message.

    Returns:
        The message content of the first choice in the completion
        (requested with temperature 0.3).
    """
    roles_and_contents = (("system", prompt), ("user", text))
    chat = client.chat.completions.create(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": role, "content": content}
            for role, content in roles_and_contents
        ],
        temperature=0.3,
    )
    return chat.choices[0].message.content

2.5 get data function

In [41]:
def get_date(code,start,end,num):
    """Fetch news headlines for a query and attach the story text of each one.

    Args:
        code: Query passed to ``rd.news.get_headlines``.
        start: Forwarded to ``get_headlines`` as ``start``.
        end: Forwarded to ``get_headlines`` as ``end``.
        num: Forwarded to ``get_headlines`` as ``count``.

    Returns:
        The headlines DataFrame with its index reset and an added ``story``
        column. A story that could not be fetched is stored as ``''``.
    """
    # get headline
    df = rd.news.get_headlines(code, count=num, start=start, end=end).reset_index()

    # get story
    story_list = []
    for story_id in df['storyId'].values:
        try:
            story_list.append(rd.news.get_story(story_id, format='text'))
        except Exception:
            # Best effort: one failing story must not abort the whole batch.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate so a long fetch can still be stopped.
            story_list.append('')
    df['story'] = story_list
    return df

# 3. set prompt

In [42]:
# System prompt for screening_agent. Every "<region>" placeholder is replaced
# with the region name (str.replace) before the prompt is sent.
# The example responses are written as valid JSON (double-quoted keys) because
# the reply is requested as a JSON object and parsed with json.loads.
screening_prompt = """
As an experienced <region> economics researcher, your task is to read news headlines and score their relevance to the overview of <region> economy.

Use the following scoring system:
- 0 means the headline is not related to the overview of <region> economy.
- 10 means the headline is highly related to the overview of <region> economy.
Respond in JSON format, where the key 'related_score' holds the value of the score.

Examples:
News headline: "<region> stock market record: Optimism."
Response: {"related_score": 9}

News headline: "Local festival attracts thousands of visitors."
Response: {"related_score": 0}
"""

In [43]:
# System prompt for the article-cleaning step: asks the model to strip HTML
# tags, URLs and advertisements while keeping numerical data, and gives one
# input/output example.
# The variable name keeps its original spelling ("clearning") so that any
# existing reference to it still resolves.
# NOTE(review): not referenced by the cells shown here — confirm whether it is
# used elsewhere.
clearning_prompt = """
You are tasked with cleaning up a messy article that may contain HTML tags, URLs, or advertisements.
Your job is to return the article in a readable format while maintaining any necessary numbers that may affect the content.

Please follow these guidelines:
Identify and retain important numerical data within the article.
Remove all unnecessary HTML tags, URLs, and advertisements.
Clean up any other extraneous content that does not contribute to the article's main message or context.

For example:
Input:
"
Attack submarines are designed to seek and destroy enemy submarines and surface ships;
project power ashore with Tomahawk cruise missiles and Special Operation Forces (SOF);
carry out Intelligence, Surveillance and Reconnaissance (ISR) missions; support battle group operations; and engage in mine warfare.
More information on attack submarines can be found here
(https://www.navy.mil/Resources/Fact-Files/Display-FactFiles/Article/2169558/attack-submarines-ssn/).
Disclaimer
The United States Navy published this content on September 27, 2024 and is solely responsible for the information contained herein.
Distributed by Public, unedited and unaltered, on September 27, 2024 at 23:59:38 UTC.
© Copyright 2024 - The United States Navy
"

Output:
"
Attack submarines are designed to seek and destroy enemy submarines and surface ships;
project power ashore with Tomahawk cruise missiles and Special Operation Forces (SOF);
carry out Intelligence, Surveillance and Reconnaissance (ISR) missions; support battle group operations; and engage in mine warfare.
"
"""

In [44]:
# System prompt for summarize_agent: asks for one summary paragraph followed
# by optional detail bullets, written in plain, non-technical language.
# NOTE: the text contains a "<region>" placeholder that is meant to be
# replaced with the region name before the prompt is sent.
summarize_prompt = """
You are an expert <region> economics researcher. You will be provided lists of news.
Your task is to understand all of it then summarize all of it in your word into "Summary format" provided below.
Focus only news that impact on overview's economy.

Summary format:
[Summarize paragraph]
[Detail bullet 1, if necessary]
[Detail bullet 2, if necessary]
[Detail bullet 3, if necessary]

Rules:
- Explain using simple words so people who are not in the financial sector can understand.
- Do not use any idioms or phrases in the summary.
- Do not use any obscure words or sentences.
"""

In [45]:
# System prompt for translate_agent: translate the given article into Thai,
# keeping financial terms and person/market names in the form
# "Thai word (English word)".
translate_prompt = """
You are an expert Thai translator who specializes in economics.
You will be provided with a article that needs to be translated into Thai.
Your task is to translated into Thai.
- If the article contains specific financial terms, translate them while keeping the original English word in the format "Thai word (English word)".
- If the article contains name of person or market, translate them while keeping the original English word in the format "Thai word (English word)".
"""

In [46]:
# News query used for region 'US': four OR-ed clauses, one per code
# (.DJI, .SPX, .NDX, .NYA), each combined with Topic:MCE and Language:LEN.
# NOTE(review): the codes look like index RICs, MCE a topic code and LEN the
# English-language filter — confirm against the Refinitiv news query syntax.
search_keyword_us = """
(R:.DJI AND Topic:MCE AND Language:LEN)
OR (R:.SPX AND Topic:MCE AND Language:LEN)
OR (R:.NDX AND Topic:MCE AND Language:LEN)
OR (R:.NYA AND Topic:MCE AND Language:LEN)
"""

In [47]:
# News query used for region 'CN': four OR-ed clauses, one per code
# (.SSEC, .SZSC, .CSI300, .HSI), each combined with Topic:MCE and Language:LEN.
# NOTE(review): the codes look like index RICs — confirm against the Refinitiv
# news query syntax.
search_keyword_cn = """
(R:.SSEC AND Topic:MCE AND Language:LEN)
OR (R:.SZSC AND Topic:MCE AND Language:LEN)
OR (R:.CSI300 AND Topic:MCE AND Language:LEN)
OR (R:.HSI AND Topic:MCE AND Language:LEN)
"""

In [48]:
# News query used for region 'JP' (code .N225 with Topic:MCE).
# The language restriction is written as "Language:LEN", matching the US and
# CN queries; the previous bare "LEN" term had no field prefix.
# NOTE(review): a bare term is presumably treated as free text rather than a
# language filter — confirm against the Refinitiv news query syntax.
search_keyword_jp = 'R:.N225 AND Topic:MCE AND Language:LEN'

# 4. summarizer function

In [49]:
# `region` must be one of: 'US', 'CN' or 'JP'

In [50]:
def news_summarize(start, end, region, max_news=1000, min_score=8, top_n=10):
    """Fetch, screen, summarize and translate the news of one region.

    Steps: fetch headlines and stories with ``get_date``; score every headline
    with ``screening_agent``; keep the highest-scoring headlines; summarize
    them in English with ``summarize_agent``; translate the summary to Thai
    with ``translate_agent``; print both versions.

    Args:
        start: Start of the date range, forwarded to ``get_date``.
        end: End of the date range, forwarded to ``get_date``.
        region: One of ``'US'``, ``'CN'`` or ``'JP'``.
        max_news: Number of headlines requested from ``get_date``
            (default 1000).
        min_score: Minimum ``related_score`` a headline needs to be kept
            (default 8).
        top_n: Maximum number of headlines passed to the summarizer
            (default 10).

    Returns:
        None. The English and Thai summaries are printed.

    Raises:
        ValueError: If ``region`` is not one of the supported codes.
    """
    region_names = {
        'US': 'United State of America (U.S.)',
        'CN': 'China',
        'JP': 'Japan',
    }
    # Fail early with a clear message instead of hitting an unbound `df` later.
    if region not in region_names:
        raise ValueError(
            f"Unsupported region {region!r}; expected one of {sorted(region_names)}"
        )
    region_name = region_names[region]
    search_keywords = {
        'US': search_keyword_us,
        'CN': search_keyword_cn,
        'JP': search_keyword_jp,
    }

    # get news (at most `max_news` headlines for the region)
    df = get_date(search_keywords[region], start, end, max_news)

    # relationship score: the prompt is identical for every headline of a
    # region, so the placeholder is substituted once, outside the loop
    region_screening_prompt = screening_prompt.replace('<region>', region_name)
    df['related_score'] = [
        screening_agent(region_screening_prompt, headline)['related_score']
        for headline in df['headline']
    ]

    # get top headlines
    df_top = (
        df[df['related_score'] >= min_score]
        .sort_values(by='related_score', ascending=False)
        .head(top_n)
        .reset_index(drop=True)
    )

    # Nothing relevant: do not ask the model to summarize an empty message.
    if df_top.empty:
        print(f'No {region} headline scored at least {min_score}; nothing to summarize.')
        print('----------------------------------------------')
        return

    # combine all news (numbered from 1)
    long_message = ''.join(
        '[News number' + str(position) + ']'
        + 'Headline: ' + row['headline']
        + 'Story: ' + row['story']
        for position, (_, row) in enumerate(df_top.iterrows(), start=1)
    )

    # summarize to eng; the summarize prompt also carries a <region>
    # placeholder, which must be filled in before sending
    en_summarize = summarize_agent(
        summarize_prompt.replace('<region>', region_name), long_message
    )

    # translate to thai
    th_summarize = translate_agent(translate_prompt, en_summarize)

    print('English version:')
    print(en_summarize)
    print('Thai version:')
    print(th_summarize)
    print('----------------------------------------------')

# 5. test

In [51]:
# Regions to process; each entry is one of the codes news_summarize handles
# ('US', 'CN', 'JP').
region_list = ['US','CN','JP']

In [52]:
# Run the full pipeline once per region for the fixed date range
# 2024-10-04 to 2024-10-05; each call prints its English and Thai summaries.
for r in region_list:
    news_summarize('2024-10-04', '2024-10-05', r)

English version:
The U.S. economy showed unexpected strength in September, with 254,000 new jobs added, surpassing forecasts of 147,000. This strong job growth led to a decrease in the unemployment rate to 4.1% from 4.2%. The robust labor market performance has reduced the likelihood of the Federal Reserve implementing large interest rate cuts in the near future. The positive job data has boosted investor confidence, leading to a rise in stock markets and a strengthening of the U.S. dollar.

- The U.S. job market added significantly more jobs than expected, with revisions to previous months also showing higher job growth.
- The unemployment rate fell to 4.1%, indicating a resilient labor market.
- The strong job report has led to a decrease in expectations for large interest rate cuts by the Federal Reserve, with markets now anticipating smaller, more gradual rate reductions.
- The positive economic data has resulted in a surge in stock markets, with major indices like the Nasdaq and S