In [1]:
from datetime import datetime

def convert_to_timestamp(readable_time, time_format="%Y-%m-%d %H:%M:%S", milliseconds=False):
    """
    将可读时间转换为 UNIX 时间戳。
    
    Args:
        readable_time (str): 可读时间字符串，例如 "2024-11-29 10:20:30"
        time_format (str): 时间字符串的格式，默认为 "%Y-%m-%d %H:%M:%S"
        milliseconds (bool): 是否返回毫秒级时间戳，默认 False（返回秒级）

    Returns:
        int: 转换后的时间戳（秒或毫秒）
    """
    try:
        # 转换为 datetime 对象
        dt = datetime.strptime(readable_time, time_format)
        # 获取时间戳，秒级或毫秒级
        timestamp = int(dt.timestamp() * 1000) if milliseconds else int(dt.timestamp())
        return timestamp
    except ValueError as e:
        print(f"Error: {e}")
        return None

# 示例使用
readable_time = "2005-03-01 00:00:00"
print("秒级时间戳:", convert_to_timestamp(readable_time))
print("毫秒级时间戳:", convert_to_timestamp(readable_time, milliseconds=True))

秒级时间戳: 1109664000
毫秒级时间戳: 1109664000000


In [2]:
import os
import requests

NEWS_API_KEY = os.getenv('NEWS_API_KEY')
print(NEWS_API_KEY)


def get_seeking_alpha_news(symbol, since, until, size=40, number=1, api_key=None):
    """
    请求 Seeking Alpha 新闻 API。

    Args:
        symbol (str): 股票代码，例如 'TSLA'
        since (int): 起始时间戳
        until (int): 结束时间戳
        size (int): 每次返回的新闻数量，默认 20
        number (int): 请求的新闻页码，默认 1
        api_key (str): RapidAPI 密钥

    Returns:
        dict: 返回的 JSON 响应内容
    """
    print(f'Processing {symbol} from {datetime.fromtimestamp(since)} to {datetime.fromtimestamp(until)}')
    url = f'https://seeking-alpha.p.rapidapi.com/news/v2/list-by-symbol?until={until}&since={since}&size={size}&number={number}&id={symbol}'
    # url='https://seeking-alpha.p.rapidapi.com/news/v2/list-by-symbol?until=1705342830&since=1704133230&size=40&number=1&id=AAPL'

    print(f"url: {url}")

    headers = {
        'x-rapidapi-host': 'seeking-alpha.p.rapidapi.com',
        'x-rapidapi-key': api_key
    }

    try:
        # 发送 GET 请求
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # 如果响应码不是 2xx，会抛出异常

        # 返回 JSON 数据
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None

# 示例调用
api_key = NEWS_API_KEY # 使用你自己的 API 密钥
symbol = 'AAPL'  # 例如特斯拉股票
since = 1704096000  # 起始时间戳
until = 1705305600  # 结束时间戳

# 获取新闻数据
news_data = get_seeking_alpha_news(symbol, since, until, api_key=api_key)

# 输出结果
if news_data:
    print(news_data)


2f165cbad2msh2a23762cd538f6cp1ce18ejsncafa850eb8af
Processing AAPL from 2024-01-01 00:00:00 to 2024-01-15 00:00:00
url: https://seeking-alpha.p.rapidapi.com/news/v2/list-by-symbol?until=1705305600&since=1704096000&size=40&number=1&id=AAPL
{'data': [{'id': '4054867', 'type': 'news', 'attributes': {'publishOn': '2024-01-12T18:31:07-05:00', 'isLockedPro': False, 'commentCount': 127, 'gettyImageUrl': 'https://static.seekingalpha.com/cdn/s3/uploads/getty_images/1443173998/image_1443173998.jpg', 'videoPreviewUrl': None, 'videoDuration': None, 'themes': {}, 'title': 'Microsoft passes Apple as most valuable public company', 'isPaywalled': False}, 'relationships': {'author': {'data': {'id': '427396', 'type': 'newsAuthorUser'}}, 'sentiments': {'data': []}, 'primaryTickers': {'data': [{'id': '146', 'type': 'tag'}]}, 'secondaryTickers': {'data': []}, 'otherTags': {'data': []}}, 'links': {'self': '/news/4054867-microsoft-passes-apple-as-most-valuable-public-company'}}, {'id': '4054789', 'type': 'ne

In [3]:
import pandas as pd

def process_seeking_alpha_data(data):
    """
    处理从 Seeking Alpha API 获取的数据，提取与情感分析相关的部分或其他有用信息。

    Args:
        data (dict): Seeking Alpha API 返回的 JSON 数据

    Returns:
        pd.DataFrame: 包含新闻相关数据的 DataFrame
    """
    # 提取新闻数据部分
    news_data = data.get('data', [])
    
    # 如果没有新闻数据
    if not news_data:
        return pd.DataFrame()

    # 提取相关数据
    processed_data = []

    for item in news_data:
        # 获取新闻标题和发布日期
        title = item['attributes']['title']
        publish_on = item['attributes']['publishOn']
        author = item['relationships']['author']['data']['id']
        author_name = data.get('included', [])[0]['attributes']['nick'] if 'included' in data else None
        getty_image_url = item['attributes'].get('gettyImageUrl', None)

        # 提取情感数据（当前为空，未来可以根据实际数据进行提取）
        sentiments = item['relationships'].get('sentiments', {}).get('data', [])
        
        sentiment_score = None  # 如果没有情感数据
        sentiment_label = None  # 如果没有情感数据
        
        # 如果有情感数据，提取情感分数或标签
        if sentiments:
            sentiment_score = sentiments[0].get('score')
            sentiment_label = sentiments[0].get('label')

        # 将数据添加到列表中
        processed_data.append({
            'title': title,
            'publish_on': publish_on,
            'author': author,
            'author_name': author_name,
            'getty_image_url': getty_image_url,
            'sentiment_score': sentiment_score,
            'sentiment_label': sentiment_label
        })

    # 返回 DataFrame
    return pd.DataFrame(processed_data)



# # 调用函数处理数据
# df = process_seeking_alpha_data(news_data)
# df.head()


In [4]:

from datetime import timedelta


def get_data_by_two_week_intervals(symbol, api_key, start_date, end_date):
    """
    获取指定时间范围内，按两周为单位的数据

    Args:
        symbol (str): 股票符号
        api_key (str): API 密钥
        start_date (str): 开始日期（YYYY-MM-DD HH:MM:SS）
        end_date (str): 结束日期（YYYY-MM-DD HH:MM:SS）

    Returns:
        pd.DataFrame: 包含新闻相关数据的 DataFrame
    """
    # 转换日期字符串为时间戳
    start_timestamp = convert_to_timestamp(start_date)
    end_timestamp = convert_to_timestamp(end_date)

    data_frames = []

    # 计算两周的时间段
    current_start_date = start_date
    while start_timestamp < end_timestamp:
        # 计算当前的结束日期（14天后）
        current_end_date = (datetime.strptime(current_start_date, "%Y-%m-%d %H:%M:%S") + timedelta(days=14)).strftime("%Y-%m-%d %H:%M:%S")
        if convert_to_timestamp(current_end_date) > end_timestamp:
            current_end_date = end_date  # 确保不超出结束日期
        print(f'grabbing data from {datetime.strptime(current_start_date, "%Y-%m-%d %H:%M:%S")} to {datetime.strptime(current_end_date, "%Y-%m-%d %H:%M:%S")}')
        # 获取当前时间段的新闻数据
        data = get_seeking_alpha_news(symbol, convert_to_timestamp(current_start_date), convert_to_timestamp(current_end_date), api_key=api_key)
        print(f'grabbed data from {datetime.strptime(current_start_date, "%Y-%m-%d %H:%M:%S")} to {datetime.strptime(current_end_date, "%Y-%m-%d %H:%M:%S")} finshed')
        # 处理数据并保存到数据框
        df = process_seeking_alpha_data(data)
        data_frames.append(df)
        
        # 更新起始日期，继续获取下一个两周的新闻
        current_start_date = (datetime.strptime(current_end_date, "%Y-%m-%d %H:%M:%S") + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
        # 更新起始时间戳
        start_timestamp = convert_to_timestamp(current_start_date)

    # 合并所有时间段的新闻数据
    result_df = pd.concat(data_frames, ignore_index=True)

    return result_df

In [5]:
api_key = os.getenv('NEWS_API_KEY_2')# 使用你自己的 API 密钥
print(api_key)
df = get_data_by_two_week_intervals('AAPL', api_key, '2024-01-01 00:00:00','2024-11-29 00:00:00')
df.head()


2f165cbad2msh2a23762cd538f6cp1ce18ejsncafa850eb8af
grabbing data from 2024-01-01 00:00:00 to 2024-01-15 00:00:00
Processing AAPL from 2024-01-01 00:00:00 to 2024-01-15 00:00:00
url: https://seeking-alpha.p.rapidapi.com/news/v2/list-by-symbol?until=1705305600&since=1704096000&size=40&number=1&id=AAPL
grabbed data from 2024-01-01 00:00:00 to 2024-01-15 00:00:00 finshed
grabbing data from 2024-01-16 00:00:00 to 2024-01-30 00:00:00
Processing AAPL from 2024-01-16 00:00:00 to 2024-01-30 00:00:00
url: https://seeking-alpha.p.rapidapi.com/news/v2/list-by-symbol?until=1706601600&since=1705392000&size=40&number=1&id=AAPL
grabbed data from 2024-01-16 00:00:00 to 2024-01-30 00:00:00 finshed
grabbing data from 2024-01-31 00:00:00 to 2024-02-14 00:00:00
Processing AAPL from 2024-01-31 00:00:00 to 2024-02-14 00:00:00
url: https://seeking-alpha.p.rapidapi.com/news/v2/list-by-symbol?until=1707897600&since=1706688000&size=40&number=1&id=AAPL
grabbed data from 2024-01-31 00:00:00 to 2024-02-14 00:00:00 

Unnamed: 0,title,publish_on,author,author_name,getty_image_url,sentiment_score,sentiment_label
0,Microsoft passes Apple as most valuable public...,2024-01-12T18:31:07-05:00,427396,"Jason Aycock, SA News Editor",https://static.seekingalpha.com/cdn/s3/uploads...,,
1,U.S. appeals court affirms tribunal decisions ...,2024-01-12T13:13:18-05:00,56228617,"Jason Aycock, SA News Editor",https://static.seekingalpha.com/cdn/s3/uploads...,,
2,Apple gains even as tech giant added as new sh...,2024-01-12T11:11:59-05:00,55407719,"Jason Aycock, SA News Editor",https://static.seekingalpha.com/cdn/s3/uploads...,,
3,Apple's Cook saw pay drop in 2023; still above...,2024-01-12T10:02:00-05:00,59008794,"Jason Aycock, SA News Editor",https://static.seekingalpha.com/cdn/s3/uploads...,,
4,Apple appoints Wanda Austin to board of direct...,2024-01-11T17:19:45-05:00,59695326,"Jason Aycock, SA News Editor",https://static.seekingalpha.com/cdn/s3/uploads...,,


In [6]:
df.to_csv('../data/AAPL_news_data_2024-01-01_2024-11-29.csv', index=False)
