In [2]:
from dotenv import load_dotenv
from datetime import datetime, timedelta
import requests
import os
import time
import pandas as pd 
from news_preprocessing import *

In [5]:
def fetch_news(api_key, ticker, start_date, end_date):
    base_url = os.environ.get("endpointnewsp")
    headers = {"Authorization": f"Bearer {api_key}"}
    all_news = []
    
    current_date = start_date

    while current_date <= end_date:
        batch_end_date = current_date + timedelta(days=50)
        if batch_end_date > end_date:
            batch_end_date = end_date

        params = {
            "ticker": ticker,
            "published_utc.gte": current_date.strftime('%Y-%m-%d'),
            "published_utc.lte": batch_end_date.strftime('%Y-%m-%d'),
            "limit": 50,
            "sort": "published_utc"
        }

        try:
            response = requests.get(base_url, headers=headers, params=params)
            if response.status_code == 200:
                data = response.json()
                articles = data.get('results', [])
                all_news.extend(articles)
                print(f"Fetched {len(articles)} articles from {current_date.strftime('%Y-%m-%d')} to {batch_end_date.strftime('%Y-%m-%d')}")
                current_date = batch_end_date + timedelta(days=1)
            elif response.status_code == 429:
                print("Rate limit reached. Waiting to retry...")
                time.sleep(60)  # Wait for 60 seconds or as recommended by the API
                continue  # Retry the current request
            else:
                print(f"Failed to fetch data: {response.status_code}, {response.text}")
                break
        except Exception as e:
            print(f"An error occurred: {e}")
            break

    return all_news

# Example usage
api_key = os.environ.get('newsp_api')
ticker = 'TSLA'
start_date = datetime(2022, 4, 1)  # start date
end_date = datetime(2024, 4, 1)
news_articles = fetch_news(api_key, ticker, start_date, end_date)
print(f"Total articles fetched: {len(news_articles)}")


Fetched 50 articles from 2022-04-01 to 2022-05-21
Fetched 50 articles from 2022-05-22 to 2022-07-11
Fetched 50 articles from 2022-07-12 to 2022-08-31
Fetched 50 articles from 2022-09-01 to 2022-10-21
Fetched 50 articles from 2022-10-22 to 2022-12-11
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2022-12-12 to 2023-01-31
Fetched 50 articles from 2023-02-01 to 2023-03-23
Fetched 50 articles from 2023-03-24 to 2023-05-13
Fetched 50 articles from 2023-05-14 to 2023-07-03
Fetched 50 articles from 2023-07-04 to 2023-08-23
Rate limit reached. Waiting to retry...
Fetched 50 articles from 2023-08-24 to 2023-10-13
Fetched 50 articles from 2023-10-14 to 2023-12-03
Fetched 50 articles from 2023-12-04 to 2024-01-23
Fetched 50 articles from 2024-01-24 to 2024-03-14
Fetched 50 articles from 2024-03-15 to 2024-04-01
Total articles fetched: 750


In [6]:
# Process the news articles
df = process_news_articles(news_articles)

In [8]:
df.to_csv('news_articles.csv', index=False)
