In [5]:
import datetime
import os
import random
import time

import pandas as pd
import requests

# Configuration
# The key is taken from the POLYGON_API_KEY environment variable when it is set
# (and non-empty); otherwise the original literal is used so existing runs keep
# working unchanged.
# NOTE(review): a key is committed in source here — consider rotating it and
# dropping the fallback once the environment variable is in place.
api_key = os.environ.get("POLYGON_API_KEY") or 'OTl2SM9_8xGEkqop_pj57cYyS4gjsurl'
news_url = "https://api.polygon.io/v2/reference/news"  # Replace with the appropriate API URL if needed

def get_news(ticker, start_date, end_date, limit=100, timeout=30):
    """
    Fetch news for a ticker within a given date range.

    Sleeps for a random 12-13 seconds, then issues a single GET request to
    ``news_url`` using the module-level ``api_key``.

    :param ticker: Stock ticker symbol.
    :param start_date: Start date (YYYY-MM-DD), sent as ``published_utc.gte``.
    :param end_date: End date (YYYY-MM-DD), sent as ``published_utc.lte``.
    :param limit: Number of articles to retrieve per request.
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot block the whole run indefinitely.
    :return: List of news articles (the ``"results"`` field of the JSON
        response, or an empty list when that field is absent).
    :raises Exception: If the response status code is not 200.
    :raises requests.exceptions.RequestException: On connection failure or
        timeout (a subclass of ``Exception``, so existing handlers still apply).
    """
    params = {
        "ticker": ticker,
        "published_utc.gte": start_date,
        "published_utc.lte": end_date,
        "limit": limit,
        "apiKey": api_key,
    }

    # Random sleep before every request to avoid triggering rate limits.
    time.sleep(random.uniform(12, 13))

    response = requests.get(news_url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to get news: {response.status_code} - {response.text}")

    return response.json().get("results", [])

def generate_weekly_intervals(start_date, end_date):
    """
    Generate weekly date intervals within the specified date range.

    Windows are built backwards from ``end_date``: each one spans up to seven
    calendar days (inclusive on both ends), and the oldest window is clipped so
    it never starts before ``start_date``. Both boundary dates are always
    covered, including the case where the oldest window is a single day.

    :param start_date: Earliest date (YYYY-MM-DD).
    :param end_date: Latest date (YYYY-MM-DD).
    :return: List of (start_date, end_date) string tuples for each week,
        ordered from the most recent week to the oldest. Empty when
        ``end_date`` is before ``start_date``.
    :raises ValueError: If either date does not match YYYY-MM-DD.
    """
    date_format = "%Y-%m-%d"
    window_end = datetime.datetime.strptime(end_date, date_format)
    range_start = datetime.datetime.strptime(start_date, date_format)

    intervals = []
    # `>=` (not `>`) so that a final window landing exactly on the start date,
    # or a one-day range, is still emitted instead of being silently dropped.
    while window_end >= range_start:
        window_start = max(range_start, window_end - datetime.timedelta(days=6))
        intervals.append(
            (window_start.strftime(date_format), window_end.strftime(date_format))
        )
        window_end -= datetime.timedelta(days=7)

    return intervals

# Parameters
tickers = ["JNJ", "JPM", "XOM", "AAPL"]
start_date = "2023-11-30"
end_date = "2023-12-31"
all_news = []

# Generate weekly intervals
weekly_intervals = generate_weekly_intervals(start_date, end_date)

# Fetch news data for each ticker and each week.
# Failures are reported and skipped so one bad request does not abort the run.
for ticker in tickers:
    for week_start, week_end in weekly_intervals:
        try:
            print(f"Fetching news for {ticker} from {week_start} to {week_end}...")
            weekly_news = get_news(ticker, week_start, week_end, limit=100)
            for article in weekly_news:
                article['ticker'] = ticker  # Add ticker to the news record
            all_news.extend(weekly_news)
        except Exception as e:
            print(f"Error fetching news for {ticker} from {week_start} to {week_end}: {e}")

# Convert news data to a DataFrame
news_df = pd.DataFrame(all_news)

output_file = "news_data_nov_to_dec_2023.csv"

if news_df.empty:
    # With no articles the frame has no columns, so the 'published_utc'
    # clean-up below would raise KeyError; report and skip the export instead.
    print("No news articles were retrieved; nothing to save.")
else:
    # Clean and format the DataFrame
    news_df = news_df.drop(columns=['id', 'image_url', 'author', 'amp_url'], errors='ignore')
    # Unparseable timestamps become NaT; only the calendar date is kept.
    news_df['published_utc'] = pd.to_datetime(news_df['published_utc'], errors='coerce')
    news_df['published_utc'] = news_df['published_utc'].dt.date
    news_df = news_df.drop_duplicates(subset=['title', 'published_utc', 'ticker'])

    # Save to CSV
    news_df.to_csv(output_file, index=False)
    print(f"News data saved to {output_file}.")


Fetching news for JNJ from 2023-12-25 to 2023-12-31...
Fetching news for JNJ from 2023-12-18 to 2023-12-24...
Fetching news for JNJ from 2023-12-11 to 2023-12-17...
Fetching news for JNJ from 2023-12-04 to 2023-12-10...
Fetching news for JNJ from 2023-11-30 to 2023-12-03...
Fetching news for JPM from 2023-12-25 to 2023-12-31...
Fetching news for JPM from 2023-12-18 to 2023-12-24...
Fetching news for JPM from 2023-12-11 to 2023-12-17...
Fetching news for JPM from 2023-12-04 to 2023-12-10...
Fetching news for JPM from 2023-11-30 to 2023-12-03...
Fetching news for XOM from 2023-12-25 to 2023-12-31...
Fetching news for XOM from 2023-12-18 to 2023-12-24...
Fetching news for XOM from 2023-12-11 to 2023-12-17...
Fetching news for XOM from 2023-12-04 to 2023-12-10...
Fetching news for XOM from 2023-11-30 to 2023-12-03...
Fetching news for AAPL from 2023-12-25 to 2023-12-31...
Fetching news for AAPL from 2023-12-18 to 2023-12-24...
Fetching news for AAPL from 2023-12-11 to 2023-12-17...
Fetchin