In [4]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import os
from dotenv import load_dotenv

load_dotenv()

API_KEY = os.getenv("GNEWS_API")
BASE_URL = "https://gnews.io/api/v4/search"
QUERY = "AAPL stock OR Apple stock"
MAX_RESULTS_PER_CALL = 100

# Generate 1-month windows from today back to 2020-01-01
end_date = datetime.today()
start_date = datetime(2020, 1, 1)

date_ranges = []
while end_date > start_date:
    range_start = end_date - timedelta(days=30)
    if range_start < start_date:
        range_start = start_date
    date_ranges.append((range_start.strftime('%Y-%m-%d'), end_date.strftime('%Y-%m-%d')))
    end_date = range_start - timedelta(days=1)

# Load previously saved articles if they exist
try:
    existing_df = pd.read_csv("../data/gnews_partial.csv")
    completed_dates = set(pd.to_datetime(existing_df['publishedAt']).dt.strftime('%Y-%m'))
    all_articles = existing_df.to_dict("records")
except FileNotFoundError:
    all_articles = []
    completed_dates = set()

print(f"🟡 Skipping {len(completed_dates)} previously fetched windows.")

# Fetch articles month-by-month
for start, end in date_ranges:
    range_key = pd.to_datetime(start).strftime('%Y-%m')
    if range_key in completed_dates:
        print(f"✅ Skipping {range_key} (already done)")
        continue

    params = {
        'q': QUERY,
        'lang': 'en',
        'from': start,
        'to': end,
        'max': MAX_RESULTS_PER_CALL,
        'token': API_KEY,
    }

    print(f"📅 Fetching {start} to {end}...")
    try:
        response = requests.get(BASE_URL, params=params)
    except Exception as e:
        print(f"❌ Request error: {e}")
        continue

    if response.status_code == 200:
        data = response.json()
        articles = data.get('articles', [])
        print(f"🔹 Got {len(articles)} articles for {start} to {end}")

        for article in articles:
            all_articles.append({
                'title': article['title'],
                'description': article.get('description'),
                'content': article.get('content'),
                'publishedAt': article['publishedAt'],
                'source': article['source']['name'],
                'url': article['url']
            })

        # Save after each loop
        df = pd.DataFrame(all_articles)
        df.to_csv("../data/gnews_partial.csv", index=False)
        print(f"💾 Saved progress. Total articles: {len(df)}")

    elif response.status_code == 429:
        print("🚫 Rate limit hit! Stopping for today.")
        break

    else:
        print(f"⚠️ Failed to fetch ({response.status_code})")

    time.sleep(1)  # safety delay

print("✅ Done for now. You can rerun tomorrow to continue.")


🟡 Skipping 0 previously fetched windows.
📅 Fetching 2025-06-20 to 2025-07-20...
🔹 Got 10 articles for 2025-06-20 to 2025-07-20
💾 Saved progress. Total articles: 10
📅 Fetching 2025-05-20 to 2025-06-19...
🔹 Got 10 articles for 2025-05-20 to 2025-06-19
💾 Saved progress. Total articles: 20
📅 Fetching 2025-04-19 to 2025-05-19...
🔹 Got 10 articles for 2025-04-19 to 2025-05-19
💾 Saved progress. Total articles: 30
📅 Fetching 2025-03-19 to 2025-04-18...
🔹 Got 10 articles for 2025-03-19 to 2025-04-18
💾 Saved progress. Total articles: 40
📅 Fetching 2025-02-16 to 2025-03-18...
🔹 Got 10 articles for 2025-02-16 to 2025-03-18
💾 Saved progress. Total articles: 50
📅 Fetching 2025-01-16 to 2025-02-15...
🔹 Got 10 articles for 2025-01-16 to 2025-02-15
💾 Saved progress. Total articles: 60
📅 Fetching 2024-12-16 to 2025-01-15...
🔹 Got 10 articles for 2024-12-16 to 2025-01-15
💾 Saved progress. Total articles: 70
📅 Fetching 2024-11-15 to 2024-12-15...
🔹 Got 10 articles for 2024-11-15 to 2024-12-15
💾 Saved prog

In [45]:
import pandas as pd

daily_news = pd.read_csv("../data/gnews_partial.csv")
stock_df = pd.read_csv('../data/stock_data_4years.csv', skiprows=2)

stock_df.columns = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']

daily_news.head()

Unnamed: 0,title,description,content,publishedAt,source,url
0,Today in Apple history,"On June 18, 1993, Apple CEO John Sculley gave ...","June 18, 1993: John Sculley steps down as Appl...",2025-06-18T14:30:22Z,Cult of Mac,https://www.cultofmac.com/apple-history/john-s...
1,The AI Trade Is Back in Play: 2 Stocks to Buy ...,Consider Apple (NASDAQ:AAPL) and another cheap...,The AI (artificial intelligence) trade is gett...,2025-06-13T23:00:00Z,The Motley Fool Canada,https://www.fool.ca/2025/06/13/the-ai-trade-is...
2,China Tariff Easing Spurs Stock Market Surge A...,The “dream scenario” for tech investors helped...,Topline Stocks surged across the board Monday ...,2025-05-12T04:00:00Z,Forbes,https://www.forbes.com/sites/dereksaul/2025/05...
3,Apple Faces New iPhone Fiasco as Update Bricks...,"Apple (AAPL) faces another wave of challenges,...","Apple (AAPL) faces another wave of challenges,...",2025-04-27T16:35:02Z,Business Insider,https://markets.businessinsider.com/news/stock...
4,2 Reasons I'm Considering Apple Stock for a $2...,Apple (NASDAQ:AAPL) stock looks like a deep-va...,The stock market has continued to experience a...,2025-04-23T14:00:00Z,The Motley Fool Canada,https://www.fool.ca/2025/04/23/2-reasons-im-co...


In [46]:
stock_df.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2021-07-19,139.404251,140.989624,138.64093,140.676459,121434600
1,2021-07-20,143.025162,143.954862,139.90338,140.392689,96350000
2,2021-07-21,142.291168,143.005571,141.537642,142.418393,74993500
3,2021-07-22,143.661285,145.031346,142.692447,142.819672,77338200
4,2021-07-23,145.383606,145.540189,143.778672,144.395206,71447400


In [47]:
print(stock_df.shape)
print(stock_df.columns)

(1004, 6)
Index(['Date', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')


In [48]:
print(daily_news.shape)
print(daily_news.columns)

(620, 6)
Index(['title', 'description', 'content', 'publishedAt', 'source', 'url'], dtype='object')


In [49]:
stock_df['date'] = pd.to_datetime(stock_df['Date']).dt.date

daily_news['date'] = pd.to_datetime(daily_news['publishedAt']).dt.date

In [50]:
stock_df

Unnamed: 0,Date,Close,High,Low,Open,Volume,date
0,2021-07-19,139.404251,140.989624,138.640930,140.676459,121434600,2021-07-19
1,2021-07-20,143.025162,143.954862,139.903380,140.392689,96350000,2021-07-20
2,2021-07-21,142.291168,143.005571,141.537642,142.418393,74993500,2021-07-21
3,2021-07-22,143.661285,145.031346,142.692447,142.819672,77338200,2021-07-22
4,2021-07-23,145.383606,145.540189,143.778672,144.395206,71447400,2021-07-23
...,...,...,...,...,...,...,...
999,2025-07-11,211.160004,212.130005,209.860001,210.570007,39765800,2025-07-11
1000,2025-07-14,208.619995,210.910004,207.539993,209.929993,38840100,2025-07-14
1001,2025-07-15,209.110001,211.889999,208.919998,209.220001,42296300,2025-07-15
1002,2025-07-16,210.160004,212.399994,208.639999,210.300003,47490500,2025-07-16


In [51]:
daily_news

Unnamed: 0,title,description,content,publishedAt,source,url,date
0,Today in Apple history,"On June 18, 1993, Apple CEO John Sculley gave ...","June 18, 1993: John Sculley steps down as Appl...",2025-06-18T14:30:22Z,Cult of Mac,https://www.cultofmac.com/apple-history/john-s...,2025-06-18
1,The AI Trade Is Back in Play: 2 Stocks to Buy ...,Consider Apple (NASDAQ:AAPL) and another cheap...,The AI (artificial intelligence) trade is gett...,2025-06-13T23:00:00Z,The Motley Fool Canada,https://www.fool.ca/2025/06/13/the-ai-trade-is...,2025-06-13
2,China Tariff Easing Spurs Stock Market Surge A...,The “dream scenario” for tech investors helped...,Topline Stocks surged across the board Monday ...,2025-05-12T04:00:00Z,Forbes,https://www.forbes.com/sites/dereksaul/2025/05...,2025-05-12
3,Apple Faces New iPhone Fiasco as Update Bricks...,"Apple (AAPL) faces another wave of challenges,...","Apple (AAPL) faces another wave of challenges,...",2025-04-27T16:35:02Z,Business Insider,https://markets.businessinsider.com/news/stock...,2025-04-27
4,2 Reasons I'm Considering Apple Stock for a $2...,Apple (NASDAQ:AAPL) stock looks like a deep-va...,The stock market has continued to experience a...,2025-04-23T14:00:00Z,The Motley Fool Canada,https://www.fool.ca/2025/04/23/2-reasons-im-co...,2025-04-23
...,...,...,...,...,...,...,...
615,Microsoft stock or Apple shares: Which is the ...,Apple Inc (NASDAQ: AAPL) shares used to have t...,Microsoft stock or Apple shares: Which is the ...,2025-04-11T04:06:46Z,Motley Fool Australia,https://www.fool.com.au/2025/04/11/microsoft-s...,2025-04-11
616,"AAPL, JPM, GM: U.S. Stock Market Has Lost $11 ...",A total of $11.1 trillion has been wiped off t...,A total of $11.1 trillion has been wiped off t...,2025-04-05T01:14:54Z,Business Insider,https://markets.businessinsider.com/news/stock...,2025-04-05
617,Morgan Stanley cut Apple price target over tar...,Morgan Stanley cut its price target on Apple (...,00:00 Speaker A\nApple getting a price target ...,2025-03-12T14:31:51Z,Yahoo Finance,https://finance.yahoo.com/video/morgan-stanley...,2025-03-12
618,"1 Essential ""Magnificent 7"" Stock for Canadian...",Apple (NASDAQ:AAPL) stands out as an intriguin...,It’s hard to avoid the buzz about the Magnific...,2025-02-20T16:45:00Z,The Motley Fool Canada,https://www.fool.ca/2025/02/20/1-essential-mag...,2025-02-20


In [52]:
# Combine title, description, and content into one text column
daily_news['text'] = (
    daily_news['title'].fillna('') + '. ' +
    daily_news['description'].fillna('') + '. ' +
    daily_news['content'].fillna('')
)

# Group all news per date into one big string (this gives one row per date)
grouped_news = daily_news.groupby('date')['text'].apply(lambda x: ' '.join(x)).reset_index()
grouped_news

Unnamed: 0,date,text
0,2024-12-14,Apple vs. Shopify: Which Stock Is the Better B...
1,2025-02-20,"1 Essential ""Magnificent 7"" Stock for Canadian..."
2,2025-03-12,Morgan Stanley cut Apple price target over tar...
3,2025-04-05,"AAPL, JPM, GM: U.S. Stock Market Has Lost $11 ..."
4,2025-04-11,Microsoft stock or Apple shares: Which is the ...
5,2025-04-23,2 Reasons I'm Considering Apple Stock for a $2...
6,2025-04-27,Apple Faces New iPhone Fiasco as Update Bricks...
7,2025-05-12,China Tariff Easing Spurs Stock Market Surge A...
8,2025-06-13,The AI Trade Is Back in Play: 2 Stocks to Buy ...
9,2025-06-18,"Today in Apple history. On June 18, 1993, Appl..."


In [55]:
merged_df = pd.merge(stock_df, grouped_news, on='date', how='left')

merged_df['text'] = merged_df['text'].fillna('')

In [56]:
merged_df

Unnamed: 0,Date,Close,High,Low,Open,Volume,date,text
0,2021-07-19,139.404251,140.989624,138.640930,140.676459,121434600,2021-07-19,
1,2021-07-20,143.025162,143.954862,139.903380,140.392689,96350000,2021-07-20,
2,2021-07-21,142.291168,143.005571,141.537642,142.418393,74993500,2021-07-21,
3,2021-07-22,143.661285,145.031346,142.692447,142.819672,77338200,2021-07-22,
4,2021-07-23,145.383606,145.540189,143.778672,144.395206,71447400,2021-07-23,
...,...,...,...,...,...,...,...,...
999,2025-07-11,211.160004,212.130005,209.860001,210.570007,39765800,2025-07-11,
1000,2025-07-14,208.619995,210.910004,207.539993,209.929993,38840100,2025-07-14,
1001,2025-07-15,209.110001,211.889999,208.919998,209.220001,42296300,2025-07-15,
1002,2025-07-16,210.160004,212.399994,208.639999,210.300003,47490500,2025-07-16,
