In [4]:
import yfinance as yf
import pandas as pd


In [5]:
# Look-back window handed to yfinance for every ticker.
# NOTE(review): "3wk" does not appear among the period strings listed in the
# yfinance docs (1d, 5d, 1mo, 3mo, ...) — confirm the installed version accepts it.
PRICE_PERIOD = "3wk"

# Notebook variable name -> ticker symbol, in download order.
PRICE_TICKERS = {
    "apple": "AAPL",
    "nvidia": "NVDA",
    "walmart": "WMT",
    "eliLilly": "LLY",
    "jpm": "JPM",
    "exxon": "XOM",
    "mcd": "MCD",
    "tesla": "TSLA",
    "delta": "DAL",
    "marriott": "MAR",
    "goldman": "GS",
    "netflix": "NFLX",
    "meta": "META",
    "oracle": "ORCL",
    "palantir": "PLTR",
}

# One request per ticker (same calls, same order as the previous copy-pasted
# lines), collected in a dict so later cells can loop over every company.
price_frames = {
    name: yf.download(symbol, period=PRICE_PERIOD)
    for name, symbol in PRICE_TICKERS.items()
}

# Keep the original per-company variables so cells that use them still work.
apple = price_frames["apple"]
nvidia = price_frames["nvidia"]
walmart = price_frames["walmart"]
eliLilly = price_frames["eliLilly"]
jpm = price_frames["jpm"]
exxon = price_frames["exxon"]
mcd = price_frames["mcd"]
tesla = price_frames["tesla"]
delta = price_frames["delta"]
marriott = price_frames["marriott"]
goldman = price_frames["goldman"]
netflix = price_frames["netflix"]
meta = price_frames["meta"]
oracle = price_frames["oracle"]
palantir = price_frames["palantir"]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [6]:
!pip install yfinance finnhub-python

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


In [None]:
import os
import time
import sqlite3
from datetime import datetime, timedelta, timezone

import pandas as pd
from finnhub import Client

# Read the Finnhub API key from the environment instead of embedding it in the
# notebook: a key stored in the file leaks to anyone the notebook is shared with.
# NOTE(review): the key that used to be hardcoded here should be treated as
# exposed and rotated.
FINNHUB_API_KEY = os.environ.get("FINNHUB_API_KEY")
if not FINNHUB_API_KEY:
    raise RuntimeError(
        "FINNHUB_API_KEY is not set; export it in the environment before running this cell."
    )

# Client used for every Finnhub request in this notebook.
finnhub_client = Client(api_key=FINNHUB_API_KEY)

# SQLite file that receives the articles, and the pause (in seconds) taken
# after each ticker's request.
DB_PATH = "news.db"
SLEEP_SECONDS = 0.35

# Ticker symbol -> display name for every company tracked in this project.
# Insertion order is the order in which the tickers are processed.
PROJECT_COMPANIES = dict(
    AAPL="Apple",
    NVDA="NVIDIA",
    WMT="Walmart",
    LLY="Eli Lilly",
    JPM="JPMorgan Chase",
    XOM="Exxon Mobil",
    MCD="McDonald's",
    TSLA="Tesla",
    DAL="Delta Air Lines",
    MAR="Marriott International",
    GS="Goldman Sachs Group",
    NFLX="Netflix",
    META="Meta",
    ORCL="Oracle",
    PLTR="Palantir",
)

# Three-week look-back window that ends on today's date in UTC.
end_date = datetime.now(tz=timezone.utc).date()
start_date = end_date - timedelta(weeks=3)

# Both bounds as YYYY-MM-DD strings for the news request.
FROM, TO = start_date.isoformat(), end_date.isoformat()

print(f"Pulling news from {FROM} to {TO}")

# Schema for the article store. `url` is the primary key, so each URL is kept
# at most once; IF NOT EXISTS makes re-running this cell harmless.
create_articles_sql = """
CREATE TABLE IF NOT EXISTS articles (
    url TEXT PRIMARY KEY,
    ticker TEXT,
    company TEXT,
    published_ts INTEGER,
    published_at_utc TEXT,
    source TEXT,
    headline TEXT,
    summary TEXT,
    category TEXT,
    related TEXT,
    finnhub_id INTEGER
);
"""

# Open (or create) the database file and keep one cursor for later statements.
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()

cur.execute(create_articles_sql)
conn.commit()

# Collect + insert
rows = []    # one flat record per kept article; becomes the exported DataFrame
errors = []  # one {"ticker", "error"} entry per ticker whose pull failed

# OR IGNORE: an article whose url is already stored is silently skipped.
insert_sql = """
INSERT OR IGNORE INTO articles
(url, ticker, company, published_ts, published_at_utc, source, headline, summary, category, related, finnhub_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
"""

# Record keys in the positional order insert_sql expects ("id" fills finnhub_id).
_INSERT_KEYS = (
    "url",
    "ticker",
    "company",
    "published_ts",
    "published_at_utc",
    "source",
    "headline",
    "summary",
    "category",
    "related",
    "id",
)


def _article_to_record(article, ticker, company_name):
    """Flatten one news item into the record shared by the DB and the DataFrame.

    Args:
        article: dict-like news item; read with ``.get`` for the keys
            "url", "datetime", "source", "headline", "summary", "category",
            "related" and "id".
        ticker: symbol the item was requested for.
        company_name: display name stored alongside the ticker.

    Returns:
        A dict with the record fields, or ``None`` when the item has no URL
        (such items are treated as malformed and skipped).
    """
    url = article.get("url")
    if not url:
        return None

    ts = article.get("datetime")
    if isinstance(ts, (int, float)):
        # Same normalised timestamp goes to the DB and to the DataFrame, so the
        # two outputs cannot disagree and the column never mixes types.
        published_ts = int(ts)
        published_at_utc = datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()
    else:
        published_ts = None
        published_at_utc = None

    return {
        "ticker": ticker,
        "company": company_name,
        "published_at_utc": published_at_utc,
        "published_ts": published_ts,
        "source": article.get("source"),
        "headline": article.get("headline"),
        "url": url,
        "summary": article.get("summary"),
        "category": article.get("category"),
        "related": article.get("related"),
        "id": article.get("id"),
    }


for ticker, company_name in PROJECT_COMPANIES.items():
    try:
        articles = finnhub_client.company_news(ticker, _from=FROM, to=TO)
        print(f"{ticker}: {len(articles)} articles")

        for a in articles:
            record = _article_to_record(a, ticker, company_name)
            if record is None:
                continue  # skip malformed entries

            cur.execute(insert_sql, tuple(record[key] for key in _INSERT_KEYS))
            rows.append(record)

        # Commit once per ticker so a later failure does not lose earlier tickers.
        conn.commit()

    except Exception as e:
        # Deliberate best-effort: record the failure and move on to the next ticker.
        errors.append({"ticker": ticker, "error": repr(e)})
        print(f"{ticker}: ERROR -> {e}")

    # Pause between tickers before issuing the next request.
    time.sleep(SLEEP_SECONDS)

# Column order of the exported table; these are the keys of each record in `rows`.
article_columns = [
    "ticker",
    "company",
    "published_at_utc",
    "published_ts",
    "source",
    "headline",
    "url",
    "summary",
    "category",
    "related",
    "id",
]

try:
    # Explicit columns keep the "url" column present even when `rows` is empty;
    # without them drop_duplicates(subset=["url"]) raises KeyError on an empty
    # frame, hiding the error summary and leaving the connection open.
    df = (
        pd.DataFrame(rows, columns=article_columns)
        .drop_duplicates(subset=["url"])
        .reset_index(drop=True)
    )

    if df.empty:
        # Nothing was collected: do not overwrite earlier exports with empty files.
        print("No articles collected; skipping parquet/csv export.")
    else:
        out_parquet = "finnhub_company_news_3wk.parquet"
        df.to_parquet(out_parquet, index=False)
        print("Saved parquet:", out_parquet)

        out_csv = "finnhub_company_news_3wk.csv"
        df.to_csv(out_csv, index=False)
        print("Saved csv:", out_csv)

    if errors:
        print("\nErrors:", errors)
finally:
    # Always release the SQLite connection, even if an export step fails.
    conn.close()

print("Done. DB saved at:", DB_PATH)


Pulling news from 2026-01-24 to 2026-02-14
AAPL: 238 articles
NVDA: 249 articles
WMT: 237 articles
LLY: 231 articles
JPM: 239 articles
XOM: 239 articles
MCD: 248 articles
TSLA: 233 articles
DAL: 131 articles
MAR: 108 articles
GS: 247 articles
NFLX: 232 articles
META: 241 articles
ORCL: 244 articles
PLTR: 240 articles
Saved parquet: finnhub_company_news_3wk.parquet
Saved csv: finnhub_company_news_3wk.csv
Done. DB saved at: news.db
