In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv

import yfinance as yf
from newsapi import NewsApiClient

load_dotenv()

True

In [18]:
# NewsAPI key taken from the process environment; None when NEWS_API is unset.
NEWS_API = os.environ.get('NEWS_API')

def get_stock_data(ticker="AAPL", start_date="2020-01-01", end_date="2024-01-01", path="../data/stock_data_4years.csv"):
    """Download price history for one ticker, print a summary and save it as CSV.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start of the requested range, forwarded as ``start``.
        end_date: End of the requested range, forwarded as ``end``.
        path: Destination CSV file; missing parent directories are created.

    Returns:
        The DataFrame returned by ``yf.download`` (the same data written to ``path``).

    Raises:
        ValueError: If the download comes back empty. Nothing is written in
            that case, instead of failing later with an IndexError while
            printing the date range.
    """
    print(f"Fetching stock data for {ticker} from {start_date} to {end_date}...")
    # auto_adjust is passed explicitly so the result does not depend on the
    # installed yfinance default.
    # NOTE(review): True is assumed to match the five-column
    # (Close/High/Low/Open/Volume) output recorded in this notebook -- confirm
    # against the yfinance version in use.
    stock_data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

    # Check before reporting success: the summary below indexes the first and
    # last rows, which would raise on an empty frame.
    if stock_data is None or stock_data.empty:
        raise ValueError(
            f"No stock data returned for {ticker} between {start_date} and {end_date}"
        )
    print(f"Stock data for {ticker} fetched successfully!")

    # small summary of fetched stock data
    print(f"Stock data shape: {stock_data.shape}")
    print(f"Date range: {stock_data.index[0]} to {stock_data.index[-1]}")
    print("\nFirst 5 rows:")
    print(stock_data.head())

    print("\nLast 5 rows:")
    print(stock_data.tail())

    # save stock data to csv, creating the target directory on a fresh checkout
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    stock_data.to_csv(path)
    print(f"Stock data saved to {path}")

    return stock_data

def get_news_data(from_date, to_date, api_key=NEWS_API, q="Apple", language="en", sort_by="relevancy", page_size=100, path="../data/news_data.csv"):
    """Fetch articles matching a query through NewsApiClient and save them as CSV.

    Args:
        from_date: Start of the search window, forwarded as ``from_param``.
        to_date: End of the search window, forwarded as ``to``.
        api_key: NewsAPI key. Defaults to the module-level ``NEWS_API`` value,
            which is bound when this function is defined; re-run the defining
            cell after changing the environment.
        q: Search query string.
        language: Language filter forwarded to ``get_everything``.
        sort_by: Sort order forwarded to ``get_everything``.
        page_size: Number of results requested in the single call made here.
        path: Destination CSV file; missing parent directories are created.

    Returns:
        A DataFrame built from the ``articles`` list of the response (the same
        data written to ``path``).

    Raises:
        ValueError: If no API key is available, so the problem is reported
            before any request is attempted.
    """
    if not api_key:
        raise ValueError(
            "No NewsAPI key available: set the NEWS_API environment variable "
            "or pass api_key explicitly."
        )

    print(f"Fetching news data for query '{q}' from {from_date} to {to_date}...")
    news_api = NewsApiClient(api_key=api_key)
    news = news_api.get_everything(
        q=q,
        from_param=from_date,
        to=to_date,
        language=language,
        sort_by=sort_by,
        page_size=page_size,
    )
    articles = news['articles']
    print(f"Found {len(articles)} news articles for query '{q}' from {from_date} to {to_date}")

    news_df = pd.DataFrame(articles)

    # Create the target directory on a fresh checkout so to_csv cannot fail on it.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    news_df.to_csv(path)
    print(f"News data saved to {path}")

    return news_df

In [19]:
# News query window: the 30 days ending on today's local date.
to_date = datetime.now().date()
from_date = to_date + timedelta(days=-30)

# Download both datasets; each helper prints a summary and writes its own CSV.
get_stock_data()
get_news_data(to_date=to_date, from_date=from_date)

  stock_data = yf.download(ticker, start=start_date, end=end_date) # code to extract stock data
[*********************100%***********************]  1 of 1 completed

Fetching stock data for AAPL from 2020-01-01 to 2024-01-01...
Stock data for AAPL fetched successfully!
Stock data shape: (1006, 5)
Date range: 2020-01-02 00:00:00 to 2023-12-29 00:00:00

First 5 rows:
Price           Close       High        Low       Open     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                             
2020-01-02  72.620842  72.681289  71.373218  71.627092  135480400
2020-01-03  71.914825  72.676454  71.689965  71.847125  146322800
2020-01-06  72.487862  72.526549  70.783263  71.034724  118387200
2020-01-07  72.146919  72.753800  71.926892  72.497506  108872000
2020-01-08  73.307510  73.609745  71.849533  71.849533  132079200

Last 5 rows:
Price            Close        High         Low        Open    Volume
Ticker            AAPL        AAPL        AAPL        AAPL      AAPL
Date                                                                
2023-12-22  192.192566  193.989405  191.567141  1




Found 100 news articles for query 'Apple' from 2025-06-17 to 2025-07-17
News data saved to ../data/news_data.csv
