## Extract Historical Stock Data from Yahoo Finance (openBB)

### Import Libraries

In [1]:
import sys
import os
current_dir = os.path.abspath(os.path.join(os.getcwd(), '../..', 'py')) 
sys.path.append(current_dir)
from fetch_price_history import fetch_price_history_openbb      

import pandas as pd
import logging
from dotenv import load_dotenv

load_dotenv()

from openbb import obb
import polars as pl
obb.user.credentials.fmp_api_key = os.getenv("FMP_API_KEY")

Extensions to add: federal_reserve@1.4.3, yfinance@1.4.6
Extensions to remove: federal_reserve@1.4.2, yfinance@1.4.3

Building...


### Define Variables

In [2]:
from pandas.tseries.offsets import BDay
from datetime import datetime, timedelta

# Time Range adjustment
end_date = (datetime.today() - BDay(1)).to_pydatetime()  # Last business day
start_date = end_date - timedelta(days=10*365)  # 10 years of data

# Convert datetime objects to Unix timestamps (seconds since Jan 1, 1970)
start_timestamp = int(start_date.timestamp())
end_timestamp = int(end_date.timestamp())

# Print the date range
days_difference = (end_date - start_date).days
print(f"Date Range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
print(f"Time span: {days_difference} days ({days_difference/365:.2f} years)")

Date Range: 2015-05-31 to 2025-05-28
Time span: 3650 days (10.00 years)


### Stocks

In [3]:
tickers_file = '../tickers_sp_500.txt'
with open(tickers_file, 'r') as f:
    tickers = [line.strip() for line in f if line.strip()]

# Replace '.' with '-' for Yahoo Finance compatibility
tickers = [t.replace('.', '-') for t in tickers]
tickers = list(dict.fromkeys(tickers))  # Remove duplicates

###########################################################
# DAILY DATA
###########################################################

daily_output = '../datasets/daily_stock_quotes.csv'

df_daily, df_daily_failed = fetch_price_history_openbb(
    tickers, 
    start_date, 
    end_date,
    data_file=daily_output,
    interval='1d',                 # options: ['1d', '1w', '1M']
    provider='fmp',                # options: ['fmp', 'yfinance']
    row_threshold_pct=0.05,        # Filter out rows with fewer than 5% of columns containing values
    column_threshold=0.2,          # Filter out columns with less than 20% of values
    validate_recent_data=True,     # Enable recent data validation
    recent_data_percentage=0.2     # Check the last 20% of rows
)

###########################################################
# MONTHLY DATA
###########################################################

monthly_output = '../datasets/monthly_stock_quotes.csv'

df = (df_daily.set_index(pd.to_datetime(df_daily.pop('Date')))
      if 'Date' in df_daily.columns else df_daily.copy())
df.index = pd.to_datetime(df.index)              
(df.resample('MS').last()
   .reset_index()
   .to_csv(monthly_output, index=False))

📂 Reading existing data from: ../datasets/daily_stock_quotes.csv
🗓 Existing data: 2015-05-19 to 2025-05-27
✅ Found 500 tickers, ❌ Missing 3 tickers
⏳ Fetching data using provider: fmp...
Will fetch 3 missing tickers from 2015-05-19 to 2025-05-28
Will update existing tickers from 2025-05-28 to 2025-05-28
Using FMP batch processing for 503 tickers
Fetching history for 3 missing tickers...
Processing attempt 1/2
Fetching batch of 3 tickers...
Retrieved data for 3 of 3 missing tickers
Updating 500 existing tickers...
Processing attempt 1/2
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Rate limit reached. Identified 100 failed tickers
Fetching batch of 100 tickers...
Rate limit reached. Identified 100 failed tickers
⏳ Round 1 complete with 200 failed tickers
Waiting 30 seconds before retrying...
Processing attempt 2/2
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Updated data for 498 ti

### Bonds

In [4]:
tickers_file = '../tickers_bond.txt'
with open(tickers_file, 'r') as f:
    tickers = [line.strip() for line in f if line.strip()]

# Replace '.' with '-' for Yahoo Finance compatibility
tickers = [t.replace('.', '-') for t in tickers]
tickers = list(dict.fromkeys(tickers))  # Remove duplicates

###########################################################
# DAILY DATA
###########################################################

daily_output = '../datasets/daily_bond_quotes.csv'

bonds_daily_df, failed_daily = fetch_price_history_openbb(
    tickers, 
    start_date, 
    end_date,
    data_file=daily_output,
    interval='1d',                 # options: ['1d', '1w', '1M']
    provider='fmp',                # options: ['fmp', 'yfinance']
    row_threshold_pct=0.05,        # Filter out rows with fewer than 5% of columns containing values
    column_threshold=0.2,          # Filter out columns with less than 20% of values
    validate_recent_data=True,     # Enable recent data validation
    recent_data_percentage=0.2     # Check the last 20% of rows
)

###########################################################
# MONTHLY DATA
###########################################################

monthly_output = '../datasets/monthly_bond_quotes.csv'

bonds_monthly_prices = (bonds_daily_df.set_index(pd.to_datetime(bonds_daily_df.pop('Date')))
      if 'Date' in bonds_daily_df.columns else bonds_daily_df.copy())
bonds_monthly_prices.index = pd.to_datetime(bonds_monthly_prices.index)              
bonds_monthly_prices = (bonds_monthly_prices.resample('MS').last()
   .reset_index()
   .rename(columns={'index': 'Date'}))

# Save to CSV
bonds_monthly_prices.to_csv(monthly_output, index=False)

display(bonds_monthly_prices.head())

📂 Reading existing data from: ../datasets/daily_bond_quotes.csv
🗓 Existing data: 2015-05-19 to 2025-05-27
✅ Found 101 tickers, ❌ Missing 46 tickers
⏳ Fetching data using provider: fmp...
Will fetch 46 missing tickers from 2015-05-19 to 2025-05-28
Will update existing tickers from 2025-05-28 to 2025-05-28
Using FMP batch processing for 147 tickers
Fetching history for 46 missing tickers...
Processing attempt 1/2
Fetching batch of 46 tickers...
Retrieved data for 27 of 46 missing tickers
Updating 101 existing tickers...
Processing attempt 1/2
Fetching batch of 100 tickers...
Fetching batch of 1 tickers...
Rate limit reached. Identified 1 failed tickers
⏳ Round 1 complete with 1 failed tickers
Waiting 30 seconds before retrying...
Processing attempt 2/2
Fetching batch of 1 tickers...
Rate limit reached. Identified 1 failed tickers
❌ Max retries (1) reached. Skipping 1 tickers
Updated data for 49 tickers
⚠️ Column 'AGIH' missing 1 values (0.2%) in recent data
⚠️ Column 'BMOPX' missing 504 

Unnamed: 0,Date,AGG,AGIH,AGRH,AGZ,BEMB,BGRN,BMOIX,BYLD,CEMB,...,SUB,SUSB,SUSC,TFLO,TIP,TLH,TLT,TLTW,USHY,USIG
0,2015-05-01,85.03,,,92.41,,,7.83,17.45,32.96,...,93.31,,,41.86,87.28,105.66,94.83,,,39.68
1,2015-06-01,84.11,,,92.06,,,7.75,17.23,32.12,...,93.44,,,41.83,86.39,103.55,90.97,,,39.09
2,2015-07-01,84.84,,,92.44,,,7.8,17.36,32.32,...,93.75,,,41.74,86.85,106.14,95.1,,,39.42
3,2015-08-01,84.55,,,92.57,,,7.79,17.27,31.66,...,93.52,,,41.72,86.03,105.57,94.44,,,39.02
4,2015-09-01,85.24,,,93.05,,,7.84,17.21,30.8,...,93.84,,,41.78,85.58,107.51,96.3,,,39.37


### Benchmarks

In [5]:
tickers_file = '../tickers_benchmark.txt'
with open(tickers_file, 'r') as f:
    tickers = [line.strip() for line in f if line.strip()]

# Replace '.' with '-' for Yahoo Finance compatibility
tickers = [t.replace('.', '-') for t in tickers]
tickers = list(dict.fromkeys(tickers))  # Remove duplicates

###########################################################
# DAILY DATA
###########################################################

daily_output = '../datasets/daily_benchmark_quotes.csv'

df_daily, df_daily_failed = fetch_price_history_openbb(
    tickers, 
    start_date, 
    end_date,
    data_file=daily_output,
    interval='1d',                 # options: ['1d', '1w', '1M']
    provider='fmp',                # options: ['fmp', 'yfinance']
    row_threshold_pct=0.05,        # Filter out rows with fewer than 5% of columns containing values
    column_threshold=0.2,          # Filter out columns with less than 20% of values
    validate_recent_data=True,     # Enable recent data validation
    recent_data_percentage=0.2     # Check the last 20% of rows
)

###########################################################
# MONTHLY DATA
###########################################################

monthly_output = '../datasets/monthly_benchmark_quotes.csv'

df = (df_daily.set_index(pd.to_datetime(df_daily.pop('Date')))
      if 'Date' in df_daily.columns else df_daily.copy())
df.index = pd.to_datetime(df.index)              
(df.resample('MS').last()
   .reset_index()
   .to_csv(monthly_output, index=False))

📂 Reading existing data from: ../datasets/daily_benchmark_quotes.csv
🗓 Existing data: 2015-05-19 to 2025-05-27
✅ Found 506 tickers, ❌ Missing 0 tickers
⏳ Fetching data using provider: fmp...
Will update existing tickers from 2025-05-28 to 2025-05-28
Using FMP batch processing for 505 tickers
Updating 505 existing tickers...
Processing attempt 1/2
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Fetching batch of 100 tickers...
Rate limit reached. Identified 100 failed tickers
Fetching batch of 100 tickers...
Rate limit reached. Identified 100 failed tickers
Fetching batch of 5 tickers...
Rate limit reached. Identified 5 failed tickers
⏳ Round 1 complete with 205 failed tickers
Waiting 30 seconds before retrying...
Processing attempt 2/2
Fetching batch of 100 tickers...
Rate limit reached. Identified 100 failed tickers
Fetching batch of 100 tickers...
Rate limit reached. Identified 100 failed tickers
Fetching batch of 5 tickers...
Rate l

### Treasury Rates

In [6]:
tickers_file = '../tickers_treasury.txt'
with open(tickers_file, 'r') as f:
    tickers = [line.strip() for line in f if line.strip()]

# Replace '.' with '-' for Yahoo Finance compatibility
tickers = [t.replace('.', '-') for t in tickers]
tickers = list(dict.fromkeys(tickers))  # Remove duplicates

###########################################################
# DAILY DATA
###########################################################

daily_output = '../datasets/daily_treasury_rates.csv'

df_daily, df_daily_failed = fetch_price_history_openbb(
    tickers, 
    start_date, 
    end_date,
    data_file=daily_output,
    interval='1d',                 # options: ['1d', '1w', '1M']
    provider='fmp',                # options: ['fmp', 'yfinance']
    row_threshold_pct=0.05,        # Filter out rows with fewer than 5% of columns containing values
    column_threshold=0.2,          # Filter out columns with less than 20% of values
    validate_recent_data=True,     # Enable recent data validation
    recent_data_percentage=0.2     # Check the last 20% of rows
)

###########################################################
# MONTHLY DATA
###########################################################

monthly_output = '../datasets/monthly_treasury_rates.csv'

df = (df_daily.set_index(pd.to_datetime(df_daily.pop('Date')))
      if 'Date' in df_daily.columns else df_daily.copy())
df.index = pd.to_datetime(df.index)              
(df.resample('MS').last()
   .reset_index()
   .to_csv(monthly_output, index=False))

📂 Reading existing data from: ../datasets/daily_treasury_rates.csv
🗓 Existing data: 2015-05-19 to 2025-05-27
✅ Found 4 tickers, ❌ Missing 0 tickers
⏳ Fetching data using provider: fmp...
Will update existing tickers from 2025-05-28 to 2025-05-28
Using FMP batch processing for 4 tickers
Updating 4 existing tickers...
Processing attempt 1/2
Fetching batch of 4 tickers...
Updated data for 4 tickers
💾 Saved updated data to ../datasets/daily_treasury_rates.csv


### Sectors

In [7]:
sectors = [
    'XLE',
    'CLF',
    'XLF',
    'GDX'
]