## Import Libraries

In [9]:
# %pip install -r requirements.txt
from datetime import datetime, timedelta
from io import StringIO
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.optimize import minimize
from yfinance import Lookup

## S&P500 Tickers

In [10]:
# The first table on the Wikipedia page is read as the constituents list.
sp_500_df = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]

# Drop missing symbols *before* sorting: a NaN (float) mixed in with strings
# would make the sort raise TypeError, so filtering afterwards is too late.
sp_500_tickers = sorted(sp_500_df['Symbol'].dropna().tolist())

# Write all tickers in one operation
sp_500_path = 'tickers_sp_500.txt'
with open(sp_500_path, 'w') as f:
    f.write('\n'.join(sp_500_tickers))

# Report the file that was actually written (the old message said "tickers.txt").
print(f"Successfully wrote {len(sp_500_tickers)} tickers to {sp_500_path}")

Successfully wrote 503 tickers to tickers.txt


## World Indices

In [11]:
# Query Yahoo Finance's lookup for index symbols ("^" is the query string used
# here) and keep up to 100 results.
lookup = Lookup(query="^")
indices = lookup.get_index(count=100)

# Symbols live in the result's index; drop missing entries, then sort.
indices_tickers = sorted(indices.reset_index()['symbol'].dropna().tolist())

# Write all tickers in one operation
indices_path = 'tickers_index.txt'
with open(indices_path, 'w') as f:
    f.write('\n'.join(indices_tickers))

# Report the file that was actually written (the old message said "tickers.txt").
print(f"Successfully wrote {len(indices_tickers)} tickers to {indices_path}")

Successfully wrote 100 tickers to tickers.txt


## Treasury Bonds

In [12]:
# Search terms tried when looking for Treasury-related instruments
bond_queries = ["treasury", "^T", "bond"]
treasury_tickers = []

# Run every search term through each lookup category and pool the symbols
for search_term in bond_queries:
    lookup = Lookup(query=search_term)

    for kind in ("index", "future", "all"):
        fetch = getattr(lookup, f"get_{kind}")
        matches = fetch(count=100)
        if matches.empty:
            continue
        treasury_tickers += matches.reset_index()['symbol'].tolist()

# De-duplicate, discard missing symbols, and sort in a single pass
treasury_tickers = sorted(
    {symbol for symbol in treasury_tickers
     if symbol is not None and pd.notna(symbol)}
)

# Keep only symbols containing one of the Treasury yield index codes.
# The source list is already sorted, so sorting the subset is a no-op kept
# for clarity.
treasury_patterns = ['IRX', 'FVX', 'TYX', 'TNX']
filtered_tickers = sorted(
    symbol for symbol in treasury_tickers
    if any(pattern in symbol for pattern in treasury_patterns)
)

# Write all tickers in one operation
with open('tickers_treasury.txt', 'w') as f:
    f.write('\n'.join(filtered_tickers))

print(f"Successfully wrote {len(filtered_tickers)} tickers to tickers_treasury.txt")

filtered_tickers

Successfully wrote 4 tickers to tickers_treasury.txt


['^FVX', '^IRX', '^TNX', '^TYX']

## Benchmark (ETF) List

### Yahoo Finance

In [13]:
import requests
from bs4 import BeautifulSoup

# Yahoo Finance "top ETFs" screener, requested 100 rows per page.
base_url = "https://finance.yahoo.com/markets/etfs/top/?start={}&count=100"
benchmark_tickers = []
benchmark_names = []

# The headers never change between pages, so build them once.
headers = {
    "User-Agent": "Mozilla/5.0"
}

for page in range(10):  # 0 to 9, for 10 pages
    start = page * 100
    url = base_url.format(start)
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(url, headers=headers, timeout=10)
    if not response.ok:
        # Make the failure visible instead of silently parsing an error page.
        print(f"Skipping page {page}: HTTP {response.status_code}")
        continue
    soup = BeautifulSoup(response.text, "html.parser")
    rows = soup.select('table tbody tr')
    for row in rows:
        cells = row.find_all('td')
        # The first two cells are read as symbol and name; shorter rows are skipped.
        if len(cells) >= 2:
            benchmark_tickers.append(cells[0].text.strip())
            benchmark_names.append(cells[1].text.strip())

# Create DataFrame for easier filtering
benchmarks_df = pd.DataFrame({'Symbol': benchmark_tickers, 'Name': benchmark_names}).sort_values(by='Symbol')
# unique() preserves the sorted order while removing repeated symbols.
benchmarks_tickers = benchmarks_df['Symbol'].unique().tolist()

# Write all tickers in one operation
benchmark_path = 'tickers_benchmark.txt'
with open(benchmark_path, 'w') as f:
    f.write('\n'.join(benchmarks_tickers))

# Report the file that was actually written (the old message said "benchmark_tickers.txt").
print(f"Successfully wrote {len(benchmarks_tickers)} tickers to {benchmark_path}")

Successfully wrote 505 tickers to benchmark_tickers.txt


### BlackRock

In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

# Fetch the BlackRock fund listing page.
url = 'https://www.blackrock.com/us/individual/products/investment-funds#/?productView=ishares'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
# A timeout keeps a stalled connection from hanging the notebook.
response = requests.get(url, headers=headers, timeout=30)
soup = BeautifulSoup(response.text, "html.parser")

# Parse the first HTML table into a DataFrame. The markup is wrapped in
# StringIO because passing literal HTML to read_html is deprecated and emits
# a FutureWarning on recent pandas versions.
blackrock_etf_df = pd.read_html(StringIO(response.text))[0]

# Collect, per ticker, the product ID and relative URL from the ticker link
# found in each row's header cell of the products table.
product_info = {}
table = soup.find('table', class_='products-table')
if table:
    for row in table.find_all('tr'):
        ticker_cell = row.find('th', scope='row')
        link = ticker_cell.find('a') if ticker_cell else None
        if link is None:
            continue
        ticker = link.text.strip()
        href = link.get('href', '')

        # Extract product ID using regex
        product_id_match = re.search(r'/products/(\d+)/', href)
        product_id = product_id_match.group(1) if product_id_match else None

        # Store in dictionary with ticker as key
        product_info[ticker] = {
            'Product_ID': product_id,
            'URL_Path': href
        }

# Add the product ID and URL info to the dataframe; tickers without a parsed
# link get None in both columns.
blackrock_etf_df['Product_ID'] = blackrock_etf_df['Ticker'].map(
    lambda ticker: product_info.get(ticker, {}).get('Product_ID')
)
blackrock_etf_df['URL'] = blackrock_etf_df['Ticker'].map(
    lambda ticker: product_info.get(ticker, {}).get('URL_Path')
)
# Turn the relative path into a full URL. Missing *and* empty paths become
# None so that a link without an href does not yield a bare-domain URL.
blackrock_etf_df['URL'] = blackrock_etf_df['URL'].apply(
    lambda path: f"https://www.blackrock.com{path}" if pd.notna(path) and path else None
)

# Keep the first row for each ticker.
blackrock_etf_df = blackrock_etf_df.drop_duplicates(subset=['Ticker'], keep='first')

display(blackrock_etf_df)

  blackrock_etf_df = pd.read_html(response.text)[0]


Unnamed: 0,Ticker,Name,YTD (%),1Y (%),3Y (%),5Y (%),10Y (%),Incept (%),Perf. as of,Inception Date,Net Assets,Product_ID,URL
0,BMPCX,U.S. Mortgage Fund,2.70,9.01,2.31,0.84,1.29,,"Apr 30, 2025","Dec 06, 2010",631700,227349,https://www.blackrock.com/us/individual/produc...
1,BMPAX,U.S. Mortgage Fund,2.95,9.83,3.08,1.59,1.90,,"Apr 30, 2025","Dec 06, 2010",14783541,227355,https://www.blackrock.com/us/individual/produc...
2,MSUMX,U.S. Mortgage Fund,3.15,10.22,3.37,1.85,2.16,,"Apr 30, 2025","Jul 29, 2005",392018665,227356,https://www.blackrock.com/us/individual/produc...
3,BACAX,Energy Opportunities Fund,-3.40,-9.42,4.78,17.90,1.45,,"Apr 30, 2025","Feb 16, 2005",159086683,227359,https://www.blackrock.com/us/individual/produc...
4,PCBAX,Tactical Opportunities Fund,0.21,4.64,6.76,5.13,3.52,,"Apr 30, 2025","Dec 29, 1988",180120548,227384,https://www.blackrock.com/us/individual/produc...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1127,XMUNX,BlackRock Municipal Credit Alpha Portfolio,-1.29,4.26,2.53,1.28,1.94,,"Apr 30, 2025","Mar 26, 2025",489352,342395,https://www.blackrock.com/us/individual/produc...
1128,MUNEX,BlackRock Municipal Credit Alpha Portfolio,-1.05,5.05,3.30,2.04,2.71,,"Apr 30, 2025","Aug 01, 2003",461813791,342396,https://www.blackrock.com/us/individual/produc...
1129,MUNUX,BlackRock Municipal Credit Alpha Portfolio,-1.29,4.26,2.53,1.28,1.94,,"Apr 30, 2025","Mar 26, 2025",3515772,342401,https://www.blackrock.com/us/individual/produc...
1130,TOPC,iShares S&P 500 3% Capped ETF,-,-,-,-,-,-,"Apr 30, 2025","Apr 15, 2025",9519394,342676,https://www.blackrock.com/us/individual/produc...


## Bond Funds

### BlackRock

In [15]:
# Filter for iShares AND (bonds OR fixed income) in Name (case-insensitive).
# na=False treats a missing Name as "no match"; without it the masks contain
# NaN and cannot be combined or used for boolean indexing.
fund_names = blackrock_etf_df['Name']
ishares_filter = fund_names.str.contains(r'ishares', case=False, regex=True, na=False)
bond_filter = fund_names.str.contains(r'bond|fixed income', case=False, regex=True, na=False)
bond_funds_df = blackrock_etf_df[ishares_filter & bond_filter].reset_index(drop=True)

# Filter out Tickers containing spaces (matched literally), and drop rows
# whose Ticker is missing so the ticker list holds only usable strings.
fund_tickers = bond_funds_df['Ticker']
no_space_filter = fund_tickers.notna() & ~fund_tickers.str.contains(" ", regex=False, na=False)
bond_funds_df = bond_funds_df[no_space_filter].reset_index(drop=True)

bond_tickers = bond_funds_df['Ticker'].tolist()

# Print Outputs
print("Number of iShares bond/fixed income funds:", len(bond_funds_df))
print(bond_tickers)
bond_funds_df.head(10)

Number of iShares bond/fixed income funds: 147
['WFBIX', 'BMOIX', 'BMOAX', 'IGLB', 'ILTB', 'QLTA', 'STIP', 'IGSB', 'SHY', 'TLH', 'TLT', 'IEI', 'IEF', 'AGZ', 'AGG', 'USIG', 'GNMA', 'GBF', 'IGIB', 'GVI', 'SHV', 'TIP', 'GOVT', 'CEMB', 'EMHY', 'LEMB', 'FLOT', 'HYXU', 'GHYG', 'HYG', 'LQD', 'EMB', 'CMF', 'MUB', 'NYF', 'SUB', 'ISHG', 'IGOV', 'NEAR', 'ISTB', 'SLQD', 'SHYG', 'ICSH', 'TFLO', 'BYLD', 'LQDH', 'HYGH', 'IUSB', 'FIBR', 'MEAR', 'IBDQ', 'ICVT', 'IGBH', 'IAGG', 'BAIPX', 'BIIPX', 'BKIPX', 'IBMN', 'FALN', 'HYXF', 'IBDR', 'IMTB', 'IGEB', 'HYDB', 'SUSC', 'SUSB', 'IBDS', 'USHY', 'LQDI', 'BMOPX', 'IBDT', 'EAGG', 'HYBB', 'BGRN', 'BIDPX', 'BIDAX', 'BIDIX', 'BIDKX', 'IBMO', 'IBMP', 'IBMQ', 'IBHE', 'IBDU', 'IBTF', 'IBTG', 'IBTH', 'IBTI', 'IBTJ', 'SGOV', 'IBDV', 'EUSB', 'IBTK', 'GOVZ', 'IBHF', 'LQDB', 'IBDW', 'IBTL', 'IBHG', 'ELQD', 'IBHH', 'IBHI', 'HYGI', 'AGIH', 'AGRH', 'IBDX', 'IBTM', 'TLTW', 'HYGW', 'LQDW', 'BEMB', 'IBMR', 'IBHJ', 'IBDY', 'IBTO', 'IBIJ', 'IBII', 'IBIC', 'IBIH', 'IBIG', 'IBIE',

Unnamed: 0,Ticker,Name,YTD (%),1Y (%),3Y (%),5Y (%),10Y (%),Incept (%),Perf. as of,Inception Date,Net Assets,Product_ID,URL
0,WFBIX,iShares U.S. Aggregate Bond Index Fund,3.19,8.03,1.98,-0.68,1.49,,"Apr 30, 2025","Feb 02, 2010",2690949902,227787,https://www.blackrock.com/us/individual/produc...
1,BMOIX,iShares U.S. Aggregate Bond Index Fund,3.17,7.98,1.96,-0.73,1.44,,"Apr 30, 2025","Mar 31, 2011",183405040,227788,https://www.blackrock.com/us/individual/produc...
2,BMOAX,iShares U.S. Aggregate Bond Index Fund,3.08,7.71,1.71,-0.98,1.19,,"Apr 30, 2025","Mar 31, 2011",172916588,227790,https://www.blackrock.com/us/individual/produc...
3,IGLB,iShares 10+ Year Investment Grade Corporate Bo...,1.1,6.19,0.87,-1.95,2.09,4.27,"Apr 30, 2025","Dec 08, 2009",2370889678,239423,https://www.blackrock.com/us/individual/produc...
4,ILTB,iShares Core 10+ Year USD Bond ETF,2.2,6.73,-1.03,-3.92,1.47,3.87,"Apr 30, 2025","Dec 08, 2009",592240607,239424,https://www.blackrock.com/us/individual/produc...
5,QLTA,iShares Aaa - A Rated Corporate Bond ETF,2.51,7.33,2.46,-0.45,2.01,2.37,"Apr 30, 2025","Feb 14, 2012",1668844925,239431,https://www.blackrock.com/us/individual/produc...
6,STIP,iShares 0-5 Year TIPS Bond ETF,3.86,7.96,3.54,4.04,2.85,2.31,"Apr 30, 2025","Dec 01, 2010",12556294961,239450,https://www.blackrock.com/us/individual/produc...
7,IGSB,iShares 1-5 Year Investment Grade Corporate Bo...,2.72,7.88,4.51,2.45,2.41,2.77,"Apr 30, 2025","Jan 05, 2007",21149463428,239451,https://www.blackrock.com/us/individual/produc...
8,SHY,iShares 1-3 Year Treasury Bond ETF,2.39,6.52,3.15,1.15,1.43,1.93,"Apr 30, 2025","Jul 22, 2002",24226844578,239452,https://www.blackrock.com/us/individual/produc...
9,TLH,iShares 10-20 Year Treasury Bond ETF,4.02,7.73,-2.63,-6.71,-0.35,2.95,"Apr 30, 2025","Jan 05, 2007",10871384503,239453,https://www.blackrock.com/us/individual/produc...


In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import concurrent.futures
import time
from tqdm.notebook import tqdm  # For a nice progress bar

# Maps each output metric key to the CSS class of the product-page container
# it is scraped from.
# NOTE(review): 'yield_to_maturity' is read from the 'col-yieldToWorst' element
# and 'weighted_avg_maturity' from 'col-weightedAvgLife' — confirm these are
# the intended fields.
_BOND_METRIC_CSS_CLASSES = {
    'yield_to_maturity': 'product-data-item col-yieldToWorst',
    'weighted_avg_maturity': 'product-data-item col-weightedAvgLife',
    'convexity': 'product-data-item col-convexity',
    'weighted_avg_coupon': 'product-data-item col-weightedAvgCouponFi',
}

# Browser-like User-Agent sent with every product-page request.
_BLACKROCK_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}


def extract_bond_metrics(row):
    """Scrape bond metrics for one fund from its BlackRock product page.

    Args:
        row: Mapping with a 'Ticker' entry and a 'URL' entry (the product
            page address; may be None/NaN/empty when no link is known).

    Returns:
        dict with keys 'ticker', 'yield_to_maturity', 'weighted_avg_maturity',
        'convexity' and 'weighted_avg_coupon'. Each metric is the stripped
        text found on the page, or None when the page could not be fetched or
        the element is absent.

    Errors raised while fetching or parsing are caught, reported with print,
    and result in the metrics gathered so far being returned.
    """
    ticker = row['Ticker']
    url = row['URL']

    # Start with every metric unset; key order is 'ticker' then the metrics.
    metrics = {'ticker': ticker, **dict.fromkeys(_BOND_METRIC_CSS_CLASSES)}

    # No usable URL (None, NaN, empty string): nothing to fetch, so return the
    # empty result directly rather than via a request exception.
    if not isinstance(url, str) or not url.strip():
        return metrics

    try:
        response = requests.get(url, headers=_BLACKROCK_REQUEST_HEADERS, timeout=10)
        if response.status_code != 200:
            # Retry once with the short product URL built from the numeric ID.
            product_id_match = re.search(r'/products/(\d+)/', url)
            if not product_id_match:
                return metrics
            fallback_url = f"https://www.blackrock.com/us/individual/products/{product_id_match.group(1)}"
            response = requests.get(fallback_url, headers=_BLACKROCK_REQUEST_HEADERS, timeout=10)
            if response.status_code != 200:
                return metrics

        soup = BeautifulSoup(response.text, "html.parser")

        # Same lookup for every metric: container div -> inner 'data' div.
        for metric_key, css_class in _BOND_METRIC_CSS_CLASSES.items():
            container = soup.find('div', class_=css_class)
            value_div = container.find('div', class_='data') if container is not None else None
            if value_div is not None:
                metrics[metric_key] = value_div.text.strip()

    except Exception as e:
        # Best-effort scrape: report and fall through so one bad page does not
        # abort the whole batch.
        print(f"Error extracting metrics for {ticker}: {str(e)}")

    return metrics

# Set the maximum number of concurrent workers
# Adjust based on your system and network capabilities
MAX_WORKERS = 10

print(f"Extracting bond metrics for {len(bond_funds_df)} funds using {MAX_WORKERS} parallel workers...")

# Create a list to store the results
metrics_data = []

# Convert DataFrame rows to dictionaries for processing
rows = bond_funds_df.to_dict('records')

# Process in parallel using ThreadPoolExecutor
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    # Submit all tasks and track with tqdm
    futures = [executor.submit(extract_bond_metrics, row) for row in rows]

    # Results arrive in completion order, not submission order; the merge
    # below re-aligns them by ticker.
    for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
        try:
            metrics_data.append(future.result())
        except Exception as exc:
            print(f"A task generated an exception: {str(exc)}")

# Raw (string) metric columns produced by extract_bond_metrics.
raw_metric_columns = ['yield_to_maturity', 'weighted_avg_maturity', 'convexity', 'weighted_avg_coupon']

# Passing the columns explicitly keeps the frame well-formed even when no
# results came back (otherwise the column selection would raise KeyError).
metrics_df = pd.DataFrame(metrics_data, columns=['ticker'] + raw_metric_columns)

# Merge the results with the original DataFrame
bond_funds_df = pd.merge(
    bond_funds_df,
    metrics_df,
    left_on='Ticker',
    right_on='ticker',
    how='left'
).drop('ticker', axis=1)

# Convert to numeric values where possible. Anything unparseable (including
# the "-" placeholder) becomes NaN via errors='coerce'; percentages are
# stored as fractions.
bond_funds_df['Yield_To_Maturity'] = pd.to_numeric(
    bond_funds_df['yield_to_maturity'].str.replace('%', '', regex=False), errors='coerce'
) / 100
bond_funds_df['Weighted_Avg_Maturity'] = pd.to_numeric(
    bond_funds_df['weighted_avg_maturity'].str.replace(' yrs', '', regex=False), errors='coerce'
)
bond_funds_df['Convexity'] = pd.to_numeric(bond_funds_df['convexity'], errors='coerce')
bond_funds_df['Weighted_Avg_Coupon'] = pd.to_numeric(
    bond_funds_df['weighted_avg_coupon'].str.replace('%', '', regex=False), errors='coerce'
) / 100

# Drop the original string columns
bond_funds_df = bond_funds_df.drop(raw_metric_columns, axis=1)
bond_funds_df = bond_funds_df.sort_values(by="Ticker", ascending=True).reset_index(drop=True)

# Count only funds for which at least one metric was actually scraped.
# len(metrics_data) counts every finished task, including those that found
# nothing, which made the old message always report full success.
funds_with_metrics = sum(
    any(result[column] is not None for column in raw_metric_columns)
    for result in metrics_data
)
print(f"Successfully extracted metrics for {funds_with_metrics} out of {len(bond_funds_df)} funds")
bond_funds_df.head()

Extracting bond metrics for 147 funds using 10 parallel workers...


  0%|          | 0/147 [00:00<?, ?it/s]

Successfully extracted metrics for 147 out of 147 funds


Unnamed: 0,Ticker,Name,YTD (%),1Y (%),3Y (%),5Y (%),10Y (%),Incept (%),Perf. as of,Inception Date,Net Assets,Product_ID,URL,Yield_To_Maturity,Weighted_Avg_Maturity,Convexity,Weighted_Avg_Coupon
0,AGG,iShares Core U.S. Aggregate Bond ETF,3.16,8.0,1.96,-0.68,1.50,3.1,"Apr 30, 2025","Sep 22, 2003",123886933466,239458,https://www.blackrock.com/us/individual/produc...,0.0481,8.1,0.52,0.0355
1,AGIH,iShares Inflation Hedged U.S. Aggregate Bond ETF,3.42,7.17,-,-,-,2.72,"Apr 30, 2025","Jun 22, 2022",2462074,328179,https://www.blackrock.com/us/individual/produc...,0.0476,7.79,0.51,
2,AGRH,iShares Interest Rate Hedged U.S. Aggregate Bo...,0.77,4.5,-,-,-,5.19,"Apr 30, 2025","Jun 22, 2022",7752588,328180,https://www.blackrock.com/us/individual/produc...,0.0532,8.11,-0.16,
3,AGZ,iShares Agency Bond ETF,2.85,6.98,2.84,0.48,1.74,2.34,"Apr 30, 2025","Nov 05, 2008",609078743,239457,https://www.blackrock.com/us/individual/produc...,0.0431,4.11,0.24,0.0347
4,BAIPX,iShares Short-Term TIPS Bond Index Fund,3.76,7.54,3.19,3.67,-,2.83,"Apr 30, 2025","Feb 16, 2016",5159871,282302,https://www.blackrock.com/us/individual/produc...,,,,


In [17]:
# Save the bond fund table (including the scraped metric columns) as CSV,
# without the positional row index.
data_output = 'fundamentals_blackrock_bonds.csv'
bond_funds_df.to_csv(path_or_buf=data_output, index=False)

In [18]:
# Unique bond fund tickers in alphabetical order.
bond_tickers = sorted(bond_funds_df['Ticker'].unique().tolist())

# Write all tickers in one operation
bond_tickers_path = 'tickers_bond.txt'
with open(bond_tickers_path, 'w') as f:
    f.write('\n'.join(bond_tickers))

# Report the file that was actually written (the old message said "bond_tickers.txt").
print(f"Successfully wrote {len(bond_tickers)} tickers to {bond_tickers_path}")

Successfully wrote 147 tickers to bond_tickers.txt
