In [497]:
import logging
from urllib.parse import urljoin
from io import StringIO
from typing import Sequence
from datetime import date, datetime

import requests
import pandas as pd
import numpy as np


logger = logging.getLogger(__name__)


def check_missing_cols(exp_cols : Sequence, returned_cols : Sequence, raise_error : bool=False) -> None:
    """Convenience function to log if we are missing columns from a request.
    
    raises: ValueError if raise_error=True and if we are missing expected columns
    """
    missing_cols = [k for k in exp_cols if k not in returned_cols]
    
    if missing_cols:
        logger.error(f"Missing expectd columns {missing_cols}")
        
        if raise_error:
            raise ValueError(
                f"Missing required columns from response. Got {returned_cols}"
                f"Was expecting at least all of {exp_cols}"
                )
        

class SecurityListing:
    """Describes a single security (ETF/Mutual Fund etc), including its product page"""
    provider : str
    fund_name : str
    inception_date : date
    product_url: str
    product_id: str
    ticker : str
    cusip: str
    isin: str
    asset_class: str
    country: str
    region: str
    net_assets: float


class FundHolding:
    """Describes a holding for a single fund (eg AAPL in SPY)"""
    fund_ticker : str
    as_of_date : date
    ticker : str
    amount : float
    market_value : float
    notional_value : float # != market_value eg for futures
    weight : float
    price : float
    location : str
    exchange : str
    currency : str
    fx_rate : float
    market_currency : str

class ProviderListings:
    """Encapsulate how to retrieve all listings from an ETF/Mutual Fund provider"""
    provider : str
    
    @classmethod
    def retrieve_listings(cls) -> pd.DataFrame:
        """Query the provider website for all listings and return as a dataframe.
        Should return with all columns in SecurityListing
        """
        raise NotImplementedError
    
    @classmethod
    def retrieve_holdings(cls, sec_listing : SecurityListing, holdings_date : date) -> pd.DataFrame:
        """Query the provider website for the holdings of the given security
        and returns as a dataframe.
        
        Should return with all columns in FundHolding
        """
        raise NotImplementedError



class ISharesListings(ProviderListings):
    provider = "IShares"
    host = 'https://www.ishares.com'
    listing_endpoint = (
        "/us/product-screener/product-screener-v3.1.jsn?dcrPath=/templatedata/config/product-screener-v3/"
        "data/en/us-ishares/ishares-product-screener-backend-config&siteEntryPassthrough=true"
    )
    ajax_endpoint = "1467271812596.ajax"
    
    response_mapping = {
        'fundName': 'fund_name',
        "inceptionDate": "inception_date",
        'localExchangeTicker': 'ticker',
        'cusip': 'cusip',
        'isin': 'isin',
        'aladdinAssetClass': 'asset_class',
        'aladdinSubAssetClass': 'subasset_class',
        'aladdinCountry': 'country',
        'aladdinRegion': 'region',
        'productPageUrl': 'product_url',
        "portfolioId": "product_id",
        "totalNetAssets": "net_assets",
    }
    exp_cols = ['productPageUrl', 'localExchangeTicker'] # bare minimum to be returned
    
    holding_col_mapping = {
        'Ticker': "ticker",
        'Name': 'name',
        'Sector': 'sector',
        'Asset Class': 'asset_class',
        'Market Value': "market_value",
        'Weight (%)' : "weight",
        'Notional Value': "notional_value",
        'Shares': "amount",
        'Price' : 'price',
        'Location': "location",
        'Exchange': "exchange",
        'Currency': "currency",
        'FX Rate': "fx_rate",
        'Market Currency': "market_currency",
    }
    exp_holding_cols = ["Ticker", "Shares", 'Market Value']
    

    @classmethod
    def retrieve_listings(cls):
        listing_url = urljoin(cls.host, cls.listing_endpoint)
        resp = requests.get(listing_url)
        resp.raise_for_status()
                
        resp_df = pd.DataFrame(resp.json())
        
        check_missing_cols(cls.response_mapping, resp_df.index)
        check_missing_cols(cls.exp_cols, resp_df.index, raise_error=True)            
        
        resp_df_ = resp_df.reindex(cls.response_mapping).rename(index=cls.response_mapping).T
        
        build_url = lambda x: f"{cls.host}/{x.lstrip('/')}"
        resp_df_.loc[:, 'product_url'] = resp_df_['product_url'].apply(build_url)
        
        def parse_date(x):
            if (date_:=x.get('r', None)):
                try:
                    return datetime.strptime(str(date_), "%Y%m%d").date()
                except:
                    return pd.NaT
            return pd.NaT
        
        def parse_net(x):
            try:
                return float(x.get('r'))
            except:
                return np.nan            
                
                
        resp_df_.loc[:, 'inception_date'] = resp_df_['inception_date'].apply(parse_date)
        resp_df_.loc[:, 'net_assets'] = resp_df_['net_assets'].apply(parse_net)
        
        return resp_df_.reset_index(drop=True)
    
    
    @classmethod
    def retrieve_holdings(cls, sec_listing : SecurityListing, holdings_date : date):
        endpoint = f'{sec_listing.product_url.rstrip("/")}/{cls.ajax_endpoint}'
        req_params = {
            "fileType": "csv",
            "fileName": f"{sec_listing.ticker}_holdings",
            "dataType": "fund",
            "asOfDate": holdings_date.strftime("%Y%m%d"),
        }
        
        logger.info(f"Querying {cls.provider} for {sec_listing.ticker} holdings as of {holdings_date}")
        resp = requests.get(endpoint, params=req_params)
        resp.raise_for_status()
    
        raw_data = StringIO(resp.content.decode(encoding="UTF-8-SIG"))
        summary_data = [raw_data.readline().rstrip('\n') for _ in range(9)]
        
        date_info = {k.split(",", 1)[-1].strip("'\"") for k in summary_data if "Fund Holdings as of".lower() in k.lower()}
        if date_info == "-":
            raise ValueError(f"Found '-' as holdings date, no data received")
        elif len(date_info) != 1:
            raise ValueError(f"Was expecting an 'as of date' indicator, instead found: {date_info}")
        
        logger.info(f"Found reported holdings date string {date_info}, attempting to parse")
        date_info = datetime.strptime(date_info.pop(), "%b %d, %Y").date() # eg "Jan 03, 2022"

        if date_info != holdings_date:
            raise ValueError(f"Queried for date {holdings_date} but received holdings for {date_info} instead")

                
        if summary_data[-1] != '\xa0':
            logger.warning(
                f"Was expecting \xa0 as the last line in the summary block."
                f" Found {summary_data[-1]} instead."
            )
            
        data_df = pd.read_csv(raw_data, thousands=",", na_values='-') # shouldn't need to skip any rows now
        
        check_missing_cols(cls.exp_holding_cols, data_df.columns, raise_error=True)
        data_df = data_df.rename(columns=cls.holding_col_mapping)
        
        data_df = data_df[~data_df['weight'].isna()]
        data_df.loc[:, "fund_ticker"] = sec_listing.ticker
        data_df.loc[:, "as_of_date"] = holdings_date
        return data_df
        
        
ishares_df = ISharesListings.retrieve_listings()

  resp_df_.loc[:, 'net_assets'] = resp_df_['net_assets'].apply(parse_net)


In [451]:
urls = list(ishares_df['product_url'])
urls[0]

'https://www.ishares.com/us/products/239619/ishares-msci-china-etf'

In [458]:
ishares_df[ishares_df['product_url'].str.contains('500')]

Unnamed: 0,fund_name,inception_date,ticker,cusip,isin,asset_class,subasset_class,country,region,product_url,product_id
107,iShares Core S&P 500 ETF,"{'d': 'May 15, 2000', 'r': 20000515}",IVV,464287200,US4642872000,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/239726/ish...,239726
108,iShares S&P 500 Growth ETF,"{'d': 'May 22, 2000', 'r': 20000522}",IVW,464287309,US4642873099,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/239725/ish...,239725
114,iShares S&P 500 Value ETF,"{'d': 'May 22, 2000', 'r': 20000522}",IVE,464287408,US4642874089,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/239728/ish...,239728
184,iShares Russell 2500 ETF,"{'d': 'Jul 06, 2017', 'r': 20170706}",SMMD,46435G268,US46435G2681,Equity,Mid/Small Cap,United States,North America,https://www.ishares.com/us/products/288024/ish...,288024
217,iShares S&P 500 Index Fund,"{'d': 'Aug 06, 2018', 'r': 20180806}",BSPPX,066923236,US0669232367,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/298415/ish...,298415
247,iShares S&P 500 Index Fund,"{'d': 'Apr 11, 2013', 'r': 20130411}",BSPAX,066923566,US0669235667,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/251379/bla...,251379
248,iShares S&P 500 Index Fund,"{'d': 'Apr 11, 2013', 'r': 20130411}",BSPIX,066923558,US0669235584,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/251378/bla...,251378
256,iShares ESG Screened S&P 500 ETF,"{'d': 'Sep 22, 2020', 'r': 20200922}",XVV,46436E569,US46436E5693,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/315917/ish...,315917
263,iShares S&P 500 Index Fund,"{'d': 'Jul 01, 2019', 'r': 20190701}",BSPGX,066923194,US0669231948,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/308765/ish...,308765
290,iShares S&P 500 Index Fund,"{'d': 'Apr 22, 2013', 'r': 20130422}",BSPSX,066923533,US0669235337,Equity,Large Cap,United States,North America,https://www.ishares.com/us/products/251601/bla...,251601


In [459]:
b = ishares_df.iloc[107]
a = SecurityListing()
a.ticker = b.ticker
a.product_url = b.product_url

In [508]:
df = ISharesListings.retrieve_holdings(a, datetime.strptime('20131220', "%Y%m%d").date())
df

Unnamed: 0,ticker,name,sector,asset_class,market_value,weight,notional_value,amount,price,location,exchange,currency,fx_rate,market_currency,Accrual Date,fund_ticker,as_of_date
0,AAPL*,APPLE INC,Information Technology,Equity,1.594925e+09,3.07,,2906735.0,548.70,United States,Bolsa Mexicana De Valores,USD,1.0,USD,,IVV,2013-12-20
1,XOM,EXXON MOBIL CORP,Energy,Equity,1.389716e+09,2.68,,14083058.0,98.68,United States,New York Stock Exchange Inc.,USD,1.0,USD,,IVV,2013-12-20
2,GOOGL,GOOGLE INC CLASS A,Information Technology,Equity,9.851003e+08,1.90,,895025.0,1100.64,United States,Spot Regulated Market - Bvb,USD,1.0,USD,,IVV,2013-12-20
3,MSFT,MICROSOFT CORP,Information Technology,Equity,8.917892e+08,1.72,,24253174.0,36.77,United States,NASDAQ,USD,1.0,USD,,IVV,2013-12-20
4,GEC,GENERAL ELECTRIC,Industrials,Equity,8.914768e+08,1.72,,32583217.0,27.36,United States,Deutsche Boerse Xetra,USD,1.0,USD,,IVV,2013-12-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,TER,TERADYNE INC,Information Technology,Equity,1.025474e+07,0.02,,612955.0,16.73,United States,NASDAQ,USD,1.0,USD,,IVV,2013-12-20
497,UNS1,VIAVI SOLUTIONS INC,Information Technology,Equity,9.536256e+06,0.02,,760467.0,12.54,United States,Boerse Stuttgart,USD,1.0,USD,,IVV,2013-12-20
498,JBL,JABIL INC,Information Technology,Equity,9.517345e+06,0.02,,590406.0,16.12,United States,New York Stock Exchange Inc.,USD,1.0,USD,,IVV,2013-12-20
499,GHC,GRAHAM HOLDINGS COMPANY CLASS B,Consumer Discretionary,Equity,9.039303e+06,0.02,,14046.0,643.55,United States,New York Stock Exchange Inc.,USD,1.0,USD,,IVV,2013-12-20


Unnamed: 0,ticker,name,sector,asset_class,market_value,weight,notional_value,amount,price,location,exchange,currency,fx_rate,market_currency,Accrual Date,fund_ticker,as_of_date
0,AAPL,APPLE INC,Information Technology,Equity,1.676285e+10,5.85,1.676285e+10,134081317.0,125.02,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
1,MSFT,MICROSOFT CORP,Information Technology,Equity,1.485890e+10,5.18,1.485890e+10,66838628.0,222.31,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
2,AMZN,AMAZON COM INC,Consumer Discretionary,Equity,6.614604e+09,2.31,6.614604e+09,79578965.0,83.12,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
3,BRKB,BERKSHIRE HATHAWAY INC CLASS B,Financials,Equity,5.054807e+09,1.76,5.054807e+09,16154705.0,312.90,United States,New York Stock Exchange Inc.,USD,1.0,USD,,IVV,2023-01-05
4,GOOGL,ALPHABET INC CLASS A,Communication,Equity,4.616517e+09,1.61,4.616517e+09,53555877.0,86.20,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
503,FOX,FOX CORP CLASS B,Communication,Equity,3.616718e+07,0.01,3.616718e+07,1249298.0,28.95,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
504,DISH,DISH NETWORK CORP CLASS A,Communication,Equity,3.384813e+07,0.01,3.384813e+07,2253537.0,15.02,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
505,NWS,NEWS CORP CLASS B,Communication,Equity,2.018840e+07,0.01,2.018840e+07,1068735.0,18.89,United States,NASDAQ,USD,1.0,USD,,IVV,2023-01-05
506,ETD_USD,ETD USD BALANCE WITH R93531,Cash and/or Derivatives,Cash,3.490680e+03,0.00,3.490680e+03,3491.0,100.00,United States,,USD,1.0,USD,"Jan 01, 2000",IVV,2023-01-05


In [446]:
list(df.columns)

['Ticker',
 'Name',
 'Sector',
 'Asset Class',
 'Market Value',
 'Weight (%)',
 'Notional Value',
 'Shares',
 'Price',
 'Location',
 'Exchange',
 'Currency',
 'FX Rate',
 'Market Currency',
 'Accrual Date']

In [None]:
url = f'https://www.ishares.com/us/products/239726/ishares-core-sp-500-etf/1467271812596.ajax?fileType=csv&fileName=IVV_holdings&dataType=fund&asOfDate={date_str}'

In [334]:
a = [k for k in urls if '500' in k][0]

url = urljoin(ISHARES_HOST, a)

query_dates = pd.bdate_range('2022-01-01', '2023-01-01')

def get_holdings(date):
    date_str = date.strftime("%Y%m%d")
    url = f'https://www.ishares.com/us/products/239726/ishares-core-sp-500-etf/1467271812596.ajax?fileType=csv&fileName=IVV_holdings&dataType=fund&asOfDate={date_str}'
    return requests.get(url)
    

In [420]:
r = get_holdings(query_dates[0])
s = StringIO(r.content.decode(encoding="UTF-8-SIG"))

l = [s.readline().rstrip('\n') for _ in range(9)]

df1 = pd.read_csv(s, thousands=",", na_values='-')

Unnamed: 0,Fund Holdings as of,"Jan 03, 2022"


In [418]:
df1

Unnamed: 0,Ticker,Name,Sector,Asset Class,Market Value,Weight (%),Notional Value,Shares,Price,Location,Exchange,Currency,FX Rate,Market Currency,Accrual Date
0,AAPL,APPLE INC,Information Technology,Equity,2.365912e+10,6.97,2.365912e+10,129988034.0,182.01,United States,NASDAQ,USD,1.0,USD,
1,MSFT,MICROSOFT CORP,Information Technology,Equity,2.095969e+10,6.17,2.095969e+10,62612968.0,334.75,United States,NASDAQ,USD,1.0,USD,
2,AMZN,AMAZON COM INC,Consumer Discretionary,Equity,1.239589e+10,3.65,1.239589e+10,3637195.0,3408.09,United States,NASDAQ,USD,1.0,USD,
3,TSLA,TESLA INC,Consumer Discretionary,Equity,8.138834e+09,2.40,8.138834e+09,6783605.0,1199.78,United States,NASDAQ,USD,1.0,USD,
4,GOOGL,ALPHABET INC CLASS A,Communication,Equity,7.274485e+09,2.14,7.274485e+09,2508590.0,2899.83,United States,NASDAQ,USD,1.0,USD,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
506,GPS,GAP INC,Consumer Discretionary,Equity,2.965429e+07,0.01,2.965429e+07,1611646.0,18.40,United States,New York Stock Exchange Inc.,USD,1.0,USD,
507,NWS,NEWS CORP CLASS B,Communication,Equity,2.089424e+07,0.01,2.089424e+07,913210.0,22.88,United States,NASDAQ,USD,1.0,USD,
508,ESH2,S&P500 EMINI MAR 22,Cash and/or Derivatives,Futures,0.000000e+00,0.00,7.636063e+08,3191.0,4786.00,,Index And Options Market,USD,1.0,USD,
509,,,,,,,,,,,,,,,


In [414]:
[len(x) for x in l]

[26, 35, 30, 36, 10, 9, 9, 10, 2]

In [415]:
df1

Unnamed: 0,Ticker,Name,Sector,Asset Class,Market Value,Weight (%),Notional Value,Shares,Price,Location,Exchange,Currency,FX Rate,Market Currency,Accrual Date
0,AAPL,APPLE INC,Information Technology,Equity,2.365912e+10,6.97,2.365912e+10,129988034.0,182.01,United States,NASDAQ,USD,1.0,USD,
1,MSFT,MICROSOFT CORP,Information Technology,Equity,2.095969e+10,6.17,2.095969e+10,62612968.0,334.75,United States,NASDAQ,USD,1.0,USD,
2,AMZN,AMAZON COM INC,Consumer Discretionary,Equity,1.239589e+10,3.65,1.239589e+10,3637195.0,3408.09,United States,NASDAQ,USD,1.0,USD,
3,TSLA,TESLA INC,Consumer Discretionary,Equity,8.138834e+09,2.40,8.138834e+09,6783605.0,1199.78,United States,NASDAQ,USD,1.0,USD,
4,GOOGL,ALPHABET INC CLASS A,Communication,Equity,7.274485e+09,2.14,7.274485e+09,2508590.0,2899.83,United States,NASDAQ,USD,1.0,USD,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
506,GPS,GAP INC,Consumer Discretionary,Equity,2.965429e+07,0.01,2.965429e+07,1611646.0,18.40,United States,New York Stock Exchange Inc.,USD,1.0,USD,
507,NWS,NEWS CORP CLASS B,Communication,Equity,2.089424e+07,0.01,2.089424e+07,913210.0,22.88,United States,NASDAQ,USD,1.0,USD,
508,ESH2,S&P500 EMINI MAR 22,Cash and/or Derivatives,Futures,0.000000e+00,0.00,7.636063e+08,3191.0,4786.00,,Index And Options Market,USD,1.0,USD,
509,,,,,,,,,,,,,,,


In [336]:
dfs = {}

for query_date in query_dates[:10]:
    r = get_holdings(query_date)
    dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)

  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)
  dfs[query_date] = pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2)


In [231]:
ssga_url = 'https://www.ssga.com/bin/v1/ssmp/fund/fundfinder?country=us&language=en&role=intermediary&product=@all&ui=fund-finder'
r = requests.get(ssga_url)
r_ = r.json()

In [252]:
l = r_['data']['funds'].keys()
pd.concat([pd.DataFrame(r_['data']['funds'][i]['datas']) for i in l])

Unnamed: 0,fundName,fundTicker,fundUri,netRatio,ter,nav,dailyChange,asOfDate,fundNamePerf,PerfAsOf,...,asOfDate_1,mo3,mo3_1,benchmark,domicile,aum,primaryExchange,closePrice,bidAsk,premiumDiscount
0,State Street Aggregate Bond Index Fund - Class A,SSFCX,/us/en/intermediary/ic/funds/state-street-aggr...,"[0.475%, 0.475]","[0.607%, 0.607]","[$85.83, 85.83]",+$0.92 (+1.07%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
1,State Street Aggregate Bond Index Fund - Class I,SSFDX,/us/en/intermediary/ic/funds/state-street-aggr...,"[0.225%, 0.225]","[0.357%, 0.357]","[$85.58, 85.58]",+$0.93 (+1.09%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
2,State Street Aggregate Bond Index Fund - Class K,SSFEX,/us/en/intermediary/ic/funds/state-street-aggr...,"[0.025%, 0.025]","[0.157%, 0.157]","[$85.58, 85.58]",+$0.92 (+1.08%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
3,State Street Emerging Markets Equity Index Fun...,SSKEX,/us/en/intermediary/ic/funds/state-street-emer...,"[0.17%, 0.17]","[0.30%, 0.3]","[$60.13, 60.13]",+$0.72 (+1.21%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
4,State Street Equity 500 Index Fund - Administr...,STFAX,/us/en/intermediary/ic/funds/state-street-equi...,"[0.17%, 0.17]","[0.27%, 0.27]","[$285.20, 285.2]",+$6.50 (+2.28%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,The Industrial Select Sector SPDR® Fund,XLI,/us/en/intermediary/etfs/funds/the-industrial-...,,"[0.10%, 0.1]","[$98.25, 98.25]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$13,664.21 M, 13664.21]",NYSE ARCA EXCHANGE,"[$98.22, 98.22]","[$98.24, 98.24]","[-0.02%, -0.02]"
137,The Materials Select Sector SPDR® Fund,XLB,/us/en/intermediary/etfs/funds/the-materials-s...,,"[0.10%, 0.1]","[$77.69, 77.69]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$5,265.61 M, 5265.61]",NYSE ARCA EXCHANGE,"[$77.70, 77.7]","[$77.69, 77.69]","[-0.01%, -0.01]"
138,The Real Estate Select Sector SPDR® Fund,XLRE,/us/en/intermediary/etfs/funds/the-real-estate...,,"[0.10%, 0.1]","[$36.81, 36.81]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$4,885.16 M, 4885.16]",NYSE ARCA EXCHANGE,"[$36.81, 36.81]","[$36.82, 36.82]","[0.02%, 0.02]"
139,The Technology Select Sector SPDR® Fund,XLK,/us/en/intermediary/etfs/funds/the-technology-...,,"[0.10%, 0.1]","[$121.15, 121.15]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$37,205.45 M, 37205.45]",NYSE ARCA EXCHANGE,"[$121.18, 121.18]","[$121.16, 121.16]","[0.01%, 0.01]"


In [264]:
r = requests.get('https://investor.vanguard.com/investment-products/list/funddetail', headers={"user-agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'})
r_ = r.json()

In [278]:
r_['fund']['entity'][0]#.keys()

dict_keys(['type', 'profile', 'minimum', 'risk', 'fees', 'link', 'dailyPrice', 'yield', 'ytd', 'monthEndAvgAnnualRtn'])

In [283]:
r_['fund']['entity'][0]#['profile']

{'type': 'priceMonthEndPerformance',
 'profile': {'fundId': '0924',
  'ticker': 'BSV',
  'instrumentId': 2845959,
  'shortName': 'Short-Term Bond ETF',
  'longName': 'Vanguard Short-Term Bond ETF',
  'cusip': '921937827',
  'IOVTicker': 'BSV.IV',
  'inceptionDate': '2007-04-03T00:00:00-04:00',
  'newspaperAbbreviation': 'Short-Term Bond     ',
  'style': 'Bond Funds',
  'type': 'Short-Term Bond',
  'category': 'Short-Term Bond',
  'customizedStyle': 'Bond - Short-term Investment',
  'fixedIncomeInvestmentStyleId': '1',
  'fixedIncomeInvestmentStyleName': 'Short-term Treasury',
  'secDesignation': '',
  'maximumYearlyInvestment': '',
  'expenseRatio': '0.0400',
  'expenseRatioAsOfDate': '2022-04-29T00:00:00-04:00',
  'isInternalFund': True,
  'isExternalFund': False,
  'isMutualFund': False,
  'isETF': True,
  'isVLIP': False,
  'isVVAP': False,
  'is529': False,
  'hasAssociatedInvestorFund': True,
  'hasMoreThan1ShareClass': True,
  'isPESite': False,
  'fundFact': {'isActiveFund': Tr