In [324]:
import logging
from urllib.parse import urljoin
from io import StringIO
from typing import Sequence

import requests
import pandas as pd


logger = logging.getLogger(__name__)

class SecurityListing:
    """Describes a single security (ETF, mutual fund, etc.), including its product page.

    The class only declares annotated attribute names; it defines no default
    values and no methods.
    """
    provider : str  # fund provider the listing belongs to
    fund_name : str
    product_url: str  # link to the security's product page
    product_id: str
    # Security identifiers
    ticker : str
    cusip: str
    isin: str
    # Classification fields
    asset_class: str
    country: str
    region: str


class FundHoldings:
    """Describes a holding of a fund as of a given date.

    NOTE(review): the original docstring was left unfinished ("Describes the");
    this wording is inferred from the attribute names below -- confirm.
    The class only declares annotated attribute names; it defines no default
    values and no methods.
    """
    fund_ticker : str
    fund_isin : str
    as_of_date : str # FIXME: annotated as str; presumably meant to become a real date type -- confirm
    holding_ticker : str
    

class ProviderListings:
    """Encapsulate how to retrieve listings and holdings from an ETF/Mutual Fund provider.

    This is an interface-style base class: subclasses set ``provider`` and
    override the ``retrieve_*`` classmethods. Both base implementations raise
    ``NotImplementedError`` so that a missing override fails loudly instead of
    silently returning ``None``.
    """
    provider : str

    @classmethod
    def retrieve_listings(cls, *args, **kwargs) -> pd.DataFrame:
        """Query the provider website for all listings and return them as a dataframe.

        Should return with all columns in SecurityListing.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError

    @classmethod
    def retrieve_holdings(cls, *args, **kwargs) -> pd.DataFrame:
        """Query the provider website for the holdings of the given security
        and return them as a dataframe.

        Should return with all columns in FundHoldings.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        # Previously this method had no body beyond the docstring and therefore
        # returned None, contradicting its return annotation.
        raise NotImplementedError



def log_missing_cols(exp_cols : Sequence, returned_cols : Sequence, raise_error : bool=False) -> None:
    """Convenience function to log if we are missing columns from a request.

    Args:
        exp_cols: Column names that are expected. Any iterable of names works
            (iterating a dict yields its keys).
        returned_cols: Column names actually returned; must support ``in``.
        raise_error: When True, raise instead of only logging.

    Raises:
        ValueError: if raise_error=True and expected columns are missing.
    """
    missing_cols = [col for col in exp_cols if col not in returned_cols]
    if not missing_cols:
        return

    logger.error(f"Missing expected columns {missing_cols}")

    if raise_error:
        # Each sentence ends with its own separator; the original adjacent
        # f-strings were concatenated with nothing between them.
        raise ValueError(
            f"Missing required columns from response: {missing_cols}. "
            f"Got {list(returned_cols)}. "
            f"Was expecting at least all of {list(exp_cols)}."
        )
        


class ISharesListings(ProviderListings):
    """Retrieve fund listings from the iShares product-screener endpoint."""
    provider = "IShares"
    host = 'https://www.ishares.com'
    listing_endpoint = (
        "/us/product-screener/product-screener-v3.1.jsn?dcrPath=/templatedata/config/product-screener-v3/"
        "data/en/us-ishares/ishares-product-screener-backend-config&siteEntryPassthrough=true"
    )
    # Maps field names in the screener response to the column names returned
    # by retrieve_listings.
    response_mapping = {
        'aladdinAssetClass': 'asset_class',
        'aladdinCountry': 'country',
        'aladdinMarketType': 'mkt_type',
        'aladdinRegion': 'region',
        'aladdinSubAssetClass': 'subasset_class',
        'cusip': 'cusip',
        'fundName': 'fund_name',
        'isin': 'isin',
        'localExchangeTicker': 'ticker',
        'productPageUrl': 'product_url',
    }
    exp_cols = ['productPageUrl', 'localExchangeTicker'] # bare minimum to be returned

    @classmethod
    def _absolute_url(cls, path: str) -> str:
        """Prefix ``path`` with ``cls.host``, leaving exactly one slash between them."""
        return f"{cls.host}/{path.lstrip('/')}"

    @classmethod
    def retrieve_listings(cls, timeout: float = 30) -> pd.DataFrame:
        """Query the iShares product screener and return one row per listing.

        Args:
            timeout: Seconds to wait for the HTTP request before giving up.
                Without a timeout ``requests`` may block indefinitely.

        Returns:
            A dataframe whose columns are the values of ``response_mapping``,
            with ``product_url`` prefixed by ``host`` and a fresh 0..n-1 index.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status.
            ValueError: if any field listed in ``exp_cols`` is absent.
        """
        listing_url = urljoin(cls.host, cls.listing_endpoint)
        resp = requests.get(listing_url, timeout=timeout)
        resp.raise_for_status()

        # Field names end up on the index of this frame (one column per
        # listing), which is why the checks below look at ``.index``.
        resp_df = pd.DataFrame(resp.json())

        log_missing_cols(cls.response_mapping, resp_df.index)
        log_missing_cols(cls.exp_cols, resp_df.index, raise_error=True)

        # Keep only the mapped fields, rename them, then transpose so that
        # each listing becomes a row.
        listings = (
            resp_df
            .reindex(list(cls.response_mapping))
            .rename(index=cls.response_mapping)
            .T
        )
        listings = listings.assign(
            product_url=listings['product_url'].apply(cls._absolute_url)
        )
        return listings.reset_index(drop=True)
    
    
# Fetch the iShares listings at notebook scope; this performs a live HTTP
# request every time the cell is run.
ishares_df = ISharesListings.retrieve_listings()

In [325]:
# Show the listings frame as this cell's output.
ishares_df

Unnamed: 0,asset_class,country,mkt_type,region,subasset_class,cusip,fund_name,isin,ticker,product_url
0,Equity,China,Emerging,Asia Pacific,Large/Mid Cap,46429B671,iShares MSCI China ETF,US46429B6719,MCHI,https://www.ishares.com/us/products/239619/ish...
1,Equity,Chile,Emerging,Latin America,All Cap,464286640,iShares MSCI Chile ETF,US4642866408,ECH,https://www.ishares.com/us/products/239618/ish...
2,Equity,Broad,Developed,Global,Large/Mid Cap,464287465,iShares MSCI EAFE ETF,US4642874659,EFA,https://www.ishares.com/us/products/239623/ish...
3,Equity,Broad,Developed,Global,Large/Mid Cap,464288885,iShares MSCI EAFE Growth ETF,US4642888857,EFG,https://www.ishares.com/us/products/239622/ish...
4,Equity,Denmark,Developed,Europe,All Cap,46429B523,iShares MSCI Denmark ETF,US46429B5232,EDEN,https://www.ishares.com/us/products/239621/ish...
...,...,...,...,...,...,...,...,...,...,...
431,Equity,Broad,Emerging,Global,Large/Mid Cap,464286657,iShares MSCI BIC ETF,US4642866572,BKF,https://www.ishares.com/us/products/239614/ish...
432,Equity,Brazil,Emerging,Latin America,Small Cap,464289131,iShares MSCI Brazil Small-Cap ETF,US4642891315,EWZS,https://www.ishares.com/us/products/239613/ish...
433,Equity,Broad,Developed,Global,Large Cap,46436E759,iShares ESG Advanced MSCI EAFE ETF,US46436E7590,DMXF,https://www.ishares.com/us/products/314362/ish...
434,Equity,Brazil,Emerging,Latin America,Large/Mid Cap,464286400,iShares MSCI Brazil ETF,US4642864007,EWZ,https://www.ishares.com/us/products/239612/ish...


In [327]:
# Plain Python list of every product page URL in the listings frame.
urls = ishares_df['product_url'].tolist()

In [286]:
# NOTE(review): in the code visible here `resp_df` is only assigned inside
# ISharesListings.retrieve_listings, so this cell presumably relies on leftover
# kernel state and would fail on a fresh kernel -- confirm or remove.
resp_df.T.to_clipboard()

In [121]:
# First product URL whose text contains '500'.
a = [k for k in urls if '500' in k][0]

# The host comes from ISharesListings; the ISHARES_HOST constant this cell
# used before is not defined anywhere in the notebook, so the cell failed on
# a fresh kernel.
url = urljoin(ISharesListings.host, a)

# Business days to query holdings for.
query_dates = pd.bdate_range('2022-01-01', '2023-01-01')

def get_holdings(date, timeout=30):
    """Request the holdings CSV for the given as-of date.

    The URL is hard-coded to the 'ishares-core-sp-500-etf' product page and
    the 'IVV_holdings' file name; only the ``asOfDate`` query value varies.

    Args:
        date: Any object with a ``strftime`` method (e.g. an element of a
            pandas date range); formatted as ``YYYYMMDD`` for the query.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The ``requests`` response object, unmodified; the HTTP status is not
        checked here.
    """
    date_str = date.strftime("%Y%m%d")
    url = f'https://www.ishares.com/us/products/239726/ishares-core-sp-500-etf/1467271812596.ajax?fileType=csv&fileName=IVV_holdings&dataType=fund&asOfDate={date_str}'
    return requests.get(url, timeout=timeout)
    

# Bare string expression (no assignment): the same product path that appears
# in the URL hard-coded in get_holdings.
'/us/products/239726/ishares-core-sp-500-etf'

In [214]:
r = get_holdings(query_dates[0])
# Fail here on an HTTP error instead of trying to parse an error page as CSV.
r.raise_for_status()
# `skipfooter` is not supported by the default C parser; selecting the Python
# engine explicitly avoids the ParserWarning fallback.
pd.read_csv(StringIO(r.content.decode()), skiprows=9, skipfooter=2, engine='python')

In [231]:
ssga_url = 'https://www.ssga.com/bin/v1/ssmp/fund/fundfinder?country=us&language=en&role=intermediary&product=@all&ui=fund-finder'
# Bound the wait and surface HTTP errors before decoding, matching how
# ISharesListings.retrieve_listings checks its response.
r = requests.get(ssga_url, timeout=30)
r.raise_for_status()
r_ = r.json()

In [252]:
# Build one frame per entry under data -> funds (from its 'datas' value) and
# stack them; the original per-frame indices are kept.
funds_by_key = r_['data']['funds']
l = funds_by_key.keys()
pd.concat([pd.DataFrame(funds_by_key[key]['datas']) for key in l])

Unnamed: 0,fundName,fundTicker,fundUri,netRatio,ter,nav,dailyChange,asOfDate,fundNamePerf,PerfAsOf,...,asOfDate_1,mo3,mo3_1,benchmark,domicile,aum,primaryExchange,closePrice,bidAsk,premiumDiscount
0,State Street Aggregate Bond Index Fund - Class A,SSFCX,/us/en/intermediary/ic/funds/state-street-aggr...,"[0.475%, 0.475]","[0.607%, 0.607]","[$85.83, 85.83]",+$0.92 (+1.07%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
1,State Street Aggregate Bond Index Fund - Class I,SSFDX,/us/en/intermediary/ic/funds/state-street-aggr...,"[0.225%, 0.225]","[0.357%, 0.357]","[$85.58, 85.58]",+$0.93 (+1.09%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
2,State Street Aggregate Bond Index Fund - Class K,SSFEX,/us/en/intermediary/ic/funds/state-street-aggr...,"[0.025%, 0.025]","[0.157%, 0.157]","[$85.58, 85.58]",+$0.92 (+1.08%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
3,State Street Emerging Markets Equity Index Fun...,SSKEX,/us/en/intermediary/ic/funds/state-street-emer...,"[0.17%, 0.17]","[0.30%, 0.3]","[$60.13, 60.13]",+$0.72 (+1.21%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
4,State Street Equity 500 Index Fund - Administr...,STFAX,/us/en/intermediary/ic/funds/state-street-equi...,"[0.17%, 0.17]","[0.27%, 0.27]","[$285.20, 285.2]",+$6.50 (+2.28%),"[Jan 05 2023, 2023-01-05]",Fund at NAV,"[Nov 30 2022, 2022-11-30]",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,The Industrial Select Sector SPDR® Fund,XLI,/us/en/intermediary/etfs/funds/the-industrial-...,,"[0.10%, 0.1]","[$98.25, 98.25]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$13,664.21 M, 13664.21]",NYSE ARCA EXCHANGE,"[$98.22, 98.22]","[$98.24, 98.24]","[-0.02%, -0.02]"
137,The Materials Select Sector SPDR® Fund,XLB,/us/en/intermediary/etfs/funds/the-materials-s...,,"[0.10%, 0.1]","[$77.69, 77.69]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$5,265.61 M, 5265.61]",NYSE ARCA EXCHANGE,"[$77.70, 77.7]","[$77.69, 77.69]","[-0.01%, -0.01]"
138,The Real Estate Select Sector SPDR® Fund,XLRE,/us/en/intermediary/etfs/funds/the-real-estate...,,"[0.10%, 0.1]","[$36.81, 36.81]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$4,885.16 M, 4885.16]",NYSE ARCA EXCHANGE,"[$36.81, 36.81]","[$36.82, 36.82]","[0.02%, 0.02]"
139,The Technology Select Sector SPDR® Fund,XLK,/us/en/intermediary/etfs/funds/the-technology-...,,"[0.10%, 0.1]","[$121.15, 121.15]",,"[Jan 05 2023, 2023-01-05]",,"[Nov 30 2022, 2022-11-30]",...,,,,,US,"[$37,205.45 M, 37205.45]",NYSE ARCA EXCHANGE,"[$121.18, 121.18]","[$121.16, 121.16]","[0.01%, 0.01]"


In [264]:
# A browser-like user-agent header is sent with this request.
# The timeout bounds the wait, and raise_for_status surfaces HTTP errors before
# the body is decoded as JSON.
r = requests.get(
    'https://investor.vanguard.com/investment-products/list/funddetail',
    headers={"user-agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'},
    timeout=30,
)
r.raise_for_status()
r_ = r.json()

In [278]:
# List the top-level keys of the first fund entity. The recorded output of this
# cell is a dict_keys view, so the `.keys()` call that had been commented out
# is restored.
r_['fund']['entity'][0].keys()

dict_keys(['type', 'profile', 'minimum', 'risk', 'fees', 'link', 'dailyPrice', 'yield', 'ytd', 'monthEndAvgAnnualRtn'])

In [283]:
# Show the first fund entity in full; append ['profile'] to see only its profile sub-dict.
r_['fund']['entity'][0]

{'type': 'priceMonthEndPerformance',
 'profile': {'fundId': '0924',
  'ticker': 'BSV',
  'instrumentId': 2845959,
  'shortName': 'Short-Term Bond ETF',
  'longName': 'Vanguard Short-Term Bond ETF',
  'cusip': '921937827',
  'IOVTicker': 'BSV.IV',
  'inceptionDate': '2007-04-03T00:00:00-04:00',
  'newspaperAbbreviation': 'Short-Term Bond     ',
  'style': 'Bond Funds',
  'type': 'Short-Term Bond',
  'category': 'Short-Term Bond',
  'customizedStyle': 'Bond - Short-term Investment',
  'fixedIncomeInvestmentStyleId': '1',
  'fixedIncomeInvestmentStyleName': 'Short-term Treasury',
  'secDesignation': '',
  'maximumYearlyInvestment': '',
  'expenseRatio': '0.0400',
  'expenseRatioAsOfDate': '2022-04-29T00:00:00-04:00',
  'isInternalFund': True,
  'isExternalFund': False,
  'isMutualFund': False,
  'isETF': True,
  'isVLIP': False,
  'isVVAP': False,
  'is529': False,
  'hasAssociatedInvestorFund': True,
  'hasMoreThan1ShareClass': True,
  'isPESite': False,
  'fundFact': {'isActiveFund': Tr