In [1]:
%load_ext autoreload
%autoreload 2

import logging
from functools import partial

import pandas as pd

from etf_scraper import ETFScraper, load_listings
from etf_scraper.utils import get_interval_query_dates
from etf_scraper.storage import query_range, save_func

logger = logging.getLogger()
logger.setLevel(logging.INFO)

query_dates = get_interval_query_dates('2020-01-01', '2023-01-20', False, False)
save_func_ = partial(save_func, out_dir="/tmp/out/")

listings = load_listings()

!mkdir /tmp/out/

mkdir: cannot create directory ‘/tmp/out/’: File exists


In [2]:
# A single `None` date replaces the date range built in the setup cell; the
# scraper log reports such queries as "holdings as of latest".
query_dates = [None]

# Every distinct ticker present in the listings table.
query_tickers = listings["ticker"].unique()
len(query_tickers)

1240

In [3]:
# Run every (date, ticker) query with a fresh scraper; results are persisted
# through save_func_ and the returned report is kept in `a` for inspection.
a = query_range(
    query_dates,
    query_tickers,
    ETFScraper(),
    save_func_,
)

INFO:etf_scraper.api:Querying for MCHI holdings as of latest from IShares
INFO:etf_scraper.api:Querying for EIDO holdings as of latest from IShares
INFO:etf_scraper.api:Querying for BKTSX holdings as of latest from IShares
INFO:etf_scraper.api:Querying for IWV holdings as of latest from IShares
INFO:etf_scraper.api:Querying for IXC holdings as of latest from IShares
INFO:etf_scraper.api:Querying for MUB holdings as of latest from IShares
INFO:etf_scraper.api:Querying for ISTB holdings as of latest from IShares
INFO:etf_scraper.scrapers:Querying IShares for MCHI holdings as of None
INFO:etf_scraper.api:Querying for BSPPX holdings as of latest from IShares
INFO:etf_scraper.api:Querying for BSPIX holdings as of latest from IShares
INFO:etf_scraper.api:Querying for BMED holdings as of latest from IShares
INFO:etf_scraper.scrapers:Querying IShares for EIDO holdings as of None
INFO:etf_scraper.scrapers:Querying IShares for BKTSX holdings as of None
INFO:etf_scraper.scrapers:Querying IShares 

In [5]:
# Turn the query report into one row per query.
# NOTE(review): presumably `a` is keyed by (ticker, date) pairs, which become
# the two index levels after transposing — confirm against query_range.
df = (
    pd.DataFrame(a)
    .T
    .rename_axis(index=['ticker', 'date'])
    .reset_index()
)

# Same report enriched with the listing metadata for each ticker.
df_ = pd.merge(df, listings, on='ticker')

# How many queries ended in each error class (NaN = no error recorded).
df.error_class.value_counts(dropna=False)

NaN                      743
ValueError               257
InvalidParameterError    191
KeyError                  38
EmptyDataError             6
HTTPError                  5
Name: error_class, dtype: int64

In [5]:
# Failed queries (non-null `error`) broken down by provider and asset class.
(
    df_.loc[df_['error'].notna(), ['provider', 'asset_class']]
    .value_counts(dropna=False)
    .sort_index()
)

provider  asset_class   
IShares   Commodity           3
          Equity             29
          Fixed Income      126
          Real Estate         3
Invesco   NaN                86
SSGA      Alternative         2
          Fixed Income       38
          NaN                69
Vanguard  Balanced Funds     10
          Bond Funds         55
          Stock Funds        57
dtype: int64

In [15]:
# Drill-down: failed SSGA queries whose listing carries no asset class.
# Swap or add conditions (asset_class, fund_type, ticker) to inspect other slices.
df_[
    df_['error'].notna()
    & df_['provider'].eq('SSGA')
    & df_['asset_class'].isna()
]

Unnamed: 0,ticker,date,save_path,n_holdings,error,error_class,fund_name,inception_date,cusip,isin,...,subasset_class,country,region,product_url,product_id,net_assets,fund_type,provider,benchmark,exchange
1030,SSFCX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Aggregate Bond Index Fund - Class A,2014-09-19 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1031,SSFDX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Aggregate Bond Index Fund - Class I,2014-09-19 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1032,SSFEX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Aggregate Bond Index Fund - Class K,2014-09-19 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1033,SSKEX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Emerging Markets Equity Index Fun...,2015-12-18 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1034,STFAX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Equity 500 Index Fund - Administr...,2001-04-18 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1094,SSFOX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Target Retirement Fund - Class K,2014-09-30 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1095,SSFQX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Target Retirement Fund - Class R3,2014-09-30 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1096,SSTIX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Total Return V.I.S. Fund - Class 1,1985-07-01 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,
1097,SSTTX,,,,"Traceback (most recent call last):\n File ""/h...",InvalidParameterError,State Street Total Return V.I.S. Fund - Class 3,2006-05-01 00:00:00,,,...,,,,https://www.ssga.com/us/en/intermediary/ic/fun...,,,MF,SSGA,,


In [36]:
from etf_scraper.utils import _get_trd_dates
from pandas.tseries.offsets import BDay
start_date = '2010-01-01'
end_date = '2023-01-20'
exchange = 'NYSE'

# Push the upper bound one business day past end_date before asking for dates.
end_date_ = pd.to_datetime(end_date) + BDay(1)

# NOTE(review): _get_trd_dates presumably returns the exchange's trading
# sessions between the two bounds — confirm in etf_scraper.utils.
date_range = _get_trd_dates(start_date, end_date_, exchange)

# Series whose values equal its own index, so date-based grouping can pick
# actual dates out of each bucket.
day_series = pd.Series(data=date_range, index=date_range)
day_series

2010-01-04 00:00:00+00:00   2010-01-04 00:00:00+00:00
2010-01-05 00:00:00+00:00   2010-01-05 00:00:00+00:00
2010-01-06 00:00:00+00:00   2010-01-06 00:00:00+00:00
2010-01-07 00:00:00+00:00   2010-01-07 00:00:00+00:00
2010-01-08 00:00:00+00:00   2010-01-08 00:00:00+00:00
                                       ...           
2023-01-17 00:00:00+00:00   2023-01-17 00:00:00+00:00
2023-01-18 00:00:00+00:00   2023-01-18 00:00:00+00:00
2023-01-19 00:00:00+00:00   2023-01-19 00:00:00+00:00
2023-01-20 00:00:00+00:00   2023-01-20 00:00:00+00:00
2023-01-23 00:00:00+00:00   2023-01-23 00:00:00+00:00
Length: 3286, dtype: datetime64[ns, UTC]

In [38]:
# Last available date within each calendar-month bucket.
# Note: the "M" alias is deprecated in pandas >= 2.2 in favour of "ME"; it is
# kept here because it is the spelling accepted by older pandas versions.
(
    day_series
    .groupby(pd.Grouper(freq="M"))
    .last()
)

2010-01-31 00:00:00+00:00   2010-01-29 00:00:00+00:00
2010-02-28 00:00:00+00:00   2010-02-26 00:00:00+00:00
2010-03-31 00:00:00+00:00   2010-03-31 00:00:00+00:00
2010-04-30 00:00:00+00:00   2010-04-30 00:00:00+00:00
2010-05-31 00:00:00+00:00   2010-05-28 00:00:00+00:00
                                       ...           
2022-09-30 00:00:00+00:00   2022-09-30 00:00:00+00:00
2022-10-31 00:00:00+00:00   2022-10-31 00:00:00+00:00
2022-11-30 00:00:00+00:00   2022-11-30 00:00:00+00:00
2022-12-31 00:00:00+00:00   2022-12-30 00:00:00+00:00
2023-01-31 00:00:00+00:00   2023-01-23 00:00:00+00:00
Freq: M, Length: 157, dtype: datetime64[ns, UTC]

In [40]:
# Rebuild the query dates for 2010-01-01..2023-01-20 with both boolean flags on.
# NOTE(review): the flags are positional; see get_interval_query_dates for their meaning.
query_dates = get_interval_query_dates('2010-01-01', '2023-01-20', True, True)

# Restrict the universe to iShares equity ETFs.
ishares_eq_etfs = listings.loc[
    listings['provider'].eq('IShares')
    & listings['asset_class'].eq('Equity')
    & listings['fund_type'].eq('ETF')
]
ishares_eq_etf_tickers = ishares_eq_etfs['ticker'].tolist()

# Tickers, dates, and the total number of (ticker, date) queries this implies.
print(
    len(ishares_eq_etf_tickers),
    len(query_dates),
    len(ishares_eq_etf_tickers) * len(query_dates),
)

ishares_eq_etf_tickers[:5]

254 156 39624


['MCHI', 'ECH', 'EFA', 'EFG', 'EDEN']

In [22]:
# Historical run over the iShares equity ETF universe selected above.
# The original passed `query_tickers` (every listing), which ignores the
# filtered list and multiplies the number of queries far beyond the
# ticker-count x date-count total printed in the previous cell.
rpt = query_range(
    query_dates,
    ishares_eq_etf_tickers,
    ETFScraper(),
    save_func_,
)

254


['MCHI', 'ECH', 'EFA', 'EFG', 'EDEN']