In [1]:
# add root path to sys.path
import os
import sys
from pathlib import Path
# ROOT_DIR is the parent of the current working directory; it is appended to
# sys.path only when it is not already listed there.
ROOT_DIR = Path.cwd().parent
if str(ROOT_DIR) not in sys.path:
    sys.path.append(str(ROOT_DIR))

In [2]:
# connect to local DB
from src.config import DB_DIR
import sqlite3
# Open the SQLite file 'etf.db' found under DB_DIR.
# NOTE: sqlite3.connect() creates the file when it does not exist, so a wrong
# DB_DIR results in an empty database rather than an error.
conn = sqlite3.connect(str(DB_DIR/'etf.db'))

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Names of all tables recorded in SQLite's sqlite_master catalogue.
table_list = (
    pd.read_sql("SELECT name FROM sqlite_master WHERE type='table'", conn)["name"]
    .tolist()
)

In [4]:
# Show the table names read from sqlite_master.
table_list

['header',
 'average_annual_performance',
 'cumulative_performance',
 'calendar_year_performance',
 'fee_table',
 'keyFundFacts',
 'esgAnalytics',
 'productInvolvement']

In [5]:
# Read every table in full into its own DataFrame, keyed by table name.
etf_data = {
    t: pd.read_sql("SELECT * FROM {}".format(t), conn)
    for t in table_list
}

In [6]:
# Pivot ytd_perf_value into a table with one row per ticker and one column
# per ytd_perf_date.
(
    etf_data['header']
    .set_index(['ticker', 'ytd_perf_date'])['ytd_perf_value']
    .unstack()
)

#import plotly.express as px
#fig = px.histogram(etf_data['header'], x="ytd_perf_value")

ytd_perf_date,"Nov 28, 2023"
ticker,Unnamed: 1_level_1
igf,0.0159


In [None]:
# Distinct nav_change_value entries for each (ticker, nav_change_date) group,
# pivoted so that every date becomes a column.
(
    etf_data['header']
    .groupby(['ticker', 'nav_change_date'])
    .apply(lambda grp: grp['nav_change_value'].unique().tolist())
    .unstack()
)

In [None]:
# Distinct nav_value entries for each (ticker, nav_date) group, pivoted so
# that every date becomes a column.
(
    etf_data['header']
    .groupby(['ticker', 'nav_date'])
    .apply(lambda grp: grp['nav_value'].unique().tolist())
    .unstack()
)

In [None]:
# Distinct ytd_perf_value entries for each (ticker, ytd_perf_date) group,
# pivoted so that every date becomes a column.
(
    etf_data['header']
    .groupby(['ticker', 'ytd_perf_date'])
    .apply(lambda grp: grp['ytd_perf_value'].unique().tolist())
    .unstack()
)

In [None]:
# List the column names of the 'header' table.
etf_data['header'].columns

In [None]:
# For every column of the 'header' table, print the column name and display the
# (ticker, date) groups that hold more than one non-null value, together with
# the distinct values found in each of those groups.
date_col = 'ytd_perf_date'
group_keys = ['ticker', date_col]
for c in etf_data['header'].columns:
    print(c)
    # df: non-null count per group; df2: distinct values per group
    df = etf_data['header'].groupby(group_keys).apply(lambda grp: grp[c].count())
    df2 = etf_data['header'].groupby(group_keys).apply(lambda grp: grp[c].unique().tolist())
    display(df2[df > 1])

In [None]:
def calc_active_total_return(df):
    """Return total return minus benchmark return for one group of rows.

    Args:
        df: DataFrame with a 'performance_type' column (labels such as
            'Total Return (%)' and 'Benchmark (%)') and a
            'cumulative_performance' column holding the matching values.

    Returns:
        'Total Return (%)' minus 'Benchmark (%)'. ``np.nan`` is returned when
        either label is absent, when a required column is missing, or when
        the two values cannot be subtracted (e.g. non-numeric entries).
    """
    try:
        perf_by_type = df.set_index('performance_type')['cumulative_performance'].to_dict()
        total_ret = perf_by_type.get('Total Return (%)', np.nan)
        benchmark_ret = perf_by_type.get('Benchmark (%)', np.nan)
        return total_ret - benchmark_ret
    # Only data-shaped failures map to NaN; a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and make a long groupby impossible to stop.
    except (KeyError, AttributeError, TypeError, ValueError):
        return np.nan

# Histogram of active total return (total return minus benchmark) across
# tickers, one panel per performance horizon, saved to 'active_etf_returns.png'.
perf_horizons = etf_data['cumulative_performance'].performance_horizon.unique().tolist()
ncols = 3
# Round up so a partially filled last row still gets axes: floor division
# dropped the trailing horizons and gave zero rows for fewer than `ncols`.
nrows = max(1, (len(perf_horizons) + ncols - 1) // ncols)
# squeeze=False keeps `axs` a 2-D array whatever the grid shape is.
fig, axs = plt.subplots(ncols=ncols, nrows=nrows, figsize=(6*ncols, 4*nrows), squeeze=False)
iter_axs = axs.flatten()

# Loop-invariant: active return with one row per ticker and one column per
# horizon, computed once instead of once per panel.
active_ret_by_horizon = etf_data['cumulative_performance'].groupby(['ticker','performance_horizon'])\
    .apply(calc_active_total_return).rename('Active Total Return (%)')\
    .unstack()

for horizon, ax in zip(perf_horizons, iter_axs):
    ax.set_xlabel('Active Total Return (%)')
    ax.set_ylabel('Frequency')
    ax.set_title('Performance Horizon: {}'.format(horizon.upper()))
    ax.grid(True)
    ax.set_axisbelow(True)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.tick_params(axis='both', which='minor', labelsize=10)
    ax.minorticks_on()

    active_ret_data = active_ret_by_horizon[horizon]
    # Share of positive active returns. The denominator counts every ticker,
    # including those whose active return is missing. Not drawn on the plot.
    hit_rate = active_ret_data[active_ret_data>0].size/active_ret_data.size

    active_ret_data.hist(bins=50, edgecolor='white', color='tomato', ax=ax)

# Remove the axes left over when the grid has more cells than horizons.
for ax in iter_axs[len(perf_horizons):]:
    fig.delaxes(ax)
fig.tight_layout()
fig.savefig('active_etf_returns.png', dpi=300, bbox_inches='tight')

In [None]:
# Based on this full XPath, read the element from soup: /html/body/div[6]/div[2]/div[1]/div[4]/div/div[1]/div[5]/svg/g[5]/g[3]/path[1]

In [None]:
# Product page to fetch (iShares Core MSCI EAFE ETF, going by the URL slug).
url = 'https://www.ishares.com/us/products/244049/ishares-core-msci-eafe-etf'

In [None]:
def get_etf_response(url, timeout=30):
    """Download a web page and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout the request can block
            indefinitely on an unresponsive host.

    Returns:
        bs4.BeautifulSoup built from the response body with the
        'html.parser' backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or when the
            timeout expires.
    """
    # Local imports keep the function usable when only its own cell has run.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing the error page as if
    # it were the product page.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

In [None]:
# Download and parse the page at `url`.
soup = get_etf_response(url)

In [None]:
# First child node of the page's first <h1>, with surrounding whitespace stripped.
# NOTE(review): assumes an <h1> exists and that its first child is a string;
# find() returns None when there is no match — confirm against the page layout.
soup.find('h1').contents[0].strip()