In [6]:
import yfinance as yf
import pandas as pd

## EFTs in USD - biggest/oldest ones available

In [86]:
usd_etf_codes = {
    'United States': 'SPY',
    'Japan': 'EWJ',
    'United Kingdom': 'EWU',
    'Canada': 'EWC',
    'France': 'EWQ',
    'Switzerland': 'EWL',
    'Germany': 'EWG',
    'Australia': 'EWA',
    'Netherlands': 'EWN',
    'Denmark': 'EDEN',
    'Sweden': 'EWD',
    'Spain': 'EWP',
    'Hong Kong': 'EWH',
    'Italy': 'EWI',
    'Singapore': 'EWS',
    'Finland': 'EFNL',
    'Belgium': 'EWK',
    'Norway': 'NORW',
    'Israel': 'EIS',
    'Ireland': 'EIRL',
    'New Zealand': 'ENZL',
    'Austria': 'EWO',
    'Portugal': 'PGAL',
    'Eurozona': 'FEZ',

    'China': 'MCHI',
    'Taiwan': 'EWT',
    'India': 'INDA',
    'Korea': 'EWY',
    'Brazil': 'EWZ',
    'Saudi Arabia': 'KSA',
    'South Africa': 'EZA',
    'Mexico': 'EWW',
    'Thailand': 'THD',
    'Indonesia': 'EIDO',
    'Malaysia': 'EWM',
    'United Arab Emirates': 'UAE',
    'Qatar': 'QAT',
    'Kuwait': 'KWT',
    'Turkiye': 'TUR',
    'Philippines': 'EPHE',
    'Poland': 'EPOL',
    'Chile': 'ECH',
    'Greece': 'GREK',
    'Peru': 'EPU',
    'Hungary': '',
    'Czechia': '',
    'Egypt': 'EGPT',
    'Colombia': 'GXG',
    'Argentina': 'ARGT',
    'Russia': 'ERUS',
}

In [87]:
df = pd.DataFrame(index=pd.date_range('1999-01-04', '2023-10-01', freq='B'),
                  columns=usd_etf_codes)

for country in usd_etf_codes:
    index_ticker = usd_etf_codes[country]
    if index_ticker != '':
        index_data = yf.download(index_ticker, '1999-1-1')
        print(country, index_data.shape)
        df[country] = index_data['Adj Close']

[*********************100%%**********************]  1 of 1 completed
United States (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Japan (6226, 6)
[*********************100%%**********************]  1 of 1 completed
United Kingdom (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Canada (6226, 6)
[*********************100%%**********************]  1 of 1 completed
France (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Switzerland (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Germany (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Australia (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Netherlands (6226, 6)
[*********************100%%**********************]  1 of 1 completed
Denmark (2939, 6)
[*********************100%%**********************]  1 of 1 completed
Sweden (6226, 6)
[*********************100

In [88]:
df = df.dropna(thresh=15)
df.to_csv('USD_ETFs.csv')
df

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Poland,Chile,Greece,Peru,Hungary,Czechia,Egypt,Colombia,Argentina,Russia
1999-01-04,78.891632,28.450510,16.065809,5.851786,11.390851,11.481215,12.743690,3.448380,14.495271,,...,,,,,,,,,,
1999-01-05,79.793343,28.275969,16.212755,5.942748,11.452589,11.644073,12.609546,3.448380,14.699428,,...,,,,,,,,,,
1999-01-06,81.717087,28.625055,16.898495,6.276268,11.699545,11.888352,13.615628,3.560339,14.971641,,...,,,,,,,,,,
1999-01-07,81.316284,28.799595,16.604607,6.185308,11.606934,11.806925,12.676618,3.627515,14.529295,,...,,,,,,,,,,
1999-01-08,81.917435,28.799595,16.506639,6.276268,11.421714,11.806925,12.710157,3.627515,14.359162,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-25,432.230011,61.720001,31.850000,34.099998,35.730000,44.169998,26.440001,21.620001,38.099998,101.290001,...,17.469999,25.860001,34.070000,30.42,,,21.650000,20.860001,42.509998,8.06
2023-09-26,425.880005,60.770000,31.549999,33.560001,35.279999,43.639999,25.969999,21.340000,37.599998,100.019997,...,17.309999,25.709999,33.900002,29.91,,,21.370001,20.510000,41.630001,8.06
2023-09-27,426.049988,61.029999,31.500000,33.389999,35.119999,43.099998,25.860001,21.250000,37.360001,99.650002,...,16.930000,25.330000,33.130001,29.76,,,21.120001,20.610001,41.439999,8.06
2023-09-28,428.519989,61.150002,31.730000,33.730000,35.520000,43.480000,26.120001,21.629999,37.700001,100.139999,...,16.840000,25.830000,33.009998,29.93,,,21.280001,20.940001,41.910000,8.06


In [89]:
# check any NA values in the middle

for country in df.columns:
    if usd_etf_codes[country] == '':
        continue
    
    first_valid = df[country].first_valid_index()
    if df[country].loc[first_valid:].isna().sum() > 0:
        print(country, df[country].loc[first_valid:].isna().sum())

In [90]:
for country in df.columns:
    if usd_etf_codes[country] == '':
        print(country, 'No data')
        continue
    
    first_valid = df[country].first_valid_index()
    if not str(first_valid).startswith('1999-01-04'):
        print(country, first_valid)

Denmark 2012-01-26 00:00:00
Finland 2012-01-26 00:00:00
Norway 2009-08-19 00:00:00
Israel 2008-03-28 00:00:00
Ireland 2010-05-11 00:00:00
New Zealand 2010-09-02 00:00:00
Portugal 2013-11-13 00:00:00
Eurozona 2002-10-21 00:00:00
China 2011-03-31 00:00:00
Taiwan 2000-06-23 00:00:00
India 2012-02-03 00:00:00
Korea 2000-05-12 00:00:00
Brazil 2000-07-14 00:00:00
Saudi Arabia 2015-09-17 00:00:00
South Africa 2003-02-07 00:00:00
Thailand 2008-04-01 00:00:00
Indonesia 2010-05-07 00:00:00
United Arab Emirates 2014-05-01 00:00:00
Qatar 2014-05-01 00:00:00
Kuwait 2020-09-03 00:00:00
Turkiye 2008-03-28 00:00:00
Philippines 2010-09-29 00:00:00
Poland 2010-05-26 00:00:00
Chile 2007-11-20 00:00:00
Greece 2011-12-08 00:00:00
Peru 2009-06-22 00:00:00
Hungary No data
Czechia No data
Egypt 2010-02-18 00:00:00
Colombia 2009-02-09 00:00:00
Argentina 2011-03-03 00:00:00
Russia 2010-11-09 00:00:00


In [91]:
for country in df.columns:
    last_valid = df[country].last_valid_index()
    if not str(last_valid).startswith('2023-09-29'):
        print(country, last_valid)

Hungary None
Czechia None


## EFTs in EUR

In [92]:
eur_etf_codes = {
    'United States': 'SXR8.DE',
    'Japan': 'IJPA.AS',
    'United Kingdom': 'SXRW.DE',
    'Canada': 'SXR2.DE',
    'France': 'CAC.PA',
    'Switzerland': 'XSMI.MI',
    'Germany': 'DBXD.DE',
    'Australia': 'IBC6.DE',
    'Netherlands': 'IAEX.AS',
    'Denmark': '',
    'Sweden': '',
    'Spain': 'XESP.DE',
    'Hong Kong': 'HKDE.AS',
    'Italy': 'ETFMIB.MI',
    'Singapore': 'XBAS.DE',
    'Finland': 'SLG-OMXH25.HE',
    'Belgium': 'BEL.BR',
    'Norway': 'OBXD.OL',
    'Israel': '',
    'Ireland': '',
    'New Zealand': '',
    'Austria': 'EXXX.DE',
    'Portugal': '',
    'Eurozona': 'C50.PA',

    'China': 'XCS6.DE',
    'Taiwan': 'IQQT.DE',
    'India': 'INDI.MI',
    'Korea': 'XMKO.MI',
    'Brazil': 'XMBR.DE',
    'Saudi Arabia': 'IUSW.DE',
    'South Africa': 'IBC4.DE',
    'Mexico': 'XMEX.MI',
    'Thailand': 'XCS4.DE',
    'Indonesia': 'INDO.PA',
    'Malaysia': 'XCS3.DE',
    'United Arab Emirates': '',
    'Qatar': '',
    'Kuwait': '',
    'Turkiye': 'ITKY.AS',
    'Philippines': 'XPQP.DE',
    'Poland': 'IBCJ.DE',
    'Chile': '',
    'Greece': 'GRE.PA',
    'Peru': '',
    'Hungary': '',
    'Czechia': 'CZX.DE',
    'Egypt': '',
    'Colombia': '',
    'Argentina': '',
    'Russia': 'RUS.PA',
}

In [93]:
df_eur = pd.DataFrame(index=pd.date_range('2008-01-02', '2023-10-01', freq='B'),
                      columns=eur_etf_codes)

for country in eur_etf_codes:
    index_ticker = eur_etf_codes[country]
    if index_ticker != '':
        index_data = yf.download(index_ticker, '2008-01-02')
        print(country, index_data.shape)
        df_eur[country] = index_data['Adj Close']

[*********************100%%**********************]  1 of 1 completed
United States (3399, 6)
[*********************100%%**********************]  1 of 1 completed
Japan (3590, 6)
[*********************100%%**********************]  1 of 1 completed
United Kingdom (3271, 6)
[*********************100%%**********************]  1 of 1 completed
Canada (3446, 6)
[*********************100%%**********************]  1 of 1 completed
France (4031, 6)
[*********************100%%**********************]  1 of 1 completed
Switzerland (3997, 6)
[*********************100%%**********************]  1 of 1 completed
Germany (4000, 6)
[*********************100%%**********************]  1 of 1 completed
Australia (3480, 6)
[*********************100%%**********************]  1 of 1 completed
Netherlands (4033, 6)
[*********************100%%**********************]  1 of 1 completed
Spain (3128, 6)
[*********************100%%**********************]  1 of 1 completed
Hong Kong (1961, 6)
[*********************10

In [94]:
df_eur['United Kingdom'].loc[:'2010-10'] = None

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_eur['United Kingdom'].loc[:'2010-10'] = None


In [95]:
df_eur = df_eur.dropna(thresh=15)
df_eur = df_eur.ffill(limit=2)
df_eur.to_csv('EUR_ETFs.csv')
df_eur

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Poland,Chile,Greece,Peru,Hungary,Czechia,Egypt,Colombia,Argentina,Russia
2008-01-02,,,,,56.270000,51.500000,79.461098,,50.875401,,...,,,14.9000,,,,,,,43.950001
2008-01-03,,,,,55.639999,50.650002,79.054100,,50.763802,,...,,,14.9500,,,,,,,43.650002
2008-01-04,,,,,55.590000,50.090000,78.056702,,49.958302,,...,,,14.7200,,,,,,,43.660000
2008-01-07,,,,,54.599998,49.349998,78.141296,,50.066299,,...,,,14.6800,,,,,,,43.520000
2008-01-08,,,,,54.660000,49.970001,78.469101,,49.650799,,...,,,14.7000,,,,,,,43.669998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-25,427.130005,45.639999,164.300003,158.440002,72.019997,118.260002,145.720001,40.064999,72.360001,,...,13.984,,1.3164,,,1.1572,,,,
2023-09-26,423.489990,45.209999,163.699997,156.619995,71.459999,117.500000,144.300003,39.820000,71.830002,,...,13.948,,1.3064,,,1.1512,,,,
2023-09-27,424.540009,45.560001,163.699997,156.720001,71.430000,116.580002,143.899994,39.799999,71.879997,,...,13.730,,1.2942,,,1.1572,,,,
2023-09-28,425.470001,45.279999,164.160004,157.080002,71.930000,116.580002,145.000000,40.115002,72.290001,,...,13.578,,1.2750,,,1.1574,,,,


In [96]:
df_eur = df_eur.ffill(limit=2)

In [97]:
# check any NA values in the middle

for country in df_eur.columns:
    if eur_etf_codes[country] == '':
        continue
    
    first_valid = df_eur[country].first_valid_index()
    if df_eur[country].loc[first_valid:].isna().sum() > 0:
        print(country, df_eur[country].loc[first_valid:].isna().sum())

Hong Kong 1
Indonesia 3
Greece 19
Russia 400


In [98]:
for country in df_eur.columns:
    if eur_etf_codes[country] == '':
        #print(country, 'No data')
        continue
    
    first_valid = df_eur[country].first_valid_index()
    if not str(first_valid).startswith('2008-01-02'):
        print(country, first_valid)

United States 2010-05-19 00:00:00
Japan 2009-09-25 00:00:00
United Kingdom 2010-11-02 00:00:00
Canada 2010-03-11 00:00:00
Australia 2010-01-22 00:00:00
Spain 2011-04-27 00:00:00
Hong Kong 2016-01-27 00:00:00
Singapore 2011-09-19 00:00:00
Eurozona 2008-09-16 00:00:00
China 2011-10-04 00:00:00
Saudi Arabia 2019-05-06 00:00:00
South Africa 2010-01-22 00:00:00
Mexico 2010-07-26 00:00:00
Thailand 2011-08-19 00:00:00
Indonesia 2011-07-04 00:00:00
Malaysia 2011-08-24 00:00:00
Philippines 2011-04-12 00:00:00
Poland 2011-01-21 00:00:00
Czechia 2018-03-19 00:00:00


In [99]:
for country in df_eur.columns:
    if eur_etf_codes[country] == '':
        #print(country, 'No data')
        continue
    
    last_valid = df_eur[country].last_valid_index()
    if not str(last_valid).startswith('2023-09-29'):
        print(country, last_valid)

Russia 2022-03-14 00:00:00
