In [1]:
import yfinance as yf
import pandas as pd

## EFTs in USD - biggest/oldest ones available

In [2]:
usd_etf_codes = {
    'United States': 'SPY',
    'Japan': 'EWJ',
    'United Kingdom': 'EWU',
    'Canada': 'EWC',
    'France': 'EWQ',
    'Switzerland': 'EWL',
    'Germany': 'EWG',
    'Australia': 'EWA',
    'Netherlands': 'EWN',
    'Denmark': 'EDEN',
    'Sweden': 'EWD',
    'Spain': 'EWP',
    'Hong Kong': 'EWH',
    'Italy': 'EWI',
    'Singapore': 'EWS',
    'Finland': 'EFNL',
    'Belgium': 'EWK',
    'Norway': 'NORW',
    'Israel': 'EIS',
    'Ireland': 'EIRL',
    'New Zealand': 'ENZL',
    'Austria': 'EWO',
    'Portugal': 'PGAL',
    'Eurozona': 'FEZ',

    'China': 'MCHI',
    'Taiwan': 'EWT',
    'India': 'INDA',
    'Korea': 'EWY',
    'Brazil': 'EWZ',
    'Saudi Arabia': 'KSA',
    'South Africa': 'EZA',
    'Mexico': 'EWW',
    'Thailand': 'THD',
    'Indonesia': 'EIDO',
    'Malaysia': 'EWM',
    'United Arab Emirates': 'UAE',
    'Qatar': 'QAT',
    'Kuwait': 'KWT',
    'Turkiye': 'TUR',
    'Philippines': 'EPHE',
    'Poland': 'EPOL',
    'Chile': 'ECH',
    'Greece': 'GREK',
    'Peru': 'EPU',
    'Hungary': '',
    'Czechia': '',
    'Egypt': 'EGPT',
    'Colombia': 'GXG',
    'Argentina': 'ARGT',
    'Russia': 'ERUS',
}

In [3]:
df = pd.DataFrame(index=pd.date_range('1999-01-04', '2023-10-05', freq='B'),
                  columns=usd_etf_codes)

for country in usd_etf_codes:
    index_ticker = usd_etf_codes[country]
    if index_ticker != '':
        index_data = yf.download(index_ticker, '1999-1-1')
        print(country, index_data.shape)
        df[country] = index_data['Adj Close']

[*********************100%%**********************]  1 of 1 completed
United States (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Japan (6280, 6)
[*********************100%%**********************]  1 of 1 completed
United Kingdom (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Canada (6280, 6)
[*********************100%%**********************]  1 of 1 completed
France (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Switzerland (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Germany (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Australia (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Netherlands (6280, 6)
[*********************100%%**********************]  1 of 1 completed
Denmark (2993, 6)
[*********************100%%**********************]  1 of 1 completed
Sweden (6280, 6)
[*********************100

In [4]:
df = df.round(6)

In [5]:
df = df.dropna(thresh=15)
df.to_csv('market_data/etfs_in_usd.csv')
df

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Poland,Chile,Greece,Peru,Hungary,Czechia,Egypt,Colombia,Argentina,Russia
1999-01-04,78.573097,28.450512,16.065813,5.851786,11.390850,11.481214,12.743692,3.448380,14.495274,,...,,,,,,,,,,
1999-01-05,79.471138,28.275961,16.212753,5.942748,11.452591,11.644071,12.609548,3.448380,14.699425,,...,,,,,,,,,,
1999-01-06,81.387100,28.625050,16.898493,6.276269,11.699544,11.888354,13.615626,3.560340,14.971642,,...,,,,,,,,,,
1999-01-07,80.987930,28.799599,16.604607,6.185308,11.606936,11.806927,12.676620,3.627517,14.529297,,...,,,,,,,,,,
1999-01-08,81.586670,28.799599,16.506639,6.276269,11.421720,11.806927,12.710156,3.627517,14.359162,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-29,425.753845,60.290001,31.639999,33.459999,35.590000,43.570000,26.200001,21.510000,38.020000,100.510002,...,17.110001,26.190001,33.230000,30.180000,,,21.200001,21.08,41.279999,8.06
2023-10-02,425.584503,59.770000,31.040001,32.689999,34.900002,43.130001,25.740000,21.070000,37.410000,98.790001,...,16.680000,25.299999,32.619999,29.330000,,,20.250000,20.67,40.509998,8.06
2023-10-03,419.887604,58.730000,30.870001,32.299999,34.630001,42.619999,25.490000,20.790001,36.910000,96.860001,...,16.490000,24.920000,31.870001,29.180000,,,20.450001,20.09,39.320000,8.06
2023-10-04,422.945221,58.090000,30.860001,32.209999,34.840000,42.970001,25.690001,20.900000,37.529999,97.650002,...,16.840000,24.900000,32.299999,29.059999,,,20.690001,19.91,39.509998,8.06


In [6]:
# check any NA values in the middle

for country in df.columns:
    if usd_etf_codes[country] == '':
        continue
    
    first_valid = df[country].first_valid_index()
    if df[country].loc[first_valid:].isna().sum() > 0:
        print(country, df[country].loc[first_valid:].isna().sum())

In [7]:
for country in df.columns:
    if usd_etf_codes[country] == '':
        print(country, 'No data')
        continue
    
    first_valid = df[country].first_valid_index()
    if not str(first_valid).startswith('1999-01-04'):
        print(country, first_valid)

Denmark 2012-01-26 00:00:00
Finland 2012-01-26 00:00:00
Norway 2009-08-19 00:00:00
Israel 2008-03-28 00:00:00
Ireland 2010-05-11 00:00:00
New Zealand 2010-09-02 00:00:00
Portugal 2013-11-13 00:00:00
Eurozona 2002-10-21 00:00:00
China 2011-03-31 00:00:00
Taiwan 2000-06-23 00:00:00
India 2012-02-03 00:00:00
Korea 2000-05-12 00:00:00
Brazil 2000-07-14 00:00:00
Saudi Arabia 2015-09-17 00:00:00
South Africa 2003-02-07 00:00:00
Thailand 2008-04-01 00:00:00
Indonesia 2010-05-07 00:00:00
United Arab Emirates 2014-05-01 00:00:00
Qatar 2014-05-01 00:00:00
Kuwait 2020-09-03 00:00:00
Turkiye 2008-03-28 00:00:00
Philippines 2010-09-29 00:00:00
Poland 2010-05-26 00:00:00
Chile 2007-11-20 00:00:00
Greece 2011-12-08 00:00:00
Peru 2009-06-22 00:00:00
Hungary No data
Czechia No data
Egypt 2010-02-18 00:00:00
Colombia 2009-02-09 00:00:00
Argentina 2011-03-03 00:00:00
Russia 2010-11-09 00:00:00


In [8]:
for country in df.columns:
    last_valid = df[country].last_valid_index()
    if not str(last_valid).startswith('2023-10-05'):
        print(country, last_valid)

Hungary None
Czechia None


## EFTs in EUR

In [9]:
eur_etf_codes = {
    'United States': 'SXR8.DE',
    'Japan': 'IJPA.AS',
    'United Kingdom': 'SXRW.DE',
    'Canada': 'SXR2.DE',
    'France': 'CAC.PA',
    'Switzerland': 'XSMI.MI',
    'Germany': 'DBXD.DE',
    'Australia': 'IBC6.DE',
    'Netherlands': 'IAEX.AS',
    'Denmark': '',
    'Sweden': '',
    'Spain': 'XESP.DE',
    'Hong Kong': 'HKDE.AS',
    'Italy': 'ETFMIB.MI',
    'Singapore': 'XBAS.DE',
    'Finland': 'SLG-OMXH25.HE',
    'Belgium': 'BEL.BR',
    'Norway': 'OBXD.OL',
    'Israel': '',
    'Ireland': '',
    'New Zealand': '',
    'Austria': 'EXXX.DE',
    'Portugal': '',
    'Eurozona': 'C50.PA',

    'China': 'XCS6.DE',
    'Taiwan': 'IQQT.DE',
    'India': 'INDI.MI',
    'Korea': 'XMKO.MI',
    'Brazil': 'XMBR.DE',
    'Saudi Arabia': 'IUSW.DE',
    'South Africa': 'IBC4.DE',
    'Mexico': 'XMEX.MI',
    'Thailand': 'XCS4.DE',
    'Indonesia': 'INDO.PA',
    'Malaysia': 'XCS3.DE',
    'United Arab Emirates': '',
    'Qatar': '',
    'Kuwait': '',
    'Turkiye': 'ITKY.AS',
    'Philippines': 'XPQP.DE',
    'Poland': 'IBCJ.DE',
    'Chile': '',
    'Greece': 'GRE.PA',
    'Peru': '',
    'Hungary': '',
    'Czechia': 'CZX.DE',
    'Egypt': '',
    'Colombia': '',
    'Argentina': '',
    'Russia': 'RUS.PA',
}

In [10]:
df_eur = pd.DataFrame(index=pd.date_range('2008-01-02', '2023-10-05', freq='B'),
                      columns=eur_etf_codes)

for country in eur_etf_codes:
    index_ticker = eur_etf_codes[country]
    if index_ticker != '':
        index_data = yf.download(index_ticker, '2008-01-02')
        print(country, index_data.shape)
        df_eur[country] = index_data['Adj Close']

[*********************100%%**********************]  1 of 1 completed
United States (3454, 6)
[*********************100%%**********************]  1 of 1 completed
Japan (3645, 6)
[*********************100%%**********************]  1 of 1 completed
United Kingdom (3326, 6)
[*********************100%%**********************]  1 of 1 completed
Canada (3501, 6)
[*********************100%%**********************]  1 of 1 completed
France (4086, 6)
[*********************100%%**********************]  1 of 1 completed
Switzerland (4052, 6)
[*********************100%%**********************]  1 of 1 completed
Germany (4055, 6)
[*********************100%%**********************]  1 of 1 completed
Australia (3535, 6)
[*********************100%%**********************]  1 of 1 completed
Netherlands (4088, 6)
[*********************100%%**********************]  1 of 1 completed
Spain (3183, 6)
[*********************100%%**********************]  1 of 1 completed
Hong Kong (2016, 6)
[*********************10

In [11]:
df_eur['United Kingdom'].loc[:'2010-10'] = None

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_eur['United Kingdom'].loc[:'2010-10'] = None


In [12]:
df_eur = df_eur.round(6)

In [13]:
df_eur = df_eur.dropna(thresh=15)
df_eur = df_eur.ffill(limit=2)
df_eur.to_csv('market_data/etfs_in_eur.csv')
df_eur

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Poland,Chile,Greece,Peru,Hungary,Czechia,Egypt,Colombia,Argentina,Russia
2008-01-02,,,,,40.466129,51.500000,79.461098,,38.880493,,...,,,12.338554,,,,,,,43.950001
2008-01-03,,,,,40.013062,50.650002,79.054100,,38.795219,,...,,,12.379960,,,,,,,43.650002
2008-01-04,,,,,39.977104,50.090000,78.056702,,38.179642,,...,,,12.189499,,,,,,,43.660000
2008-01-07,,,,,39.265156,49.349998,78.141296,,38.262177,,...,,,12.156377,,,,,,,43.520000
2008-01-08,,,,,39.308300,49.970001,78.469101,,37.944633,,...,,,12.172935,,,,,,,43.669998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-09-29,426.269989,44.794998,164.059998,156.660004,70.129219,117.239998,145.559998,40.154999,72.369812,,...,13.792,,1.259842,,,1.1628,,,,
2023-10-02,427.329987,44.590000,162.139999,153.880005,69.438721,116.580002,144.240005,39.555000,71.931213,,...,13.590,,1.235001,,,1.1586,,,,
2023-10-03,422.380005,43.830002,161.080002,151.339996,68.845474,115.459999,142.660004,38.939999,71.432800,,...,13.424,,1.204684,,,1.1494,,,,
2023-10-04,421.470001,43.134998,159.880005,149.919998,68.699593,115.440002,142.800003,38.840000,71.582314,,...,13.568,,1.216420,,,1.1484,,,,


In [14]:
df_eur = df_eur.ffill(limit=2)

In [15]:
# check any NA values in the middle

for country in df_eur.columns:
    if eur_etf_codes[country] == '':
        continue
    
    first_valid = df_eur[country].first_valid_index()
    if df_eur[country].loc[first_valid:].isna().sum() > 0:
        print(country, df_eur[country].loc[first_valid:].isna().sum())

Hong Kong 1
Finland 4000
Indonesia 3
Greece 19
Russia 403


In [16]:
for country in df_eur.columns:
    if eur_etf_codes[country] == '':
        #print(country, 'No data')
        continue
    
    first_valid = df_eur[country].first_valid_index()
    if not str(first_valid).startswith('2008-01-02'):
        print(country, first_valid)

United States 2010-05-19 00:00:00
Japan 2009-09-25 00:00:00
United Kingdom 2010-11-02 00:00:00
Canada 2010-03-11 00:00:00
Australia 2010-01-22 00:00:00
Spain 2011-04-27 00:00:00
Hong Kong 2016-01-27 00:00:00
Singapore 2011-09-19 00:00:00
Finland None
Eurozona 2008-09-16 00:00:00
China 2011-10-04 00:00:00
Saudi Arabia 2019-05-06 00:00:00
South Africa 2010-01-22 00:00:00
Mexico 2010-07-26 00:00:00
Thailand 2011-08-19 00:00:00
Indonesia 2011-07-04 00:00:00
Malaysia 2011-08-24 00:00:00
Philippines 2011-04-12 00:00:00
Poland 2011-01-21 00:00:00
Czechia 2018-03-19 00:00:00


In [17]:
for country in df_eur.columns:
    if eur_etf_codes[country] == '':
        #print(country, 'No data')
        continue
    
    last_valid = df_eur[country].last_valid_index()
    if not str(last_valid).startswith('2023-10-05'):
        print(country, last_valid)

Finland None
Russia 2022-03-14 00:00:00
