In [1]:
import yfinance as yf
import pandas as pd
from datetime import date

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import time
from io import StringIO

In [2]:
# Load the index levels saved by previous runs; the first CSV column is used
# as the index and parsed as dates.
historic_data = pd.read_csv(
    'market_data/stock_indices.csv',
    index_col=0,
    parse_dates=True,
)
historic_data.tail()

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Turkiye,Philippines,Poland,Chile,Greece,Hungary,Czechia,Colombia,Argentina,Russia
2023-11-24,4559.34,33625.53,7488.2,20103.1,7292.8,10879.52,16029.49,7040.8,765.66,2255.36,...,7960.0,6269.5,2228.01,5776.74,1258.73,4573.02,1387.64,3618.83,917599.31,3217.76
2023-11-27,4550.43,33447.67,7460.7,20032.7,7265.49,10821.06,15966.37,6987.6,764.22,2244.04,...,8107.2,6269.5,2208.05,5757.42,1265.36,4545.15,1380.91,3596.39,843665.19,3191.05
2023-11-28,4554.89,33408.39,7455.2,20036.8,7250.13,10760.38,15992.67,7015.2,761.37,2189.2,...,8093.5,6309.57,2239.69,5776.56,1264.69,4676.63,1384.06,3600.67,773419.81,3196.36
2023-11-29,4550.58,33321.22,7423.5,20116.2,7267.64,10802.88,16166.45,7035.3,761.99,2206.11,...,8009.3,6265.14,2222.44,5792.78,1271.2,4698.58,1389.18,3603.34,790376.62,3173.52
2023-11-30,4567.8,33486.89,7453.8,20236.3,7310.77,10854.32,16215.43,7087.3,765.04,2226.45,...,7948.6,6223.73,2215.25,5818.51,1275.13,4749.02,1389.16,3632.12,813393.88,3165.79


In [3]:
# Start the update window at the last row of the history that holds data,
# expressed as an ISO 'YYYY-MM-DD' string.
start_date = historic_data.last_valid_index().date().isoformat()
start_date

'2023-11-30'

In [4]:
# End the update window today, expressed as an ISO 'YYYY-MM-DD' string.
end_date = date.today().isoformat()
end_date

'2023-12-17'

In [5]:
# Yahoo Finance ticker of the reference stock index for each market.
# The key order defines the column order of the update frame built from it.
# An empty string means no Yahoo ticker is used for that market: the download
# loop skips those entries, and they are the markets listed in
# `investing_pages`, which are scraped from investing.com instead.
yahoo_references = {
    'United States': '^GSPC',  # S&P 500
    'Japan': '^N225',  # Nikkei 225
    'United Kingdom': '^FTSE',  # FTSE 100
    'Canada': '^GSPTSE',  # S&P/TSX Composite index
    'France': '^FCHI',  # CAC 40
    'Switzerland': '^SSMI',  # SMI PR
    'Germany': '^GDAXI',  # DAX PERFORMANCE-INDEX
    'Australia': '^AXJO',  # S&P/ASX 200
    'Netherlands': '^AEX',  # AEX-Index
    'Denmark': '',  # no Yahoo ticker; see `investing_pages`
    'Sweden': '^OMX',  # OMX Stockholm 30 Index
    'Spain': '^IBEX',  # IBEX 35
    'Hong Kong': '^HSI',  # HANG SENG INDEX
    'Italy': 'FTSEMIB.MI',  # FTSE MIB Index
    'Singapore': '^STI',  # STI Index
    'Finland': '^OMXH25',  # OMX Helsinki 25
    'Belgium': '^BFX',  # BEL 20
    'Norway': '',  # no Yahoo ticker; see `investing_pages`
    'Israel': '^TA125.TA',  # TA-125
    'Ireland': '^ISEQ',  # ISEQ All Share
    'New Zealand': '^NZ50',  # S&P/NZX 50 INDEX GROSS
    'Austria': '^ATX',  # Austrian Traded Index
    'Portugal': 'PSI20.LS',  # PSI 20
    'Eurozona': '^STOXX50E',  # Euro Stoxx 50

    'China': '000001.SS',  # SSE Composite Index
    'Taiwan': '^TWII',  # TSEC weighted index
    'India': '^BSESN',  # S&P BSE SENSEX
    'Korea': '^KS11',  # KOSPI Composite Index
    'Brazil': '^BVSP',  # IBOVESPA
    'Saudi Arabia': '^TASI.SR',  # Tadawul All Shares Index
    'South Africa': '',  # no Yahoo ticker; see `investing_pages`
    'Mexico': '^MXX',  # IPC MEXICO
    'Thailand': '^SET.BK',  # SET Index
    'Indonesia': '^JKSE',  # IDX COMPOSITE
    'Malaysia': '^KLSE',  # FTSE Bursa Malaysia KLCI
    'Turkiye': 'XU100.IS',  # BIST 100
    'Philippines': 'PSEI.PS',  # PSEi INDEX
    'Poland': '',  # no Yahoo ticker; see `investing_pages`
    'Chile': '',  # no Yahoo ticker; see `investing_pages`
    'Greece': 'GD.AT',  # COMPOSITE INDEX
    'Hungary': '',  # no Yahoo ticker; see `investing_pages`
    'Czechia': '',  # no Yahoo ticker; see `investing_pages`
    'Colombia': '',  # no Yahoo ticker; see `investing_pages`
    'Argentina': '^MERV',  # MERVAL
    'Russia': 'IMOEX.ME'  # MOEX Russia Index
}

In [6]:
# Update frame: one row per business day in the window, one column per market.
df = pd.DataFrame(index=pd.date_range(start_date, end_date, freq='B'),
                  columns=list(yahoo_references))

for country, index_ticker in yahoo_references.items():
    if index_ticker == '':
        # No Yahoo ticker for this market; its column stays empty here.
        continue
    # progress=False keeps the cell output from being flooded with one
    # progress bar per ticker.
    index_data = yf.download(index_ticker, start=start_date, progress=False)
    # Column assignment aligns on df's index: dates outside the window are
    # dropped and business days without a quote become NaN.
    df[country] = index_data['Adj Close']

df

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Turkiye,Philippines,Poland,Chile,Greece,Hungary,Czechia,Colombia,Argentina,Russia
2023-11-30,4567.799805,33486.890625,7453.799805,20236.300781,7310.77002,10854.320312,16215.429688,7087.299805,765.039978,,...,7948.600098,6223.72998,,,1275.130005,,,,813393.9,3165.790039
2023-12-01,4594.629883,33431.511719,7529.399902,20452.900391,7346.149902,10887.360352,16397.519531,7073.200195,771.369995,,...,8026.299805,6245.180176,,,1280.109985,,,,870169.2,3142.290039
2023-12-04,4569.779785,33231.269531,7513.0,20410.199219,7332.589844,10952.44043,16404.759766,7124.700195,770.609985,,...,8087.200195,6284.370117,,,1275.339966,,,,845384.3,3114.409912
2023-12-05,4567.180176,32775.820312,7489.799805,20375.900391,7386.990234,10964.80957,16533.109375,7061.600098,774.070007,,...,8057.399902,6308.950195,,,1270.349976,,,,885270.4,3129.330078
2023-12-06,4549.339844,33445.898438,7515.399902,20274.199219,7435.990234,11001.620117,16656.439453,7178.399902,776.179993,,...,7855.100098,6305.850098,,,1273.47998,,,,894502.3,3079.5
2023-12-07,4585.589844,32858.308594,7513.700195,20278.5,7428.52002,10968.099609,16628.990234,7173.299805,776.340027,,...,7978.799805,6234.77002,,,1273.329956,,,,941829.9,3073.629883
2023-12-08,4604.370117,32307.859375,7554.5,20331.5,7526.549805,11071.769531,16759.220703,7194.899902,781.849976,,...,7913.799805,,,,1275.089966,,,,,3079.98999
2023-12-11,4622.439941,32791.800781,7544.899902,20318.400391,7551.529785,11130.360352,16794.429688,7199.0,786.450012,,...,7728.399902,6228.290039,,,1275.5,,,,976823.1,3026.169922
2023-12-12,4643.700195,32843.699219,7542.799805,20233.800781,7543.549805,11151.219727,16791.740234,7235.299805,786.130005,,...,7748.899902,6292.390137,,,1283.119995,,,,1010022.0,3019.889893
2023-12-13,4707.089844,32926.351562,7548.399902,20629.5,7531.220215,11188.910156,16766.050781,7257.799805,787.02002,,...,7529.299805,6255.740234,,,1292.349976,,,,1003484.0,3032.219971


In [10]:
# investing.com page slugs (appended to https://www.investing.com/indices/ by
# `retrieve_prices_from_investing`) for the markets whose ticker is left empty
# in `yahoo_references`.
investing_pages = {
    'Denmark': 'omx-copenhagen-20-historical-data',
    'Norway': 'oslo-obx-historical-data',
    'South Africa': 'ftse-jse-top-40-historical-data',
    'Poland': 'wig-20-historical-data',
    'Chile': 'ipsa-historical-data',
    'Hungary': 'ftse-hungary-historical-data',
    'Czechia': 'px-historical-data',
    'Colombia': 'ftse-colombia-historical-data'
}

In [11]:
def retrieve_prices_from_investing(path):
    """Scrape an index's historical price table from investing.com.

    Opens ``https://www.investing.com/indices/<path>`` in a Chrome session,
    accepts the cookie banner when one is present, and parses the HTML tables
    found under the page's ``__next`` root element.

    Parameters
    ----------
    path : str
        Page slug appended to the investing.com indices base URL, e.g.
        ``'wig-20-historical-data'``.

    Returns
    -------
    pandas.Series
        The ``Price`` column of the selected table, indexed by the parsed
        ``Date`` column and sorted by date in ascending order.

    Raises
    ------
    selenium.common.exceptions.WebDriverException
        If the browser cannot be started or the page root is not found.
    ValueError
        If ``pandas.read_html`` finds no table in the page.
    """
    # For these slugs the price history is taken from the second parsed table
    # rather than the first (page-specific; re-check if the site layout changes).
    second_table_pages = {
        'oslo-obx-historical-data',
        'ftse-jse-top-40-historical-data',
    }

    service = Service(executable_path='../chromedriver.exe')
    options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(service=service, options=options)
    try:
        base_url = 'https://www.investing.com/indices/'
        driver.get(f'{base_url}{path}')
        time.sleep(2)  # give the page time to render

        # find_elements returns an empty list instead of raising, so a page
        # that shows no cookie banner no longer aborts the scrape.
        cookie_buttons = driver.find_elements(
            By.XPATH, '//*[@id="onetrust-accept-btn-handler"]'
        )
        if cookie_buttons:
            cookie_buttons[0].click()
            time.sleep(2)  # let the banner disappear

        page_root = driver.find_element(
            By.XPATH, '//*[@id="__next"]'
        )
        dfs = pd.read_html(StringIO(page_root.get_attribute('innerHTML')))
    finally:
        # quit() (not close()) ends the session and the chromedriver process,
        # and running it in `finally` avoids leaking browsers on errors.
        driver.quit()

    df_raw = dfs[1] if path in second_table_pages else dfs[0]

    df_raw.index = pd.to_datetime(df_raw['Date'])
    df_raw = df_raw.sort_index()
    return df_raw['Price']

In [12]:
# Fill the columns of the markets without a Yahoo ticker from investing.com,
# printing how many rows each page returned.
for country, path in investing_pages.items():
    df_scrapped = retrieve_prices_from_investing(path)
    print(country, len(df_scrapped))
    df[country] = df_scrapped

Denmark 21
Norway 21
South Africa 20
Poland 21
Chile 20
Hungary 21
Czechia 20
Colombia 21


Date
2023-11-17    3606.12
2023-11-20    3596.42
2023-11-21    3606.61
2023-11-22    3624.06
2023-11-23    3582.51
2023-11-24    3618.83
2023-11-27    3596.39
2023-11-28    3600.67
2023-11-29    3603.34
2023-11-30    3632.12
2023-12-01    3648.01
2023-12-04    3673.18
2023-12-05    3639.55
2023-12-06    3577.66
2023-12-07    3640.75
2023-12-08    3640.75
2023-12-11    3614.24
2023-12-12    3620.08
2023-12-13    3675.84
2023-12-14    3696.49
2023-12-15    3609.84
Name: Price, dtype: float64

In [13]:
# Carry the previous value forward over gaps, then back-fill whatever is
# still missing at the start of the window.
df = df.ffill().bfill()
df

Unnamed: 0,United States,Japan,United Kingdom,Canada,France,Switzerland,Germany,Australia,Netherlands,Denmark,...,Turkiye,Philippines,Poland,Chile,Greece,Hungary,Czechia,Colombia,Argentina,Russia
2023-11-30,4567.799805,33486.890625,7453.799805,20236.300781,7310.77002,10854.320312,16215.429688,7087.299805,765.039978,2226.45,...,7948.600098,6223.72998,2215.25,5818.51,1275.130005,4749.02,1389.16,3632.12,813393.9,3165.790039
2023-12-01,4594.629883,33431.511719,7529.399902,20452.900391,7346.149902,10887.360352,16397.519531,7073.200195,771.369995,2233.68,...,8026.299805,6245.180176,2285.98,5886.71,1280.109985,4760.73,1390.38,3648.01,870169.2,3142.290039
2023-12-04,4569.779785,33231.269531,7513.0,20410.199219,7332.589844,10952.44043,16404.759766,7124.700195,770.609985,2225.67,...,8087.200195,6284.370117,2281.04,5823.36,1275.339966,4805.55,1397.46,3673.18,845384.3,3114.409912
2023-12-05,4567.180176,32775.820312,7489.799805,20375.900391,7386.990234,10964.80957,16533.109375,7061.600098,774.070007,2218.41,...,8057.399902,6308.950195,2298.78,5905.31,1270.349976,4794.25,1399.27,3639.55,885270.4,3129.330078
2023-12-06,4549.339844,33445.898438,7515.399902,20274.199219,7435.990234,11001.620117,16656.439453,7178.399902,776.179993,2204.68,...,7855.100098,6305.850098,2309.81,5920.01,1273.47998,4785.57,1411.36,3577.66,894502.3,3079.5
2023-12-07,4585.589844,32858.308594,7513.700195,20278.5,7428.52002,10968.099609,16628.990234,7173.299805,776.340027,2175.27,...,7978.799805,6234.77002,2288.84,5970.81,1273.329956,4784.6,1407.27,3640.75,941829.9,3073.629883
2023-12-08,4604.370117,32307.859375,7554.5,20331.5,7526.549805,11071.769531,16759.220703,7194.899902,781.849976,2182.76,...,7913.799805,6234.77002,2306.05,5970.81,1275.089966,4820.48,1407.55,3640.75,941829.9,3079.98999
2023-12-11,4622.439941,32791.800781,7544.899902,20318.400391,7551.529785,11130.360352,16794.429688,7199.0,786.450012,2178.44,...,7728.399902,6228.290039,2313.32,5884.78,1275.5,4790.5,1408.27,3614.24,976823.1,3026.169922
2023-12-12,4643.700195,32843.699219,7542.799805,20233.800781,7543.549805,11151.219727,16791.740234,7235.299805,786.130005,2149.68,...,7748.899902,6292.390137,2293.66,5913.5,1283.119995,4856.85,1406.03,3620.08,1010022.0,3019.889893
2023-12-13,4707.089844,32926.351562,7548.399902,20629.5,7531.220215,11188.910156,16766.050781,7257.799805,787.02002,2180.53,...,7529.299805,6255.740234,2298.31,6020.41,1292.349976,4844.99,1397.88,3675.84,1003484.0,3032.219971


In [14]:
# Append only the dates that are not already in the history. Selecting by
# label instead of dropping the first row positionally keeps the index free of
# duplicates even when the window's first row is not the history's last row.
is_new_date = ~df.index.isin(historic_data.index)
df_new = pd.concat([historic_data, df.loc[is_new_date].round(2)])
df_new.to_csv('market_data/stock_indices.csv')