In [1]:
# import the necessary package
import yfinance as yf

In [2]:
# warnings module, for optionally ignoring warning messages (the filter call below is currently commented out)
import warnings
# warnings.filterwarnings("ignore")

In [17]:
# Download the price history of the four indices we study
# (in our case, these are the S&P, FTSE, NIKKEI and DAX).
raw_data = yf.download(
    tickers=['^GSPC', '^FTSE', '^N225', '^GDAXI'],  # the time series we are interested in
    start="1994-01-07",   # the starting date of our data set
    end="2022-12-27",     # the ending date of our data set (the current date at the time of upload)
    interval="1d",        # distance in time between two recorded observations; "1d" means 1 day
    group_by='ticker',    # group the scraped data by ticker, so each time series sits under one key
    auto_adjust=True,     # automatically adjust the closing prices for each period
    threads=True,         # use threads for mass downloading
)

[*********************100%***********************]  4 of 4 completed


In [18]:
# Work on a separate (deep) copy, so raw_data is preserved if we remove/alter
# elements of the data by mistake
df = raw_data.copy(deep=True)

In [19]:
# Preview the first five rows of the working copy
df.head(n=5)

Unnamed: 0_level_0,^N225,^N225,^N225,^N225,^N225,^GSPC,^GSPC,^GSPC,^GSPC,^GSPC,^GDAXI,^GDAXI,^GDAXI,^GDAXI,^GDAXI,^FTSE,^FTSE,^FTSE,^FTSE,^FTSE
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
1994-01-07,17842.980469,18131.410156,17787.480469,18124.009766,0.0,467.089996,470.26001,467.029999,469.899994,324920000.0,2218.959961,2227.639893,2201.820068,2224.949951,0.0,3401.399902,3446.800049,3398.699951,3446.0,0.0
1994-01-10,18186.519531,18567.060547,18186.519531,18443.439453,0.0,469.899994,475.269989,469.549988,475.269989,319490000.0,2231.840088,2238.01001,2222.0,2225.0,0.0,3465.699951,3468.100098,3430.0,3440.600098,0.0
1994-01-11,18481.849609,18671.669922,18373.039062,18485.25,0.0,475.269989,475.279999,473.269989,474.130005,305490000.0,2225.429932,2235.610107,2225.179932,2228.100098,0.0,3442.5,3442.5,3413.5,3413.800049,0.0
1994-01-12,18447.339844,18807.080078,18301.929688,18793.880859,0.0,474.130005,475.059998,472.140015,474.170013,310690000.0,2227.120117,2227.790039,2182.060059,2182.060059,0.0,3394.800049,3402.399902,3372.0,3372.0,0.0
1994-01-13,18770.380859,18823.380859,18548.75,18577.259766,0.0,474.170013,474.170013,471.799988,472.470001,277970000.0,2171.5,2183.709961,2134.100098,2142.370117,0.0,3380.699951,3383.300049,3356.899902,3360.0,0.0


In [20]:
# Copy each index's 'Close' series into a new column with a short, readable name
close_aliases = {
    'spx': '^GSPC',
    'dax': '^GDAXI',
    'ftse': '^FTSE',
    'nikkei': '^N225',
}
for alias, ticker in close_aliases.items():
    df[alias] = df[(ticker, 'Close')]


In [27]:
# Keep only the spx, dax, ftse and nikkei columns, then flatten the
# multi-level column index by dropping its level 1
index_columns = ['spx', 'dax', 'ftse', 'nikkei']
selected = df.loc[:, index_columns]
df = selected.droplevel(level=1, axis=1)

In [30]:
# Convert the series to business-day frequency; dates missing from the
# downloaded index are inserted as NaN rows.
# 'B' is the documented pandas offset alias for business days (the lowercase
# spelling is not the canonical alias).
df = df.asfreq('B')
# Optional diagnostics for the gaps introduced by the conversion:
# df.isna().sum()
# df[df.isna().any(axis=1)]

In [36]:
# Fill any missing values by carrying the last valid observation forward.
# DataFrame.ffill() is the supported spelling; fillna(method='ffill') is
# deprecated in recent pandas releases.
df = df.ffill()

In [37]:
# Inspect the first five rows of the cleaned frame
df.head(n=5)

Unnamed: 0_level_0,spx,dax,ftse,nikkei
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.899994,2224.949951,3446.0,18124.009766
1994-01-10,475.269989,2225.0,3440.600098,18443.439453
1994-01-11,474.130005,2228.100098,3413.800049,18485.25
1994-01-12,474.170013,2182.060059,3372.0,18793.880859
1994-01-13,472.470001,2142.370117,3360.0,18577.259766


In [38]:
# Inspect the last five rows of the cleaned frame
df.tail(n=5)

Unnamed: 0_level_0,spx,dax,ftse,nikkei
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-12-20,3821.620117,13884.660156,7370.600098,26568.029297
2022-12-21,3878.439941,14097.820312,7497.299805,26387.720703
2022-12-22,3822.389893,13914.070312,7469.299805,26507.869141
2022-12-23,3844.820068,13940.929688,7473.0,26235.25
2022-12-26,3844.820068,13940.929688,7473.0,26405.869141
