In [1]:
# Importing the necessary package 
import yfinance 

In [2]:
# Ignoring warning messages
import warnings 
warnings.filterwarnings("ignore")

In [21]:
# Using the .download() method to get our data
#
# tickers -> The time series we are interested in (in our case "^IXIC", the NASDAQ Composite index)
# start -> The starting date of our data set
# end -> The ending date of our data set (at the time of upload, this is the current date).
#        yfinance treats this bound as exclusive, so the last row returned is 2022-04-18.
# interval -> The distance in time between two recorded observations. Since we're using daily closing prices, we set it equal to "1d", which indicates 1 day.
# group_by -> The way we want to group the scraped data. Usually we want it to be "ticker", so that we have all the information about a time series in 1 variable.
# auto_adjust -> Automatically adjust the closing prices for each period.
# threads -> Whether to use threads for mass downloading.
#
# NOTE: the keyword must be spelled "threads". The earlier spelling "treads" is not an
# argument yfinance documents, so the intended threading option was never actually set.

raw_data = yfinance.download(
    tickers="^IXIC",
    start="2016-01-01",
    end="2022-04-19",
    interval="1d",
    group_by="ticker",
    auto_adjust=True,
    threads=True,
)


[*********************100%***********************]  1 of 1 completed


In [22]:
# Working on an independent (deep) copy so that raw_data stays untouched
# if we remove or alter elements by mistake further down
df_comp = raw_data.copy(deep=True)

In [23]:
# Quick look at the first 5 observations of the working copy
df_comp.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,4897.649902,4903.089844,4846.97998,4903.089844,2218420000
2016-01-05,4917.839844,4926.72998,4872.740234,4891.430176,1927380000
2016-01-06,4813.759766,4866.040039,4804.689941,4835.759766,2168620000
2016-01-07,4736.399902,4788.02002,4688.169922,4689.430176,2552590000
2016-01-08,4722.02002,4742.569824,4637.850098,4643.629883,2288750000


In [24]:
# Dropping the first observation (original rationale: the download starts 1 period before
# the requested start date, due to time zone differences of closing prices).
# The slice is taken from raw_data rather than from df_comp so that this cell is idempotent:
# re-running it always drops exactly one row instead of one more row per execution.
# NOTE(review): in the preview above the first row is 2016-01-04, which is already after the
# requested start of 2016-01-01 - confirm that dropping it is still intended.
df_comp = raw_data.iloc[1:].copy()

In [25]:
# Setting the frequency of the data to business days ('B' is the canonical alias).
# asfreq inserts a row of NaN for every business day that has no observation
# (e.g. market holidays), which also turns the integer Volume column into floats.
df_comp = df_comp.asfreq('B')

# Filling the inserted gaps with the last known values.
# .ffill() replaces the deprecated fillna(method='ffill') spelling and yields the same result,
# so a non-trading business day simply repeats the previous day's row.
df_comp = df_comp.ffill()

In [26]:
# Previewing both ends of the series as plain text:
#   - the first 5 rows, to make sure the data was scraped correctly
#   - the last 5 rows, to make sure the last day we're including is correct
print(df_comp.head(), df_comp.tail(), sep="\n")

                   Open         High          Low        Close        Volume
Date                                                                        
2016-01-05  4917.839844  4926.729980  4872.740234  4891.430176  1.927380e+09
2016-01-06  4813.759766  4866.040039  4804.689941  4835.759766  2.168620e+09
2016-01-07  4736.399902  4788.020020  4688.169922  4689.430176  2.552590e+09
2016-01-08  4722.020020  4742.569824  4637.850098  4643.629883  2.288750e+09
2016-01-11  4673.439941  4683.020020  4573.779785  4637.990234  2.391110e+09
                    Open          High           Low         Close  \
Date                                                                 
2022-04-12  13584.690430  13685.950195  13317.740234  13371.570312   
2022-04-13  13373.120117  13679.429688  13353.660156  13643.589844   
2022-04-14  13647.429688  13662.929688  13345.219727  13351.080078   
2022-04-15  13647.429688  13662.929688  13345.219727  13351.080078   
2022-04-18  13319.389648  13414.269531  1

In [27]:
# Saving the cleaned series (with its Date index) to disk
from pathlib import Path  # imported here so the cell runs on its own

output_path = Path('data/NASDAQ.csv')
# to_csv raises an error if the target folder is missing, so create it first
output_path.parent.mkdir(parents=True, exist_ok=True)
df_comp.to_csv(output_path, index=True)