In [2]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.70-py2.py3-none-any.whl (26 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.4 MB)
[K     |████████████████████████████████| 6.4 MB 6.7 MB/s 
Collecting requests>=2.26
  Downloading requests-2.27.1-py2.py3-none-any.whl (63 kB)
[K     |████████████████████████████████| 63 kB 1.1 MB/s 
Installing collected packages: requests, lxml, yfinance
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests~

In [3]:
import yfinance as yf
import pandas as pd

def get_finance_data(ticker_list: list, start_date: str, end_date: str, data_interval: str) -> pd.DataFrame:
    """
    Retrieve closing values for multiple stocks and/or market indexes over a time period.

    The data is downloaded with ``yf.download``, reduced to its 'Close' values and
    returned as a single dataframe whose columns are labelled with each ticker's
    ``shortName`` (as reported by ``yf.Ticker(ticker).info``).

    Parameters
    ----------
    ticker_list : list
        list containing Yahoo Finance tickers to pull data from
    start_date : str
        start date of when to pull data from in the format "YYYY-MM-DD"
    end_date : str
        end date of when to pull data from in the format "YYYY-MM-DD"
    data_interval : str
        interval to fetch data
        valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo

    Returns
    -------
    df : pandas.DataFrame
        dataframe with one column of closing values per ticker over the defined
        time period; a column keeps its ticker symbol as label when no
        ``shortName`` is available for it
    """

    # get the closing values for every requested ticker
    closes = yf.download(ticker_list, start=start_date, end=end_date, interval=data_interval)['Close']

    # Selecting 'Close' from a flat (single-ticker) result gives a Series, which has
    # no .columns; promote it to a one-column frame keyed by the ticker symbol.
    if isinstance(closes, pd.Series):
        only_ticker = ticker_list if isinstance(ticker_list, str) else list(ticker_list)[0]
        closes = closes.to_frame(name=only_ticker)

    # Build the ticker -> full name mapping once and rename in a single pass.
    # Fall back to the ticker symbol when 'shortName' is missing or empty, so
    # columns never end up labelled None (or as duplicated None labels).
    full_names = {}
    for ticker in closes.columns:
        info = yf.Ticker(ticker).info or {}
        full_names[ticker] = info.get('shortName') or ticker

    return closes.rename(columns=full_names)

# Ticker symbols to download.
# Alternative list including international market indexes:
# tickers_list = ['^GSPC', '^DJI', '^IXIC', '^RUT', '^GSPTSE', '^STI', '^HSI',
#                 '^BVSP', '^GDAXI', '^IBEX', '^FTSE', '^FCHI']
tickers_list = ['^GSPC', '^DJI', '^IXIC', '^RUT']

# Download closing values for the chosen period at a 3-month interval.
df_merged = get_finance_data(tickers_list, '2017-04-01', '2021-04-01', '3mo')

# Change of each stock/index relative to the previous row. The values are
# fractions (0.05 means 5 %), even though the columns carry a '_%' suffix.
df_change = df_merged.pct_change().add_suffix('_%')

# Put the index values and their change columns side by side.
df_final = df_merged.join(df_change)

# Save the combined table as a csv file.
df_final.to_csv('market_data.csv')

[*********************100%***********************]  4 of 4 completed


In [4]:
# Preview the first five rows of the combined values / change table.
df_final.head(n=5)

Unnamed: 0_level_0,Dow Jones Industrial Average,S&P 500,NASDAQ Composite,Russell 2000,Dow Jones Industrial Average_%,S&P 500_%,NASDAQ Composite_%,Russell 2000_%
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-04-01,21349.630859,2423.409912,6140.419922,1415.359985,,,,
2017-07-01,22405.089844,2519.360107,6495.959961,1490.859985,0.049437,0.039593,0.057902,0.053343
2017-10-01,24719.220703,2673.610107,6903.390137,1535.51001,0.103286,0.061226,0.062721,0.029949
2018-01-01,24103.109375,2640.870117,7063.450195,1529.430054,-0.024924,-0.012246,0.023186,-0.00396
2018-04-01,24271.410156,2718.370117,7510.299805,1643.069946,0.006983,0.029346,0.063262,0.074302
