yfinance (https://pypi.org/project/yfinance/) is an open-source tool
that uses Yahoo's publicly available APIs to download finance data.

GOOD FOR >30m resolution, but has LIMITATIONS for others:

- Minute Data: 7 days
- 2 Minute Data: 60 days
- 5 Minute Data: 60 days
- 15 Minute Data: 60 days
- 30 Minute Data: 60 days
- Hourly Data: 730 days
- Daily/Weekly/Monthly: No limit

valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo

<!-- EXAMPLE USAGE:

tickers = 'MA V'
start = '2011-12-30'
end = '2022-01-01'
data = pd.DataFrame()
data = yf.download(tickers, start, end)['Close']
data
yf.Ticker("MA").calendar # next event
yf.Ticker("MA").earnings_dates # historical events
yf.Ticker("MA").recommendations # grades
yf.Ticker("MA").actions # dividends & splits -->

In [543]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from pathlib import Path

# make sure the output folder for the downloaded CSV files exists
data_dir = Path("data")
data_dir.mkdir(exist_ok=True, parents=True)

### GET TICKER NAMES

In [544]:
# load the dataframe that the previous step pickled
df = pd.read_pickle("pairs_to_download.pkl")

# ticker symbols = union of the row labels and the column labels
tickerStrings = df.index.union(df.columns).tolist()
tickerStrings

['AEE',
 'AEP',
 'CMS',
 'CNP',
 'DUK',
 'ED',
 'NRG',
 'OGE',
 'SO',
 'VST',
 'WEC',
 'XEL']

### DOWNLOAD & CREATE CSV FILE

#### OPTION 1 (DOWNLOAD  & CREATE A FILE FOR EACH TICKER PER INTERVAL)

In [545]:
# interval -> period to request for that interval
int_per = {'1d': '1y', '1h': '1y'}

# enable to enter manually
#tickerStrings = ['MA', 'V', 'LNT', 'FTS', 'POR', 'CMS', 'OUT', 'WELL']

for ticker in tickerStrings:
    for interval, period in int_per.items():
        data = yf.download(ticker, group_by="Ticker", period=period, interval=interval)

        # a failed download typically comes back as an empty frame;
        # skip it instead of writing a header-only CSV
        if data.empty:
            print(f'no data for {ticker} ({interval}), skipping')
            continue

        data['ticker'] = ticker

        # the datetime index comes with different names (or unnamed), so give
        # it one fixed name to keep the CSV header stable.
        # NOTE: the former `data.set_index(data.columns[0])` call was dropped:
        # its result was never assigned, and assigning it would have turned
        # the first data column into the index.
        data.index.name = 'time'

        # save as separate files, one per ticker and interval
        data.to_csv(f'data/{ticker}_{interval.upper()}.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Check if downloaded correctly:

In [546]:
# The per-ticker files are written with the interval upper-cased
# (f'data/{ticker}_{key.upper()}.csv'), so the hourly file ends in "_1H.csv".
# Asking for "_1h.csv" only works on case-insensitive file systems.
filename = f'data/{tickerStrings[0]}_1H.csv'
df = pd.read_csv(filename, parse_dates=["time"])
df

Unnamed: 0,time,Open,High,Low,Close,Adj Close,Volume,ticker
0,2021-10-27 09:30:00-04:00,85.610001,85.684998,84.709999,84.839996,84.839996,0,AEE
1,2021-10-27 10:30:00-04:00,84.879997,85.220001,84.760002,84.980003,84.980003,78300,AEE
2,2021-10-27 11:30:00-04:00,85.000000,85.070000,84.760002,84.779999,84.779999,71290,AEE
3,2021-10-27 12:30:00-04:00,84.760002,84.760002,84.300003,84.610001,84.610001,56404,AEE
4,2021-10-27 13:30:00-04:00,84.589996,84.705002,84.389999,84.599998,84.599998,89561,AEE
...,...,...,...,...,...,...,...,...
1757,2022-10-26 12:30:00-04:00,79.720001,80.070000,79.629997,79.970001,79.970001,67476,AEE
1758,2022-10-26 13:30:00-04:00,79.970001,80.050003,79.660004,79.680000,79.680000,60391,AEE
1759,2022-10-26 14:30:00-04:00,79.669998,79.730003,79.480003,79.610001,79.610001,106762,AEE
1760,2022-10-26 15:30:00-04:00,79.635002,79.839996,79.459999,79.550003,79.550003,174665,AEE


#### OPTION 2 (DOWNLOAD  & CREATE SINGLE DF FROM ALL TICKERS)

In [547]:
# interval -> period to request for that interval
int_per = {'1d': '1y', '1h': '1y'}

for interval, period in int_per.items():
    # start a fresh list for every interval: a list shared across intervals
    # would carry the rows of earlier intervals into every later file
    df_list = []

    for ticker in tickerStrings:
        data = yf.download(ticker, group_by="Ticker", period=period, interval=interval)

        # a failed download typically comes back as an empty frame; leave it out
        if data.empty:
            print(f'no data for {ticker} ({interval}), skipping')
            continue

        data['ticker'] = ticker
        # the datetime index comes with different names (or unnamed); unify it.
        # NOTE: the former `data.set_index(data.columns[0])` call was dropped
        # because its result was never assigned.
        data.index.name = 'time'
        df_list.append(data)

    # pd.concat raises on an empty list, so there is nothing to save here
    if not df_list:
        print(f'nothing downloaded for {interval}, no file written')
        continue

    # combine all dataframes of this interval into a single dataframe
    df = pd.concat(df_list)

    # save to csv, one file per interval
    df.to_csv(f'data/tickers_{interval.upper()}.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Check if downloaded correctly:

In [548]:
# read the combined daily file back, parsing the "time" column as dates
filename = 'data/tickers_1D.csv'
df = pd.read_csv(filepath_or_buffer=filename, parse_dates=["time"])
df

Unnamed: 0,time,Open,High,Low,Close,Adj Close,Volume,ticker
0,2021-10-27 00:00:00-04:00,85.610001,85.690002,84.300003,84.459999,82.312729,649800,AEE
1,2021-10-28 00:00:00-04:00,84.139999,84.940002,84.089996,84.809998,82.653831,528100,AEE
2,2021-10-29 00:00:00-04:00,84.709999,85.290001,83.800003,84.290001,82.147049,814200,AEE
3,2021-11-01 00:00:00-04:00,84.440002,84.580002,83.529999,84.279999,82.137299,911000,AEE
4,2021-11-02 00:00:00-04:00,84.580002,84.860001,83.540001,84.330002,82.186035,919800,AEE
...,...,...,...,...,...,...,...,...
3019,2022-10-20 00:00:00-04:00,61.779999,61.779999,59.400002,59.730000,59.730000,6537600,XEL
3020,2022-10-21 00:00:00-04:00,59.840000,61.240002,59.430000,60.900002,60.900002,3776000,XEL
3021,2022-10-24 00:00:00-04:00,61.439999,62.090000,61.080002,61.580002,61.580002,4027800,XEL
3022,2022-10-25 00:00:00-04:00,61.880001,62.669998,61.730000,62.400002,62.400002,2774300,XEL


In [549]:
# index the rows by (ticker, time) and sort on that index
df_c = df.set_index(["ticker", "time"])
df_c = df_c.sort_index()

# cross-section for the first ticker as a quick check
first_ticker = tickerStrings[0]
df_c.xs(first_ticker)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-27 00:00:00-04:00,85.610001,85.690002,84.300003,84.459999,82.312729,649800
2021-10-28 00:00:00-04:00,84.139999,84.940002,84.089996,84.809998,82.653831,528100
2021-10-29 00:00:00-04:00,84.709999,85.290001,83.800003,84.290001,82.147049,814200
2021-11-01 00:00:00-04:00,84.440002,84.580002,83.529999,84.279999,82.137299,911000
2021-11-02 00:00:00-04:00,84.580002,84.860001,83.540001,84.330002,82.186035,919800
...,...,...,...,...,...,...
2022-10-20 00:00:00-04:00,78.279999,78.279999,76.269997,76.620003,76.620003,1465800
2022-10-21 00:00:00-04:00,76.879997,78.230003,76.169998,77.580002,77.580002,1199600
2022-10-24 00:00:00-04:00,78.089996,78.820000,77.250000,77.870003,77.870003,1959300
2022-10-25 00:00:00-04:00,78.019997,79.669998,77.839996,79.519997,79.519997,1884300


#### EXERCISE (DOWNLOAD MULTIPLE TICKERS AND FLATTEN THE LEVELS)

In [550]:
# Settings for one multi-ticker download, gathered in a dict and unpacked
# into yf.download below  (or pdr.get_data_yahoo(...)).
# The defaults quoted in the comments are the ones noted when this
# notebook was written; check them against the installed yfinance version.
download_options = dict(
    # tickers: a list or a single string
    tickers="OUT WELL",

    # "period" is used instead of start/end
    # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
    # (optional, default is '1mo')
    period="1mo",

    # bar size to fetch (intraday is available if period < 60 days)
    # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
    # (optional, default is '1d')
    interval="30m",

    # group the columns by ticker (so a ticker is reachable via data['SPY'])
    # (optional, default is 'column')
    group_by='ticker',

    # adjust all OHLC values automatically
    # (optional, default is False)
    auto_adjust=True,

    # include pre/post regular market hours data
    # (optional, default is False)
    prepost=False,

    # use threads for mass downloading (True/False/Integer)
    # (optional, default is True)
    threads=True,

    # proxy URL scheme to use when downloading
    # (optional, default is None)
    proxy=None,
)

data = yf.download(**download_options)
data

[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,OUT,OUT,OUT,OUT,OUT,WELL,WELL,WELL,WELL,WELL
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2022-09-27 09:30:00-04:00,15.580000,15.720000,15.490000,15.630000,82537,66.389999,66.800003,65.629997,65.629997,141446
2022-09-27 10:00:00-04:00,15.640000,15.640000,15.460000,15.520000,94120,65.654999,65.654999,64.769997,64.980003,184573
2022-09-27 10:30:00-04:00,15.520000,15.700000,15.520000,15.650000,61978,64.980003,65.809998,64.940002,65.610001,99256
2022-09-27 11:00:00-04:00,15.660000,15.740000,15.580000,15.590000,91325,65.589996,65.709999,65.169998,65.206596,106584
2022-09-27 11:30:00-04:00,15.585000,15.650000,15.530000,15.560000,108421,65.197800,65.370003,65.029999,65.070000,85504
...,...,...,...,...,...,...,...,...,...,...
2022-10-26 14:00:00-04:00,18.219999,18.250000,18.120001,18.155001,166529,60.715000,60.799999,60.549999,60.575001,104434
2022-10-26 14:30:00-04:00,18.155001,18.180000,17.980000,17.980000,119306,60.580002,60.615700,60.369999,60.439999,135778
2022-10-26 15:00:00-04:00,17.959999,17.959999,17.820000,17.910000,134603,60.470001,60.500000,60.299999,60.330002,178213
2022-10-26 15:30:00-04:00,17.910000,18.055000,17.900000,18.025000,429489,60.349998,60.520000,60.250000,60.439999,370748


to flatten the MultiIndex use map with join:

In [551]:
# build single-level column names by joining the two levels,
# e.g. ('OUT', 'Open') -> 'OUT_Open'
flat_names = ['_'.join(levels) for levels in data.columns]

# work on a copy so the MultiIndex version in `data` stays available
data_flat = data.copy()
data_flat.columns = flat_names

# turn the datetime index into an ordinary column
data_flat = data_flat.reset_index()
data_flat

Unnamed: 0,Datetime,OUT_Open,OUT_High,OUT_Low,OUT_Close,OUT_Volume,WELL_Open,WELL_High,WELL_Low,WELL_Close,WELL_Volume
0,2022-09-27 09:30:00-04:00,15.580000,15.720000,15.490000,15.630000,82537,66.389999,66.800003,65.629997,65.629997,141446
1,2022-09-27 10:00:00-04:00,15.640000,15.640000,15.460000,15.520000,94120,65.654999,65.654999,64.769997,64.980003,184573
2,2022-09-27 10:30:00-04:00,15.520000,15.700000,15.520000,15.650000,61978,64.980003,65.809998,64.940002,65.610001,99256
3,2022-09-27 11:00:00-04:00,15.660000,15.740000,15.580000,15.590000,91325,65.589996,65.709999,65.169998,65.206596,106584
4,2022-09-27 11:30:00-04:00,15.585000,15.650000,15.530000,15.560000,108421,65.197800,65.370003,65.029999,65.070000,85504
...,...,...,...,...,...,...,...,...,...,...,...
282,2022-10-26 14:00:00-04:00,18.219999,18.250000,18.120001,18.155001,166529,60.715000,60.799999,60.549999,60.575001,104434
283,2022-10-26 14:30:00-04:00,18.155001,18.180000,17.980000,17.980000,119306,60.580002,60.615700,60.369999,60.439999,135778
284,2022-10-26 15:00:00-04:00,17.959999,17.959999,17.820000,17.910000,134603,60.470001,60.500000,60.299999,60.330002,178213
285,2022-10-26 15:30:00-04:00,17.910000,18.055000,17.900000,18.025000,429489,60.349998,60.520000,60.250000,60.439999,370748


Alternatively, use the MultiIndex values to pick each ticker's `Close` column

and save each one as a separate file:

In [555]:
# first level of every column label = the ticker symbols (unordered, unique)
multiindex = data.columns
ticker_list = {column[0] for column in multiindex}

for ticker in ticker_list:
    # take this ticker's Close column and move the datetime index into a column
    close = data[(ticker, 'Close')]
    data_i = close.reset_index()

    # drop the outer column level, then call the first (datetime) column "time"
    data_i = data_i.droplevel(level=0, axis=1)
    first_column = data_i.columns[0]
    data_i = data_i.rename(columns={first_column: "time"})

    # index by time, in chronological order
    data_i = data_i.set_index(["time"])
    data_i = data_i.sort_index()

    # save as separate files, one per ticker
    data_i.to_csv(f'data/ticker_{ticker}.csv')