yfinance (https://pypi.org/project/yfinance/) is an open-source tool
that uses Yahoo's publicly available APIs to download finance data.

GOOD FOR resolutions above 30 minutes, but the available history is LIMITED for finer intervals (maximum look-back per interval):

- 1 Minute Data: 7 days
- 2 Minute Data: 60 days
- 5 Minute Data: 60 days
- 15 Minute Data: 60 days
- 30 Minute Data: 60 days
- Hourly Data: 730 days
- Daily/Weekly/Monthly: No limit

valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo

<!-- EXAMPLE USAGE:

tickers = 'MA V'
start = '2011-12-30'
end = '2022-01-01'
data = pd.DataFrame()
data = yf.download(tickers, start, end)['Close']
data
yf.Ticker("MA").calendar # next event
yf.Ticker("MA").earnings_dates # historical events
yf.Ticker("MA").recommendations # grades
yf.Ticker("MA").actions # dividends & splits -->

In [166]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from pathlib import Path

# make sure the output folder for the downloaded CSV files exists
# (no error if it is already there)
Path("data").mkdir(exist_ok=True, parents=True)

### GET TICKER NAMES

In [167]:
# load the dataframe produced by the previous step
df = pd.read_pickle("pairs_to_download.pkl")

# every label that appears in the rows or the columns, without duplicates
tickerStrings = [*df.index.union(df.columns)]
tickerStrings

['APO', 'BAC', 'BEN', 'BX', 'C', 'COF', 'KKR', 'RF', 'SCHW', 'STT', 'USB']

### DOWNLOAD & CREATE CSV FILE

#### SETUP

In [168]:
# maps each download interval to the period requested for it
int_per = {
    '1d': '1y',
    '1h': '1y',
}

#### OPTION 1 (DOWNLOAD  & CREATE A FILE FOR EACH TICKER PER INTERVAL)

In [169]:
# int_per = {'1d':'3mo', '1h':'3mo'}  # uncomment to override interval and corresponding period

# uncomment to enter the tickers manually
#tickerStrings = ['MA', 'V', 'LNT', 'FTS', 'POR', 'CMS', 'OUT', 'WELL']

# Download every ticker at every configured interval and write one CSV per
# (ticker, interval) pair to data/<TICKER>_<INTERVAL>.csv.
for ticker in tickerStrings:
    for key, period in int_per.items():
        data = yf.download(ticker, group_by="Ticker", period=period, interval=key)

        # tag the rows so files can be told apart after being combined
        data['ticker'] = ticker

        # The datetime index comes back with different names (or unnamed)
        # depending on the interval, so give it one fixed name.
        # NOTE: a former `data.set_index(data.columns[0])` call was removed here;
        # its result was never assigned, so it had no effect, and the first
        # column is a price column rather than the timestamps anyway.
        data.index.names = ['time']

        # if sorting is needed:
        #data = data.sort_index()

        # save as separate files
        data.to_csv(f'data/{ticker}_{key.upper()}.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Check if downloaded correctly:

In [170]:
# reload the hourly file of the first ticker, parsing "time" as datetimes
filename = f'data/{tickerStrings[0]}_1H.csv'
df = pd.read_csv(filename, parse_dates=["time"])
df

Unnamed: 0,time,Open,High,Low,Close,Adj Close,Volume,ticker
0,2021-11-01 13:30:00-04:00,76.324997,76.639999,76.139999,76.290001,76.290001,1735242,APO
1,2021-11-01 14:30:00-04:00,76.330002,76.470001,75.879997,75.974998,75.974998,756397,APO
2,2021-11-01 15:30:00-04:00,76.000000,76.519997,75.879997,76.349998,76.349998,1640415,APO
3,2021-11-02 09:30:00-04:00,76.114403,76.309998,74.239998,75.169998,75.169998,970010,APO
4,2021-11-02 10:30:00-04:00,74.860001,76.149902,74.790001,75.669998,75.669998,757594,APO
...,...,...,...,...,...,...,...,...
1759,2022-11-01 10:30:00-04:00,57.000000,57.250000,56.630001,57.209999,57.209999,453351,APO
1760,2022-11-01 11:30:00-04:00,57.224998,57.349998,56.639999,57.029999,57.029999,293098,APO
1761,2022-11-01 12:30:00-04:00,57.020000,57.330002,56.939999,57.139999,57.139999,187120,APO
1762,2022-11-01 13:30:00-04:00,57.130001,57.169998,57.119999,57.150002,57.150002,12680,APO


#### OPTION 2 (DOWNLOAD  & CREATE SINGLE DF FROM ALL TICKERS)

In [171]:
# int_per = {'1d':'1y', '1h':'1y'}  # uncomment to override interval and corresponding period

# frames collected for the interval currently being processed
df_list = []

for key, period in int_per.items():
    for ticker in tickerStrings:
        data = yf.download(ticker, group_by="Ticker", period=period, interval=key)
        data['ticker'] = ticker       # tells the rows apart after concatenation
        data.index.names = ['time']   # same index name for every ticker
        df_list.append(data)

    # stack all tickers of this interval into one dataframe and write it out
    df = pd.concat(df_list)
    df.to_csv(f'data/tickers_{key.upper()}.csv')

    # start the next interval with an empty collection
    df_list.clear()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Check if downloaded correctly:

In [172]:
# reload the combined hourly file, parsing "time" as datetimes
filename = 'data/tickers_1H.csv'
df = pd.read_csv(
    filename,
    parse_dates=["time"],
)

In [173]:
# index the combined frame by (ticker, time), then sort on that index
df_c = df.set_index(["ticker", "time"])
df_c = df_c.sort_index()

# show only the rows of the first ticker
df_c.xs(tickerStrings[0])

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-11-01 13:30:00-04:00,76.324997,76.639999,76.139999,76.290001,76.290001,1735242
2021-11-01 14:30:00-04:00,76.330002,76.470001,75.879997,75.974998,75.974998,756397
2021-11-01 15:30:00-04:00,76.000000,76.519997,75.879997,76.349998,76.349998,1640415
2021-11-02 09:30:00-04:00,76.114403,76.309998,74.239998,75.169998,75.169998,970010
2021-11-02 10:30:00-04:00,74.860001,76.149902,74.790001,75.669998,75.669998,757594
...,...,...,...,...,...,...
2022-11-01 10:30:00-04:00,57.000000,57.250000,56.630001,57.209999,57.209999,453351
2022-11-01 11:30:00-04:00,57.224998,57.349998,56.639999,57.029999,57.029999,293098
2022-11-01 12:30:00-04:00,57.020000,57.330002,56.939999,57.139999,57.139999,187765
2022-11-01 13:30:00-04:00,57.130001,57.169998,57.119999,57.150002,57.150002,12680


#### EXERCISE (DOWNLOAD MULTIPLE TICKERS AND FLATTEN THE LEVELS)

In [174]:
# Download several tickers in one call (pdr.get_data_yahoo(...) is an alternative).
#   valid periods:   1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
#   valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
data = yf.download(
    tickers="OUT WELL",    # a string of symbols; a list works as well
    period="1mo",          # used instead of start/end (optional, default '1mo')
    interval="30m",        # bar size; intraday needs period < 60 days (optional, default '1d')
    group_by='ticker',     # access via data['<ticker>'] (optional, default 'column')
    auto_adjust=True,      # adjust all OHLC automatically (optional, default False)
    prepost=False,         # pre/post regular market hours data (optional, default False)
    threads=True,          # threads for mass downloading: True/False/Integer (optional, default True)
    proxy=None,            # proxy URL scheme to use when downloading (optional, default None)
)
data

[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,WELL,WELL,WELL,WELL,WELL,OUT,OUT,OUT,OUT,OUT
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2022-10-03 09:30:00-04:00,65.099998,65.300003,63.849998,64.169998,256361,15.520000,15.540000,15.010000,15.100000,70525
2022-10-03 10:00:00-04:00,64.220001,65.029999,64.180000,64.995003,222513,15.140000,15.470000,15.120000,15.450000,83731
2022-10-03 10:30:00-04:00,65.029999,65.309998,64.750000,65.260002,226219,15.460000,15.500000,15.375000,15.410000,53183
2022-10-03 11:00:00-04:00,65.250000,65.500000,65.110001,65.160004,151454,15.410000,15.565000,15.410000,15.500000,51303
2022-10-03 11:30:00-04:00,65.199997,65.285004,64.750000,64.820000,102661,15.500000,15.520000,15.310000,15.350000,48560
...,...,...,...,...,...,...,...,...,...,...
2022-11-01 11:30:00-04:00,60.430000,60.430000,60.090000,60.290001,60996,18.320000,18.379999,18.240000,18.375000,53978
2022-11-01 12:00:00-04:00,60.259998,60.470001,60.029999,60.029999,99617,18.370001,18.490000,18.350000,18.405001,74688
2022-11-01 12:30:00-04:00,60.025002,60.040001,59.860001,59.990002,44902,18.410000,18.480000,18.375000,18.459999,57331
2022-11-01 13:00:00-04:00,59.984699,60.299999,59.939999,60.160000,71972,18.459999,18.600000,18.450001,18.480000,79032


To flatten the MultiIndex columns, use `map` with `'_'.join`:

In [175]:
# join each (ticker, field) column pair into a single "<ticker>_<field>" label;
# set_axis returns a new frame, so `data` keeps its MultiIndex columns
flat_labels = data.columns.map('_'.join)
data_flat = data.set_axis(flat_labels, axis=1).reset_index()
data_flat

Unnamed: 0,Datetime,WELL_Open,WELL_High,WELL_Low,WELL_Close,WELL_Volume,OUT_Open,OUT_High,OUT_Low,OUT_Close,OUT_Volume
0,2022-10-03 09:30:00-04:00,65.099998,65.300003,63.849998,64.169998,256361,15.520000,15.540000,15.010000,15.100000,70525
1,2022-10-03 10:00:00-04:00,64.220001,65.029999,64.180000,64.995003,222513,15.140000,15.470000,15.120000,15.450000,83731
2,2022-10-03 10:30:00-04:00,65.029999,65.309998,64.750000,65.260002,226219,15.460000,15.500000,15.375000,15.410000,53183
3,2022-10-03 11:00:00-04:00,65.250000,65.500000,65.110001,65.160004,151454,15.410000,15.565000,15.410000,15.500000,51303
4,2022-10-03 11:30:00-04:00,65.199997,65.285004,64.750000,64.820000,102661,15.500000,15.520000,15.310000,15.350000,48560
...,...,...,...,...,...,...,...,...,...,...,...
277,2022-11-01 11:30:00-04:00,60.430000,60.430000,60.090000,60.290001,60996,18.320000,18.379999,18.240000,18.375000,53978
278,2022-11-01 12:00:00-04:00,60.259998,60.470001,60.029999,60.029999,99617,18.370001,18.490000,18.350000,18.405001,74688
279,2022-11-01 12:30:00-04:00,60.025002,60.040001,59.860001,59.990002,44902,18.410000,18.480000,18.375000,18.459999,57331
280,2022-11-01 13:00:00-04:00,59.984699,60.299999,59.939999,60.160000,71972,18.459999,18.600000,18.450001,18.480000,79032


Or use the column index values to select each ticker's data, and save it as a file:

In [176]:
# the first level of the column MultiIndex holds the ticker symbols
multiindex = data.columns
ticker_list = {symbol for symbol, *_ in multiindex}

for ticker in ticker_list:
    # closing prices of this ticker as a one-column frame indexed by "time"
    data_i = (
        data[(ticker, 'Close')]
        .rename('Close')
        .to_frame()
        .rename_axis('time')
        .sort_index()
    )
    # save as separate files
    data_i.to_csv(f'data/ticker_{ticker}.csv')