yfinance (https://pypi.org/project/yfinance/) is an open-source tool
that uses Yahoo's publicly available APIs to download financial data.

GOOD FOR resolutions coarser than 30m; the available history is LIMITED per interval:

- 1 Minute Data: 7 days
- 2 Minute Data: 60 days
- 5 Minute Data: 60 days
- 15 Minute Data: 60 days
- 30 Minute Data: 60 days
- Hourly Data: 730 days
- Daily/Weekly/Monthly: No limit

valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max

valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo

<!-- EXAMPLE USAGE:

tickers = 'MA V'
start = '2011-12-30'
end = '2022-01-01'
data = pd.DataFrame()
data = yf.download(tickers, start, end)['Close']
data
yf.Ticker("MA").calendar # next event
yf.Ticker("MA").earnings_dates # historical events
yf.Ticker("MA").recommendations # grades
yf.Ticker("MA").actions # dividends & splits -->

In [300]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

### GET TICKER NAMES

In [301]:
# Load the DataFrame saved by the previous step. Its row and column labels
# are the ticker symbols to download.
# NOTE: read_pickle executes whatever the pickle contains, so only load files
# you produced yourself.
df = pd.read_pickle("pairs_to_download.pkl")

# Union of the row labels and the column labels, as a plain Python list.
tickerStrings = df.index.union(df.columns).tolist()
tickerStrings

['D', 'ES', 'NI', 'OGE', 'SO', 'WEC', 'WTRG', 'XEL']

### DOWNLOAD & CREATE CSV FILE

#### OPTION 1 (DOWNLOAD  & CREATE A FILE FOR EACH TICKER PER INTERVAL)

In [302]:
# Interval -> look-back period requested for that interval.
int_per = {'1d': '5y', '1h': '2y'}

# enable to enter manually
# tickerStrings = ['MA', 'V', 'LNT', 'FTS', 'POR', 'CMS', 'OUT', 'WELL']

# Download every ticker at every interval and write one CSV per combination.
for ticker in tickerStrings:
    for interval, period in int_per.items():
        data = yf.download(ticker, group_by="Ticker", period=period, interval=interval)
        data['ticker'] = ticker

        # The timestamps are already the index of the downloaded frame; only
        # the index name differs between downloads (or is missing), so it is
        # normalised to "time". No set_index call is needed: calling
        # set_index without assigning its result changes nothing.
        data.index.name = 'time'

        # save as separate files; the interval is upper-cased in the name,
        # e.g. <ticker>_1D.csv and <ticker>_1H.csv
        data.to_csv(f'{ticker}_{interval.upper()}.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Check if downloaded correctly:

In [305]:
# The download loop upper-cases the interval in the file name, so the hourly
# file for ticker D is 'D_1H.csv'. The exact case matters on case-sensitive
# filesystems.
filename = 'D_1H.csv'
# No index_col is given, so "time" is read back as a regular column.
# NOTE(review): two years of hourly bars span DST changes, so the stored UTC
# offsets are mixed; pandas may then leave "time" as object dtype. Confirm,
# and convert with pd.to_datetime(df["time"], utc=True) if a datetime column
# is required.
df = pd.read_csv(filename, parse_dates=["time"])
df

Unnamed: 0,time,Open,High,Low,Close,Adj Close,Volume,ticker
0,2020-10-19 09:30:00-04:00,81.639999,81.989998,81.269997,81.930000,81.930000,382793,D
1,2020-10-19 10:30:00-04:00,81.910004,81.970001,80.980003,81.230003,81.230003,280087,D
2,2020-10-19 11:30:00-04:00,81.199997,81.360001,81.082199,81.139999,81.139999,163769,D
3,2020-10-19 12:30:00-04:00,81.120003,81.370003,81.080002,81.220001,81.220001,175051,D
4,2020-10-19 13:30:00-04:00,81.220001,81.245003,80.930000,81.180000,81.180000,226571,D
...,...,...,...,...,...,...,...,...
3512,2022-10-18 09:30:00-04:00,66.550003,67.059998,66.220001,66.769997,66.769997,537007,D
3513,2022-10-18 10:30:00-04:00,66.760002,66.940002,66.230003,66.370003,66.370003,348994,D
3514,2022-10-18 11:30:00-04:00,66.400002,66.639999,66.220001,66.500000,66.500000,263041,D
3515,2022-10-18 12:30:00-04:00,66.510002,66.730003,66.290001,66.389999,66.389999,140749,D


#### OPTION 2 (DOWNLOAD  & CREATE SINGLE DF FROM ALL TICKERS)

In [229]:
tickerStrings = ['MA', 'V']
df_list = []
for ticker in tickerStrings:
    # No interval is passed, so the library default interval is used.
    data = yf.download(ticker, group_by="Ticker", period='6mo')
    # Tag every row so the tickers can be told apart after concatenation.
    data['ticker'] = ticker
    # The timestamps are already the index; just give it a uniform name.
    # (An unassigned set_index call and a column rename of "Date" would both
    # be no-ops here, because the date lives in the index, not in a column.)
    data.index.name = 'time'
    df_list.append(data)

# combine all dataframes into a single dataframe (rows stacked per ticker)
df = pd.concat(df_list)

# save to csv
df.to_csv('ticker.csv')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Check if downloaded correctly:

In [273]:
# Read the combined CSV back and parse the "time" column as dates.
# No index_col is given, so pandas adds a default integer index and "time"
# stays a regular column.
filename = 'ticker.csv'
df = pd.read_csv(filename, parse_dates=["time"])
df

Unnamed: 0,time,Open,High,Low,Close,Adj Close,Volume,ticker
0,2022-04-18 00:00:00-04:00,353.989990,360.760010,353.220001,356.799988,355.680084,1871400,MA
1,2022-04-19 00:00:00-04:00,356.809998,364.670013,356.070007,363.869995,362.727905,2999300,MA
2,2022-04-20 00:00:00-04:00,366.660004,371.890015,363.869995,365.290009,364.143463,3377300,MA
3,2022-04-21 00:00:00-04:00,369.480011,375.220001,363.459991,364.440002,363.296143,3605300,MA
4,2022-04-22 00:00:00-04:00,360.500000,362.100006,350.609985,351.179993,350.077759,2968800,MA
...,...,...,...,...,...,...,...,...
247,2022-10-10 00:00:00-04:00,184.130005,184.779999,180.899994,182.179993,182.179993,4743800,V
248,2022-10-11 00:00:00-04:00,180.369995,181.639999,178.190002,179.139999,179.139999,6294500,V
249,2022-10-12 00:00:00-04:00,179.089996,180.589996,178.220001,178.240005,178.240005,4884300,V
250,2022-10-13 00:00:00-04:00,175.000000,185.369995,174.600006,184.660004,184.660004,8426400,V


#### OPTION 3 (DOWNLOAD MULTIPLE TICKERS AND FLATTEN THE LEVELS )

In [231]:
data = yf.download(  # pdr.get_data_yahoo(...) can be used the same way
    # Symbols to fetch: a list or a single string both work.
    tickers="OUT WELL",
    # Look-back window, used here instead of start/end.
    # Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
    # (optional; defaults to '1mo')
    period="1mo",
    # Bar size (intraday intervals are available if period < 60 days).
    # Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
    # (optional; defaults to '1d')
    interval="30m",
    # Put the ticker on the outer column level, so data['SPY'] style access works.
    # (optional; defaults to 'column')
    group_by='ticker',
    # Adjust all OHLC values automatically.
    # (optional; defaults to False)
    auto_adjust=True,
    # Include pre/post regular market hours data?
    # (optional; defaults to False)
    prepost=False,
    # Use threads for mass downloading? (True/False/Integer)
    # (optional; defaults to True)
    threads=True,
    # Proxy URL scheme to use when downloading.
    # (optional; defaults to None)
    proxy=None,
)
data

[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,OUT,OUT,OUT,OUT,OUT,WELL,WELL,WELL,WELL,WELL
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2022-09-15 09:30:00-04:00,19.190001,19.520000,19.030001,19.500000,193784,,,,,
2022-09-15 10:00:00-04:00,19.510000,19.594999,19.340000,19.360001,256085,,,,,
2022-09-15 10:30:00-04:00,19.360001,19.395000,18.959999,18.990000,265663,,,,,
2022-09-15 11:00:00-04:00,18.980000,19.025000,18.809999,18.990000,241883,,,,,
2022-09-15 11:30:00-04:00,18.995001,19.040001,18.860001,18.930000,194440,,,,,
...,...,...,...,...,...,...,...,...,...,...
2022-10-14 14:00:00-04:00,16.264999,16.275000,16.135000,16.155001,65614,58.259998,58.279999,58.130001,58.209999,77579.0
2022-10-14 14:30:00-04:00,16.139999,16.150000,16.080000,16.110001,51679,58.200001,58.200001,57.919998,58.000000,146335.0
2022-10-14 15:00:00-04:00,16.115000,16.135000,15.985000,16.045000,111031,58.000000,58.150002,57.740002,58.020000,136505.0
2022-10-14 15:30:00-04:00,16.045000,16.080000,15.940000,15.960000,255359,58.009998,58.110001,57.810001,57.860001,615895.0


To flatten the MultiIndex columns into single-level names, use `map` with `'_'.join`:

In [232]:
# Work on a copy so the two-level columns of `data` stay available.
data_flat = data.copy()
# Each column key is a (ticker, field) tuple; '_'.join turns it into one
# label such as 'OUT_Open'.
data_flat.columns = data_flat.columns.map('_'.join)
# Move the timestamp index into a regular column. This is done after the
# column labels have been flattened.
data_flat =data_flat.reset_index()
data_flat

Unnamed: 0,Datetime,OUT_Open,OUT_High,OUT_Low,OUT_Close,OUT_Volume,WELL_Open,WELL_High,WELL_Low,WELL_Close,WELL_Volume
0,2022-09-15 09:30:00-04:00,19.190001,19.520000,19.030001,19.500000,193784,,,,,
1,2022-09-15 10:00:00-04:00,19.510000,19.594999,19.340000,19.360001,256085,,,,,
2,2022-09-15 10:30:00-04:00,19.360001,19.395000,18.959999,18.990000,265663,,,,,
3,2022-09-15 11:00:00-04:00,18.980000,19.025000,18.809999,18.990000,241883,,,,,
4,2022-09-15 11:30:00-04:00,18.995001,19.040001,18.860001,18.930000,194440,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
282,2022-10-14 14:00:00-04:00,16.264999,16.275000,16.135000,16.155001,65614,58.259998,58.279999,58.130001,58.209999,77579.0
283,2022-10-14 14:30:00-04:00,16.139999,16.150000,16.080000,16.110001,51679,58.200001,58.200001,57.919998,58.000000,146335.0
284,2022-10-14 15:00:00-04:00,16.115000,16.135000,15.985000,16.045000,111031,58.000000,58.150002,57.740002,58.020000,136505.0
285,2022-10-14 15:30:00-04:00,16.045000,16.080000,15.940000,15.960000,255359,58.009998,58.110001,57.810001,57.860001,615895.0


Or keep the MultiIndex and select a column directly with its (ticker, field) key:

In [233]:
# List every (ticker, field) key of the two-level column index.
print(data.columns)
# A tuple key selects a single column, returned as a Series.
data[('OUT',   'Close')]

MultiIndex([( 'OUT',   'Open'),
            ( 'OUT',   'High'),
            ( 'OUT',    'Low'),
            ( 'OUT',  'Close'),
            ( 'OUT', 'Volume'),
            ('WELL',   'Open'),
            ('WELL',   'High'),
            ('WELL',    'Low'),
            ('WELL',  'Close'),
            ('WELL', 'Volume')],
           )


Datetime
2022-09-15 09:30:00-04:00    19.500000
2022-09-15 10:00:00-04:00    19.360001
2022-09-15 10:30:00-04:00    18.990000
2022-09-15 11:00:00-04:00    18.990000
2022-09-15 11:30:00-04:00    18.930000
                               ...    
2022-10-14 14:00:00-04:00    16.155001
2022-10-14 14:30:00-04:00    16.110001
2022-10-14 15:00:00-04:00    16.045000
2022-10-14 15:30:00-04:00    15.960000
2022-10-14 16:00:00-04:00    15.950000
Name: (OUT, Close), Length: 287, dtype: float64

Save the Close prices of each ticker to a separate CSV file:

In [234]:
multiindex = data.columns
# Distinct tickers from the first column level, in order of first appearance.
# A list keeps the loop (and the printed output) in the same order on every
# run, which a set does not guarantee.
ticker_list = list(multiindex.get_level_values(0).unique())

for ticker in ticker_list:
    print(ticker)
    # Build a one-column frame named "Close", indexed by the timestamps under
    # the name "time" and sorted chronologically.
    data_i = (
        data[(ticker, 'Close')]
        .to_frame(name='Close')
        .rename_axis('time')
        .sort_index()
    )
    print(data_i)
    # save as separate files
    data_i.to_csv(f'ticker_{ticker}.csv')

OUT
                               Close
time                                
2022-09-15 09:30:00-04:00  19.500000
2022-09-15 10:00:00-04:00  19.360001
2022-09-15 10:30:00-04:00  18.990000
2022-09-15 11:00:00-04:00  18.990000
2022-09-15 11:30:00-04:00  18.930000
...                              ...
2022-10-14 14:00:00-04:00  16.155001
2022-10-14 14:30:00-04:00  16.110001
2022-10-14 15:00:00-04:00  16.045000
2022-10-14 15:30:00-04:00  15.960000
2022-10-14 16:00:00-04:00  15.950000

[287 rows x 1 columns]
WELL
                               Close
time                                
2022-09-15 09:30:00-04:00        NaN
2022-09-15 10:00:00-04:00        NaN
2022-09-15 10:30:00-04:00        NaN
2022-09-15 11:00:00-04:00        NaN
2022-09-15 11:30:00-04:00        NaN
...                              ...
2022-10-14 14:00:00-04:00  58.209999
2022-10-14 14:30:00-04:00  58.000000
2022-10-14 15:00:00-04:00  58.020000
2022-10-14 15:30:00-04:00  57.860001
2022-10-14 16:00:00-04:00  57.840000

[287

Check the saved file:

In [235]:
# Read back the per-ticker CSV written by the save loop for WELL and parse
# the "time" column as dates. No index_col is given, so "time" stays a
# regular column next to a default integer index.
filename = 'ticker_WELL.csv'
df = pd.read_csv(filename, parse_dates=["time"])
df

Unnamed: 0,time,Close
0,2022-09-15 09:30:00-04:00,
1,2022-09-15 10:00:00-04:00,
2,2022-09-15 10:30:00-04:00,
3,2022-09-15 11:00:00-04:00,
4,2022-09-15 11:30:00-04:00,
...,...,...
282,2022-10-14 14:00:00-04:00,58.209999
283,2022-10-14 14:30:00-04:00,58.000000
284,2022-10-14 15:00:00-04:00,58.020000
285,2022-10-14 15:30:00-04:00,57.860001
