In [12]:
# Make the directory that holds the local `util` module importable, then pull
# in the pickle helpers used by later cells.
import sys  # needed by sys.path.insert below; without it a fresh kernel raises NameError

sys.path.insert(0, 'C:/Users/ping/MyDrive/py_files/python/')
from util import pickle_dump, pickle_load

# Flag forwarded to download_AdjOHLCV and pickle_load in later cells.
verbose = False

In [2]:
def download_AdjOHLCV(file_symbols, verbose=False):
    """Download adjusted daily OHLCV history for the symbols listed in a text file.

    The file is read one symbol per line. Leading/trailing whitespace, blank
    lines and a leading UTF-8 byte-order mark (written by some Windows
    editors) are discarded. The remaining symbols are passed to
    ``yfinance.download`` with ``period="max"``, ``interval="1d"``,
    ``group_by='ticker'`` and ``auto_adjust=True``.

    Args:
        file_symbols(str): full path to a text file with a symbol on each line
        verbose(bool): if True, also print the parsed symbols and their count

    Returns:
        df(dataframe): the object returned by ``yfinance.download``.
            With several symbols the columns are grouped by ticker, so the
            OHLCV data for symbol 'SPY' is fetched with df['SPY'].
            NOTE(review): the column layout for a single-symbol file depends
            on the installed yfinance version - confirm before relying on
            df['SYMBOL'] in that case.

    Raises:
        ValueError: if the file contains no symbols.
    """

    # Imported here rather than at module level; yfinance is only used by
    # this function.
    import yfinance as yf

    print(f'++++  read symbols from {file_symbols}  ++++')
    # 'utf-8-sig' decodes plain ASCII/UTF-8 and silently drops a leading BOM,
    # which would otherwise end up glued to the first symbol.
    with open(file_symbols, 'r', encoding='utf-8-sig') as f:
        # strip surrounding whitespace and skip lines that are blank afterwards
        symbols = [line.strip() for line in f if line.strip()]

    if not symbols:
        # Fail early with a clear message instead of handing yfinance an
        # empty ticker list.
        raise ValueError(f'no symbols found in {file_symbols}')

    if verbose:
        print('symbols in file: "{}"'.format(file_symbols))
        print('Leading space, trailing spaces, and empty string (i.e. "") have been stripped from file')

        print('symbols: {}'.format(symbols))
        print("symbol count: {}".format(len(symbols)), '\n')

    df = yf.download(
            # list of ticker symbols read from the file
            tickers = symbols,

            # use "period" instead of start/end
            # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            period = "max",

            # fetch data by interval (including intraday if period < 60 days)
            # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            interval = "1d",

            # group by ticker (to access via data['SPY'])
            group_by = 'ticker',

            # adjust all OHLC automatically
            auto_adjust = True,

            # do not download pre/post regular market hours data
            prepost = False,

            # use threads for mass downloading (True/False/Integer)
            threads = True,

            # no proxy
            proxy = None
        )

    return df

In [5]:
# Alternative symbol file, currently disabled:
# file_symbols = 'C:/Users/ping/MyDrive/stocks/MktCap2b_AUMtop1200/source/2021_Top1200_MktCap_n_AUM.txt'
# Download OHLCV data for every symbol listed in symbols_trash.txt.
file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_trash.txt'
df_OHLCV = download_AdjOHLCV(file_symbols, verbose=verbose)
# Download the symbols in symbols_XOM.txt separately (presumably just XOM - the
# recorded run shows a single symbol); df_XOM.index is used further down to
# select rows of df_OHLCV.
# NOTE: file_symbols is reused, so after this cell it holds the XOM file path.
file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_XOM.txt'
df_XOM = download_AdjOHLCV(file_symbols, verbose=verbose)

++++  read symbols from C:/Users/ping/Desktop/my_yfinance/symbols_trash.txt  ++++
[*********************100%***********************]  5 of 5 completed
++++  read symbols from C:/Users/ping/Desktop/my_yfinance/symbols_XOM.txt  ++++
[*********************100%***********************]  1 of 1 completed


In [7]:
# Show the last 7 rows of the downloaded multi-symbol dataframe.
df_OHLCV.tail(7)

Unnamed: 0_level_0,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,AAPL,AAPL,AAPL,AAPL,AAPL,XOM,XOM,XOM,XOM,XOM
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-07-15,1191.674805,1275.778198,1182.903198,1233.12915,17411450000.0,20573.15625,21138.244141,20397.0,20836.328125,25905580000.0,...,149.779999,150.860001,148.199997,150.169998,76030800.0,84.800003,84.879997,82.900002,84.540001,17231200.0
2022-07-16,1232.791626,1377.94458,1195.605957,1352.626465,18364010000.0,20834.103516,21514.404297,20518.898438,21190.316406,24302950000.0,...,,,,,,,,,,
2022-07-17,1353.205078,1378.41748,1329.763306,1338.635742,16079710000.0,21195.041016,21600.640625,20778.179688,20779.34375,22927800000.0,...,,,,,,,,,,
2022-07-18,1338.80603,1578.717896,1338.80603,1578.717896,27440420000.0,20781.912109,22633.033203,20781.912109,22485.689453,39974480000.0,...,150.740005,151.570007,146.699997,147.070007,81420900.0,86.580002,87.940002,85.75,86.099998,19395800.0
2022-07-19,1578.383911,1607.033081,1501.797485,1542.97522,27753530000.0,22467.849609,23666.962891,21683.40625,23389.433594,48765200000.0,...,147.919998,151.229996,146.910004,151.0,82982400.0,86.419998,88.519997,86.25,88.269997,18248900.0
2022-07-20,1542.954346,1612.645752,1500.803223,1520.200684,22942710000.0,23393.191406,24196.818359,23009.949219,23231.732422,42932550000.0,...,151.119995,153.720001,150.369995,153.039993,64823400.0,87.580002,89.650002,87.25,89.239998,16584400.0
2022-07-21,1520.374512,1595.761963,1472.185425,1576.749512,20009560000.0,23233.201172,23388.322266,22431.148438,23164.628906,33631010000.0,...,154.5,155.570007,151.940002,155.350006,64953600.0,86.93,87.800003,85.209999,87.75,21368700.0


In [9]:

# Keep only the rows of df_OHLCV whose index labels appear in df_XOM's index.
# In the recorded output this removes the weekend rows (2022-07-16, 2022-07-17)
# where only the crypto symbols have data.
# NOTE: .loc with a list-like of labels raises KeyError in current pandas if any
# label of df_XOM.index is missing from df_OHLCV; it does not insert NaN rows
# the way .reindex would.
# NOTE: df_OHLCV is overwritten here, so output displayed by earlier cells no
# longer matches the variable's contents.
df_OHLCV = df_OHLCV.loc[df_XOM.index]
df_OHLCV.tail(7)

Unnamed: 0_level_0,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,AAPL,AAPL,AAPL,AAPL,AAPL,XOM,XOM,XOM,XOM,XOM
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-07-13,1038.186646,1113.587158,1019.220337,1113.587158,18302590000.0,19325.972656,20223.052734,18999.953125,20212.074219,33042430000.0,...,142.990005,146.449997,142.119995,145.490005,71185600.0,83.379997,86.309998,83.300003,84.839996,19928600.0
2022-07-14,1113.515747,1202.953369,1077.405762,1191.526245,16688640000.0,20211.466797,20789.894531,19689.257812,20569.919922,31158740000.0,...,144.080002,148.949997,143.25,148.470001,78140700.0,81.849998,83.309998,80.690002,83.139999,26343000.0
2022-07-15,1191.674805,1275.778198,1182.903198,1233.12915,17411450000.0,20573.15625,21138.244141,20397.0,20836.328125,25905580000.0,...,149.779999,150.860001,148.199997,150.169998,76030800.0,84.800003,84.879997,82.900002,84.540001,17231200.0
2022-07-18,1338.80603,1578.717896,1338.80603,1578.717896,27440420000.0,20781.912109,22633.033203,20781.912109,22485.689453,39974480000.0,...,150.740005,151.570007,146.699997,147.070007,81420900.0,86.580002,87.940002,85.75,86.099998,19395800.0
2022-07-19,1578.383911,1607.033081,1501.797485,1542.97522,27753530000.0,22467.849609,23666.962891,21683.40625,23389.433594,48765200000.0,...,147.919998,151.229996,146.910004,151.0,82982400.0,86.419998,88.519997,86.25,88.269997,18248900.0
2022-07-20,1542.954346,1612.645752,1500.803223,1520.200684,22942710000.0,23393.191406,24196.818359,23009.949219,23231.732422,42932550000.0,...,151.119995,153.720001,150.369995,153.039993,64823400.0,87.580002,89.650002,87.25,89.239998,16584400.0
2022-07-21,1520.374512,1595.761963,1472.185425,1576.749512,20009560000.0,23233.201172,23388.322266,22431.148438,23164.628906,33631010000.0,...,154.5,155.570007,151.940002,155.350006,64953600.0,86.93,87.800003,85.209999,87.75,21368700.0


In [13]:
# Save df_OHLCV with the pickle_dump helper imported from util.
filename_pickle = 'df_OHLCV'
path_pickle_dump = 'C:/Users/ping/Desktop/my_yfinance/'
# NOTE(review): the printed path assumes pickle_dump joins directory and
# filename by plain concatenation and adds no extension - confirm in util.
print(f'Full path to pickled df_OHLCV:  {path_pickle_dump}{filename_pickle}')
pickle_dump(df_OHLCV, path_pickle_dump, filename_pickle)

Full path to pickled df_OHLCV:  C:/Users/ping/Desktop/my_yfinance/df_OHLCV


In [17]:
# Read the dataframe back with the pickle_load helper from util and show the
# last 7 BTC-USD rows as a round-trip check.
# NOTE(review): the third positional argument is the notebook's `verbose` flag;
# presumably pickle_load treats it as a verbosity switch - confirm in util.
df_pickled = pickle_load(path_pickle_dump, filename_pickle, verbose)
df_pickled['BTC-USD'].tail(7)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-13,19325.972656,20223.052734,18999.953125,20212.074219,33042430000.0
2022-07-14,20211.466797,20789.894531,19689.257812,20569.919922,31158740000.0
2022-07-15,20573.15625,21138.244141,20397.0,20836.328125,25905580000.0
2022-07-18,20781.912109,22633.033203,20781.912109,22485.689453,39974480000.0
2022-07-19,22467.849609,23666.962891,21683.40625,23389.433594,48765200000.0
2022-07-20,23393.191406,24196.818359,23009.949219,23231.732422,42932550000.0
2022-07-21,23233.201172,23388.322266,22431.148438,23164.628906,33631010000.0
