[how-to-deal-with-multi-level-column-names-downloaded-with-yfinance](https://stackoverflow.com/questions/63107594/how-to-deal-with-multi-level-column-names-downloaded-with-yfinance/63107801#63107801)

In [None]:
    # TODO: if file exists do not overwrite

In [76]:
def download_symbol_AdjOHLCV(file_symbols, verbose=False):
    """Read ticker symbols from a text file and download their adjusted daily OHLCV data.

    Args:
        file_symbols(str): full path to a text file with a symbol on each line;
                           surrounding whitespace and blank lines are ignored
        verbose(bool): when True, print the symbols that were read and their count

    Return:
        df_OHLCV(dataframe): the dataframe returned by yf.download for all symbols.
                             It is requested with group_by='ticker', so a multi-symbol
                             result is indexed by symbol, e.g. df_OHLCV['SPY']
    """

    # yfinance is imported locally so the dependency is only needed when downloading
    import yfinance as yf

    print(f'++++  read symbols from {file_symbols}  ++++')
    with open(file_symbols, 'r') as f:
        # strip each line, then keep only the non-empty results
        # (a blank line in the text file would otherwise become '' in the list)
        stripped_lines = (raw_line.strip() for raw_line in f)
        symbols = [symbol for symbol in stripped_lines if symbol]

    if verbose:
        print('symbols in file: "{}"'.format(file_symbols))
        print('Leading space, trailing spaces, and empty string (i.e. "") have been stripped from file')

        print('symbols: {}'.format(symbols))
        print("symbol count: {}".format(len(symbols)), '\n')

    # Options passed to yf.download, collected in one place.
    download_options = dict(
        tickers=symbols,       # list of ticker symbols read from the file
        period="max",          # use "period" instead of start/end
        interval="1d",         # one row per day
        group_by='ticker',     # group columns by ticker (access via data['SPY'])
        auto_adjust=True,      # adjust all OHLC automatically
        prepost=False,         # no pre/post regular market hours data
        threads=True,          # use threads for mass downloading
        proxy=None,            # no proxy
    )

    return yf.download(**download_options)

In [91]:
# Pick the symbols file to download; the alternatives stay commented out for quick switching.
# file_symbols = 'C:/Users/ping/MyDrive/stocks/MktCap2b_AUMtop1200/source/2021_Top1200_MktCap_n_AUM.txt'
file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_trash.txt'
# file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_XOM.txt'
# Download OHLCV data for every symbol listed in the file.
df_trash = download_symbol_AdjOHLCV(file_symbols, verbose=False)
# Display the last 10 rows.
df_trash.tail(10)

++++  read symbols from C:/Users/ping/Desktop/my_yfinance/symbols_trash.txt  ++++
[*********************100%***********************]  5 of 5 completed


Unnamed: 0_level_0,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,FTEC,FTEC,FTEC,FTEC,FTEC,XOM,XOM,XOM,XOM,XOM
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-07-12,1097.259155,1097.259155,1038.13562,1038.19165,12583280000.0,19970.474609,20043.445312,19308.53125,19323.914062,25810220000.0,...,99.940002,100.760002,97.709999,98.269997,181100.0,83.779999,85.050003,82.860001,84.5,21013000.0
2022-07-13,1038.186646,1113.587158,1019.220337,1113.587158,18302590000.0,19325.972656,20223.052734,18999.953125,20212.074219,33042430000.0,...,96.389999,98.669998,96.0,98.059998,228800.0,83.379997,86.309998,83.300003,84.839996,19928600.0
2022-07-14,1113.515747,1202.953369,1077.405762,1191.526245,16688640000.0,20211.466797,20789.894531,19689.257812,20569.919922,31158740000.0,...,97.120003,99.110001,95.959999,98.760002,261700.0,81.849998,83.309998,80.690002,83.139999,26343000.0
2022-07-15,1191.674805,1275.778198,1182.903198,1233.12915,17411450000.0,20573.15625,21138.244141,20397.0,20836.328125,25905580000.0,...,99.739998,100.580002,99.260002,100.519997,228600.0,84.800003,84.879997,82.900002,84.540001,17231200.0
2022-07-16,1232.791626,1377.94458,1195.605957,1352.626465,18364010000.0,20834.103516,21514.404297,20518.898438,21190.316406,24302950000.0,...,,,,,,,,,,
2022-07-17,1353.205078,1378.41748,1329.763306,1338.635742,16079710000.0,21195.041016,21600.640625,20778.179688,20779.34375,22927800000.0,...,,,,,,,,,,
2022-07-18,1338.80603,1578.717896,1338.80603,1578.717896,27440420000.0,20781.912109,22633.033203,20781.912109,22485.689453,39974480000.0,...,101.459999,101.959999,99.279999,99.629997,200700.0,86.580002,87.940002,85.75,86.099998,19395800.0
2022-07-19,1578.383911,1607.033081,1501.797485,1542.97522,27753530000.0,22467.849609,23666.962891,21683.40625,23389.433594,48765200000.0,...,100.779999,102.769997,100.25,102.68,197000.0,86.419998,88.519997,86.25,88.269997,18248900.0
2022-07-20,1542.954346,1612.645752,1500.803223,1520.200684,22942710000.0,23393.191406,24196.818359,23009.949219,23231.732422,42932550000.0,...,102.720001,104.949997,102.599998,104.529999,172100.0,87.580002,89.650002,87.25,89.239998,16584400.0
2022-07-21,1520.374512,1595.761963,1472.185425,1576.749512,20009560000.0,23233.201172,23388.322266,22431.148438,23164.628906,33631010000.0,...,104.68,106.050003,103.669998,106.010002,255100.0,86.93,87.800003,85.209999,87.75,21368700.0


In [92]:
# Pick the symbols file to download; the alternatives stay commented out for quick switching.
# file_symbols = 'C:/Users/ping/MyDrive/stocks/MktCap2b_AUMtop1200/source/2021_Top1200_MktCap_n_AUM.txt'
# file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_trash.txt'
file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_XOM.txt'
# Download OHLCV data for the symbols listed in the XOM file.
df_XOM = download_symbol_AdjOHLCV(file_symbols, verbose=False)
# Display the last 10 rows.
df_XOM.tail(10)

++++  read symbols from C:/Users/ping/Desktop/my_yfinance/symbols_XOM.txt  ++++
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-07-08,86.949997,87.300003,84.93,86.080002,17668000
2022-07-11,85.169998,86.120003,84.360001,85.639999,15603800
2022-07-12,83.779999,85.050003,82.860001,84.5,21013000
2022-07-13,83.379997,86.309998,83.300003,84.839996,19928600
2022-07-14,81.849998,83.309998,80.690002,83.139999,26343000
2022-07-15,84.800003,84.879997,82.900002,84.540001,17231200
2022-07-18,86.580002,87.940002,85.75,86.099998,19395800
2022-07-19,86.419998,88.519997,86.25,88.269997,18248900
2022-07-20,87.580002,89.650002,87.25,89.239998,16584400
2022-07-21,86.93,87.800003,85.209999,87.75,21368700


In [111]:
# Keep only the rows of df_trash whose dates appear in df_XOM's index.
# The index is passed to .loc directly: wrapping it in pd.Index(...) added nothing
# and made this cell depend on `pd` having been imported by another cell.
# NOTE: .loc raises KeyError if a date in df_XOM.index is missing from df_trash.
df_x = df_trash.loc[df_XOM.index, :]
# Display the last 10 rows of the selection.
df_x.tail(10)

Unnamed: 0_level_0,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,FTEC,FTEC,FTEC,FTEC,FTEC,XOM,XOM,XOM,XOM,XOM
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-07-08,1237.580322,1262.885742,1200.632202,1222.506226,16315930000.0,21637.154297,22314.941406,21257.453125,21731.117188,49899830000.0,...,100.059998,101.629997,99.82,101.260002,148000.0,86.949997,87.300003,84.93,86.080002,17668000.0
2022-07-11,1168.139038,1169.193848,1095.002808,1097.236572,12064180000.0,20856.353516,20856.353516,19924.539062,19970.556641,24150250000.0,...,100.32,100.57,98.980003,99.650002,127800.0,85.169998,86.120003,84.360001,85.639999,15603800.0
2022-07-12,1097.259155,1097.259155,1038.13562,1038.19165,12583280000.0,19970.474609,20043.445312,19308.53125,19323.914062,25810220000.0,...,99.940002,100.760002,97.709999,98.269997,181100.0,83.779999,85.050003,82.860001,84.5,21013000.0
2022-07-13,1038.186646,1113.587158,1019.220337,1113.587158,18302590000.0,19325.972656,20223.052734,18999.953125,20212.074219,33042430000.0,...,96.389999,98.669998,96.0,98.059998,228800.0,83.379997,86.309998,83.300003,84.839996,19928600.0
2022-07-14,1113.515747,1202.953369,1077.405762,1191.526245,16688640000.0,20211.466797,20789.894531,19689.257812,20569.919922,31158740000.0,...,97.120003,99.110001,95.959999,98.760002,261700.0,81.849998,83.309998,80.690002,83.139999,26343000.0
2022-07-15,1191.674805,1275.778198,1182.903198,1233.12915,17411450000.0,20573.15625,21138.244141,20397.0,20836.328125,25905580000.0,...,99.739998,100.580002,99.260002,100.519997,228600.0,84.800003,84.879997,82.900002,84.540001,17231200.0
2022-07-18,1338.80603,1578.717896,1338.80603,1578.717896,27440420000.0,20781.912109,22633.033203,20781.912109,22485.689453,39974480000.0,...,101.459999,101.959999,99.279999,99.629997,200700.0,86.580002,87.940002,85.75,86.099998,19395800.0
2022-07-19,1578.383911,1607.033081,1501.797485,1542.97522,27753530000.0,22467.849609,23666.962891,21683.40625,23389.433594,48765200000.0,...,100.779999,102.769997,100.25,102.68,197000.0,86.419998,88.519997,86.25,88.269997,18248900.0
2022-07-20,1542.954346,1612.645752,1500.803223,1520.200684,22942710000.0,23393.191406,24196.818359,23009.949219,23231.732422,42932550000.0,...,102.720001,104.949997,102.599998,104.529999,172100.0,87.580002,89.650002,87.25,89.239998,16584400.0
2022-07-21,1520.374512,1595.761963,1472.185425,1576.749512,20009560000.0,23233.201172,23388.322266,22431.148438,23164.628906,33631010000.0,...,104.68,106.050003,103.669998,106.010002,255100.0,86.93,87.800003,85.209999,87.75,21368700.0


In [None]:
import datetime as dt

# Where the symbol list lives and where the pickled dataframe goes.
# file_symbols = 'C:/Users/ping/MyDrive/stocks/MktCap2b_AUMtop1200/source/2021_Top1200_MktCap_n_AUM.txt'
# file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_trash.txt'
file_symbols = 'C:/Users/ping/Desktop/my_yfinance/symbols_XOM.txt'
path_pickle_dump = 'C:/Users/ping/Desktop/my_yfinance/'

# Number of days to step back from today (0 = today, 1 = yesterday).
days_delta = 0

# Date string (YYYY-MM-DD) used as the suffix of the pickle file name.
today_str = (dt.date.today() - dt.timedelta(days=days_delta)).isoformat()
filename_pickle = 'df_OHLCV_' + today_str

print(f'Full path to symbols text file: {file_symbols}')
print(f'Full path to pickled df_OHLCV:  {path_pickle_dump}{filename_pickle}')


In [None]:
    # sys.path is modified below, so sys must be imported in this cell
    import sys

    # put the directory that contains util.py first on the import search path
    sys.path.insert(0, 'C:/Users/ping/MyDrive/py_files/python/')
    from util import pickle_dump
    # pickle_dump(df_OHLCV, path_pickle_dump, filename_pickle, verbose)
    # print(f'Full path to pickled df_OHLCV: {path_pickle_dump}{filename_pickle}')

In [None]:
# Notebook-wide verbosity flag; other cells pass it to pickle_load and use it to gate prints.
verbose=False

In [None]:
# Inspect the type of df_XOM's row index.
type(df_XOM.index)

In [None]:
# Take all rows and the first 10 columns (by position) of df_keep, then display the result.
df_trash = df_keep.iloc[:,:10]
df_trash

In [None]:
# Display df_XOM.
df_XOM

In [None]:
# Inner-join df_trash with df_XOM on 'Date' and display the result (nothing is assigned).
# NOTE(review): in the outputs shown earlier df_trash has two column levels and df_XOM one;
# confirm the merge behaves as intended for frames with different column levels.
df_trash.merge(df_XOM, how='inner', on='Date')

In [None]:
# Align df_keep to the dates in df_XOM's index.
# reindex() accepts an index of row labels and returns a new frame, so the result
# must be assigned. The previous reindex_like(df_XOM.index) call passed an Index
# where an object with matching row/column indices is expected, and its result was
# discarded. Dates missing from df_keep become rows of NaN.
df_XOM_idx = df_keep.reindex(df_XOM.index)
# Display the 'XOM' columns of the aligned frame.
df_XOM_idx['XOM']

In [None]:
# Display the 'XOM' selection of df_keep
# (presumably the per-ticker sub-frame; df_keep's column layout is not visible here).
df_keep['XOM']

In [None]:
# https://stackoverflow.com/questions/49435438/pandas-validate-date-format
import pandas as pd

# errors="coerce" turns every entry that cannot be parsed into NaT, so
# notnull().all() is True only if all index dates convert to pd.Timestamp objects.
# The format has to be written with strftime directives ("%Y-%m-%d"); the previous
# "YYY-mm-dd" contained no directives and therefore could not match any date string.
index_as_dates = pd.to_datetime(df.index, format="%Y-%m-%d", errors="coerce")
if index_as_dates.notnull().all():
    print('Date index can be converted into pd.Timestamp objects')
if not df.index.has_duplicates:
    print('Date index has no duplicate dates')
# df.index.has_duplicates

In [None]:
# Bind a second name to df. This is an alias, not a copy: both names refer to the same object.
df_keep = df

In [None]:
# Keep only the columns of lstRow that hold no NaN at all.
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html
df_num_col = lstRow.dropna(how='any', axis='columns')

In [None]:
# Display df_num_col.
df_num_col

In [None]:
# Display descriptive statistics for df_num_col.
df_num_col.describe()

In [None]:
# Last row of df, kept as a one-row dataframe (positional slice, not a Series).
lstRow = df.iloc[-1:]
lstRow

In [None]:
# Display the 'LGH' selection of the last-row frame.
lstRow['LGH']

In [None]:
import numpy as np
# https://stackoverflow.com/questions/25039626/how-do-i-find-numeric-columns-in-pandas
# numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
# newdf = lstRow.select_dtypes(include=numerics)
# List the labels of the columns in lstRow whose dtype is numeric (any np.number subtype).
list(lstRow.select_dtypes(include=[np.number]).columns.values)

In [None]:
# Display newdf.
# NOTE(review): the only assignment to newdf visible in this notebook is commented out,
# so this raises NameError unless newdf is defined somewhere not shown here.
newdf

In [None]:
# Row count of df, followed by the first and the last three rows of its 'XOM' selection.
print(f'len(df): {len(df)}')
print(df['XOM'].head(3))
print(df['XOM'].tail(3))

In [None]:
# Drop the rows in which every value is NaN and report the row count before and after.
print(f'len before dropping rows with all NaN: {len(df)}')
# dropna() returns a new dataframe and leaves the original untouched, so the result
# has to be assigned; otherwise both lengths printed here are always identical.
df = df.dropna(how='all')  # drop row if all have NaN
print(f'len after dropping rows with all NaN: {len(df)}')

In [None]:
# sys.path is modified below, so sys must be imported in this cell
import sys

# put the directory that contains util.py first on the import search path
sys.path.insert(0, 'C:/Users/ping/MyDrive/py_files/python/perfstat/')
from util import pickle_load

# Load the pickled dataframe back and display its 'AAPL' selection.
df_pickled = pickle_load(path_pickle_dump, filename_pickle, verbose)
df_pickled['AAPL']

In [None]:
# Compare the last date in df_OHLCV's index with today's date, both as 'YYYY-MM-DD' strings.
date_last_df_OHLCV = df_OHLCV.index[-1].strftime('%Y-%m-%d')
date_today = dt.datetime.today().strftime('%Y-%m-%d')
# Keep the comparison result; as a bare expression in the middle of the cell it was
# evaluated and thrown away.
is_df_OHLCV_current = date_last_df_OHLCV == date_today
if verbose:
    print(f'last date in df_OHLCV: {date_last_df_OHLCV}, {type(date_last_df_OHLCV)}')
    print(f'date_today         : {date_today}, {type(date_today)}')
    # Print the outcome of the comparison, not just its text.
    print(f'date_last_df_OHLCV == date_today: {is_df_OHLCV_current}')

In [None]:
# Format both dates once, then print each with its type and whether they are equal.
last_date_text = df_OHLCV.index[-1].strftime('%Y-%m-%d')
today_text = dt.datetime.today().strftime('%Y-%m-%d')
print(last_date_text, type(last_date_text))
print(today_text, type(today_text))
print(last_date_text == today_text)


In [None]:


# print(dt.datetime.today(), type(dt.datetime.today()))



In [None]:
# Print the current local date and time together with the type of that object.
print(dt.datetime.today(), type(dt.datetime.today()))

In [None]:
## Pandas display options
# Widen the text display to 200 characters.
# (To raise the column limit as well: pd.options.display.max_columns = 200)
pd.options.display.width = 200