In [1]:
# https://towardsdatascience.com/pandas-groupby-a-simple-but-detailed-tutorial-314b8f37005d
# https://towardsdatascience.com/accessing-data-in-a-multiindex-dataframe-in-pandas-569e8767201d
# https://towardsdatascience.com/summarizing-data-with-pandas-crosstab-efc8b9abecf
# https://towardsdatascience.com/how-to-flatten-multiindex-columns-and-rows-in-pandas-f5406c50e569
# https://datascientyst.com/list-aggregation-functions-aggfunc-groupby-pandas/
# https://stackoverflow.com/questions/25929319/how-to-iterate-over-pandas-multiindex-dataframe-using-index
# https://stackoverflow.com/questions/24495695/pandas-get-unique-multiindex-level-values-by-label
# https://stackoverflow.com/questions/55706391/pandas-crosstab-on-multiple-columns-then-groupby

# https://matplotlib.org/stable/gallery/pyplots/pyplot_text.html#sphx-glr-gallery-pyplots-pyplot-text-py

In [2]:
import pandas as pd
import numpy as np
from myUtils import pickle_load, pickle_dump, symb_perf_stats_vectorized

# --- run options ------------------------------------------------------------
verbose = False  # set True for more output from the helpers that accept it
look_back_days = -(250 * 6)  # negative row count used to subset a df via iloc

# --- locations --------------------------------------------------------------
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = f"{path_dir}VSCode_dump/"  # pickles are read from / written to here

# --- pickle file names (stored under path_data_dump) ------------------------
# adjusted OHLCV frame; symbols with no volume and no close were dropped
f_pickled_df_a = 'df_OHLCV_clean'
# close-price frame; symbols with no volume and no close were dropped
f_pickled_df_c = 'df_close_clean'
# dict of performance ranks
f_pickled_perf_ranks_dict = 'perf_ranks_dict'
# dict with key: periods, value: ranked_perf_ranks
f_pickled_ranked_perf_ranks_dict = 'ranked_perf_ranks_dict'

In [3]:
# Both loads share the same verbosity setting.
load_kwargs = {"verbose": verbose}

# Cleaned close prices: announce the source path, then unpickle.
print(f"Full path to pickled df Close cleaned:  {path_data_dump}{f_pickled_df_c}")
df_c = pickle_load(path_data_dump, f_pickled_df_c, **load_kwargs)

# Cleaned adjusted OHLCV: announce the source path, then unpickle.
print(f"Full path to pickled df OHLCV cleaned:  {path_data_dump}{f_pickled_df_a}")
df_a = pickle_load(path_data_dump, f_pickled_df_a, **load_kwargs)

Full path to pickled df Close cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_close_clean
Full path to pickled df OHLCV cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_OHLCV_clean


In [4]:
# Build, for each look-back window, the top-100 symbols under three rankings
# (CAGR/UI, CAGR/Std, Std/UI), then pickle the result.
#
# Outputs of this cell:
#   perf_ranks_dict : {'period<days_lookback>': {rank column: array of top-100 symbols}}
#   syms_perf_rank  : list of lists, one top-100 list per (period, rank column)
perf_ranks_dict = {}  # dict of performance ranks, keyed by period name
syms_perf_rank = []  # list of lists to store top 100 ranked symbols

# days_lookbacks = [-15, -30, -60, -120, -240]
days_lookbacks = [-15, -30]  # negative values: rows counted back from the end of df_c

# These do not depend on the period, so define them once outside the loop.
column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
cols_sort = ['r_CAGR/UI', 'r_CAGR/Std', 'r_Std/UI']

for days_lookback in days_lookbacks:
    f_name = f'period{days_lookback}'  # e.g. 'period-15'

    # .iloc makes the positional "last N rows" slice explicit
    _df_c = df_c.iloc[days_lookback:]
    (symbols, period_yr, drawdown, UI, max_drawdown, returns_std,
     Std_UI, CAGR, CAGR_Std, CAGR_UI) = symb_perf_stats_vectorized(_df_c)

    # The date span comes from drawdown's index and is identical for every
    # symbol in this period, so format it once instead of once per symbol.
    date_first = drawdown.index[0].strftime('%Y-%m-%d')
    date_last = drawdown.index[-1].strftime('%Y-%m-%d')

    # one performance tuple per symbol, in column_names order
    caches_perf_stats_vect = [
        (symbol, date_first, date_last, period_yr, CAGR[symbol],
         UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
        for symbol in symbols
    ]

    # write symbols' performance stats to dataframe
    df_ps = pd.DataFrame(caches_perf_stats_vect, columns=column_names)
    # rank(ascending=False): the largest metric value gets rank 1
    df_ps['r_CAGR/UI'] = df_ps['CAGR/UI'].rank(ascending=False)
    df_ps['r_CAGR/Std'] = df_ps['CAGR/Std'].rank(ascending=False)
    df_ps['r_Std/UI'] = df_ps['Std/UI'].rank(ascending=False)

    _dict = {}
    for col in cols_sort:
        # ascending sort on the rank column puts the best-ranked symbols first
        symbols_top_100 = df_ps.sort_values(by=[col]).head(100).symbol.values
        syms_perf_rank.append(list(symbols_top_100))
        _dict[col] = symbols_top_100  # stored as a numpy array (from .values)
    # register the period once, after all of its rankings are collected
    perf_ranks_dict[f_name] = _dict

pickle_dump(perf_ranks_dict, path_data_dump, f_pickled_perf_ranks_dict)
print(f'perf_ranks_dict:\n{perf_ranks_dict}\n')

perf_ranks_dict:
{'period-15': {'r_CAGR/UI': array(['CTRN', 'APPS', 'GDS', 'EVI', 'CLFD', 'BURL', 'ENR', 'AEO', 'AZTA',
       'BBW', 'MRTX', 'QURE', 'ANF', 'PDFS', 'ZTO', 'HZNP', 'CELH',
       'VRDN', 'COUP', 'GPS', 'ROST', 'HHC', 'SCHN', 'BMRN', 'WMC',
       'FICO', 'RUN', 'PVH', 'BIDU', 'NXST', 'FL', 'AMBA', 'ETSY', 'BBWI',
       'MATW', 'USFD', 'RDNT', 'X', 'SAGE', 'MASI', 'WLFC', 'LSCC', 'CIR',
       'AEL', 'URBN', 'DIOD', 'BYM', 'GES', 'AMBC', 'ENV', 'SRPT', 'GENC',
       'NKX', 'TITN', 'IPAR', 'BERY', 'TSM', 'ATKR', 'CPRI', 'MYE',
       'COLM', 'NBH', 'IMAX', 'AMD', 'BABA', 'SPWR', 'ALE', 'BOOT',
       'CPRT', 'RL', 'PARA', 'ATO', 'UHAL', 'GCO', 'MUI', 'LNW', 'RICK',
       'HOV', 'DE', 'JD', 'MNP', 'EXTR', 'RIO', 'NVG', 'PI', 'STLA',
       'HSBC', 'ARMK', 'ASML', 'SEE', 'HTLD', 'M', 'EVH', 'RYI', 'NUO',
       'GIII', 'CQP', 'ERII', 'NQP', 'SKX'], dtype=object), 'r_CAGR/Std': array(['GDS', 'APPS', 'CTRN', 'EVI', 'CLFD', 'MRTX', 'AEO', 'BURL',
       'AZTA', 'COUP', 'QUR

In [5]:
# Rank symbols by how often they appear across all top-100 lists collected in
# syms_perf_rank (one list per period and rank column), then pickle the result.
from collections import Counter

# flatten list of lists into a single list of symbols (duplicates kept)
l_syms_perf_rank = [val for sublist in syms_perf_rank for val in sublist]

cnt_symbol_freq = Counter(l_syms_perf_rank)  # symbol -> number of appearances
# most_common() with no argument returns every (symbol, count) pair, highest
# count first, e.g. [('AKRO', 6), ('IMVT', 4), ... ('ADEA', 3)]
l_tuples = cnt_symbol_freq.most_common()
# keep just the symbols, dropping the frequency counts
symbols_ranked_perf_ranks = [symbol for symbol, _count in l_tuples]

# key embeds the look-back list, e.g. 'ranked_perf_ranks_period[-15, -30]'
f_name = f'ranked_perf_ranks_period{days_lookbacks}'
# value: symbols across all performance ranks, most frequent first
ranked_perf_ranks_dict = {f_name: symbols_ranked_perf_ranks}

pickle_dump(ranked_perf_ranks_dict, path_data_dump, f_pickled_ranked_perf_ranks_dict)
print(f'ranked_perf_ranks_dict:\n{ranked_perf_ranks_dict}\n')

ranked_perf_ranks_dict:
{'ranked_perf_ranks_period[-15, -30]': ['AEO', 'BBW', 'PDFS', 'HZNP', 'RUN', 'WLFC', 'SPWR', 'CTRN', 'BURL', 'ROST', 'HHC', 'PVH', 'USFD', 'TITN', 'MYE', 'AZTA', 'QURE', 'GPS', 'FICO', 'LSCC', 'CIR', 'EXTR', 'PI', 'ASML', 'FN', 'HPE', 'CLFD', 'ENR', 'MRTX', 'ANF', 'VRDN', 'SCHN', 'BMRN', 'WMC', 'NXST', 'AMBA', 'MATW', 'X', 'AEL', 'BYM', 'AMBC', 'ENV', 'SRPT', 'GENC', 'IPAR', 'BERY', 'ATKR', 'ALE', 'DE', 'AMKR', 'ZEUS', 'WYNN', 'UNVR', 'TAK', 'TBNK', 'WPP', 'WNC', 'ACLS', 'GILD', 'LOPE', 'MCRI', 'RCKY', 'NRIM', 'GBX', 'AIMC', 'UFPT', 'AAON', 'ITGR', 'ELF', 'AMG', 'OSIS', 'CHGG', 'BUD', 'GS', 'HOG', 'AVNS', 'B', 'PMT', 'APPS', 'GDS', 'EVI', 'ZTO', 'CELH', 'COUP', 'BIDU', 'FL', 'ETSY', 'BBWI', 'RDNT', 'SAGE', 'MASI', 'URBN', 'DIOD', 'GES', 'NKX', 'TSM', 'CPRI', 'COLM', 'NBH', 'IMAX', 'AMD', 'BABA', 'BOOT', 'CPRT', 'RL', 'PARA', 'ATO', 'UHAL', 'GCO', 'MUI', 'LNW', 'RICK', 'HOV', 'JD', 'MNP', 'RIO', 'NVG', 'STLA', 'HSBC', 'ARMK', 'SEE', 'HTLD', 'M', 'EVH', 'RYI', 'NU