In [146]:
# https://towardsdatascience.com/pandas-groupby-a-simple-but-detailed-tutorial-314b8f37005d
# https://towardsdatascience.com/accessing-data-in-a-multiindex-dataframe-in-pandas-569e8767201d
# https://towardsdatascience.com/summarizing-data-with-pandas-crosstab-efc8b9abecf
# https://towardsdatascience.com/how-to-flatten-multiindex-columns-and-rows-in-pandas-f5406c50e569
# https://datascientyst.com/list-aggregation-functions-aggfunc-groupby-pandas/
# https://stackoverflow.com/questions/25929319/how-to-iterate-over-pandas-multiindex-dataframe-using-index
# https://stackoverflow.com/questions/24495695/pandas-get-unique-multiindex-level-values-by-label
# https://stackoverflow.com/questions/55706391/pandas-crosstab-on-multiple-columns-then-groupby

# https://matplotlib.org/stable/gallery/pyplots/pyplot_text.html#sphx-glr-gallery-pyplots-pyplot-text-py

In [147]:
from collections import Counter

import numpy as np
import pandas as pd

from myUtils import pickle_load, pickle_dump, symb_perf_stats_vectorized

# --- Locations ---------------------------------------------------------------
# Root folder of the yfinance data, and the sub-folder the pickles live in.
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance/"
path_data_dump = f"{path_dir}VSCode_dump/"

# --- Pickle file names (used together with path_data_dump) -------------------
# Adjusted OHLCV frame and close-only frame; symbols with no volume and no
# close were dropped before these were pickled.
f_pickled_df_a, f_pickled_df_c = 'df_OHLCV_clean', 'df_close_clean'
# Dict of performance ranks.
f_pickled_perf_ranks_dict = 'perf_ranks_dict'
# Dict keyed by the periods, value: symbols ranked across the performance ranks.
f_pickled_ranked_perf_ranks_dict = 'ranked_perf_ranks_dict'

# --- Run options -------------------------------------------------------------
# Set verbose to True to print more output.
verbose = False
# Negative row count for subsetting a frame by position (1500 rows back).
# NOTE(review): presumably ~250 trading days x 6 years -- confirm.
look_back_days = -(250 * 6)

In [148]:
# Cleaned close-price frame: report where it is read from, then load it.
print("Full path to pickled df Close cleaned:  ", path_data_dump, f_pickled_df_c, sep="")
df_c = pickle_load(path_data_dump, f_pickled_df_c, verbose=verbose)

# Cleaned OHLCV frame: same two steps.
print("Full path to pickled df OHLCV cleaned:  ", path_data_dump, f_pickled_df_a, sep="")
df_a = pickle_load(path_data_dump, f_pickled_df_a, verbose=verbose)

Full path to pickled df Close cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_close_clean
Full path to pickled df OHLCV cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_OHLCV_clean


In [149]:
# Rank every symbol over several trailing windows of df_c and keep the
# top 100 symbols for each rank column.
#
# Produces:
#   perf_ranks_dict : {'period<N>': {rank column: array of top-100 symbols}}
#   syms_perf_rank  : one top-100 symbol list per (period, rank column) pair
# perf_ranks_dict is pickled and printed at the end of the cell.
perf_ranks_dict = {}  # dict of performance ranks, keyed by period name
syms_perf_rank = []  # list of lists to store top 100 ranked symbols
_periods = [-15, -30, -60, -120, -240]  # negative: rows counted back from the end of df_c

# Loop-invariant column layouts, defined once instead of on every period.
column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
cols_sort = ['r_CAGR/UI', 'r_CAGR/Std', 'r_Std/UI']

for _period in _periods:
    f_name = 'period' + str(_period)  # key in perf_ranks_dict, e.g. 'period-15'

    # Explicit positional slice: the trailing abs(_period) rows of df_c.
    _df_c = df_c.iloc[_period:]
    # max_drawdown and returns_std are returned by the helper but not used in this cell.
    symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
        symb_perf_stats_vectorized(_df_c)

    # The date span is identical for every symbol of this window, so it is
    # formatted once here rather than once per symbol.
    date_first = drawdown.index[0].strftime('%Y-%m-%d')
    date_last = drawdown.index[-1].strftime('%Y-%m-%d')

    caches_perf_stats_vect = []
    for symbol in symbols:
        cache = (symbol, date_first, date_last, period_yr, CAGR[symbol],
                 UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
        # append performance data (tuple) to caches_perf_stats_vect (list)
        caches_perf_stats_vect.append(cache)

    # write symbols' performance stats to dataframe
    df_ps = pd.DataFrame(caches_perf_stats_vect, columns=column_names)
    # ascending=False: the largest value of each metric receives rank 1
    df_ps['r_CAGR/UI'] = df_ps['CAGR/UI'].rank(ascending=False)
    df_ps['r_CAGR/Std'] = df_ps['CAGR/Std'].rank(ascending=False)
    df_ps['r_Std/UI'] = df_ps['Std/UI'].rank(ascending=False)

    _dict = {}  # rank column -> array of this period's top-100 symbols
    for col in cols_sort:
        symbols_top_100 = df_ps.sort_values(by=[col]).head(100).symbol.values
        syms_perf_rank.append(list(symbols_top_100))
        _dict[col] = symbols_top_100
    # Store the period's result once, after all rank columns are filled in.
    perf_ranks_dict[f_name] = _dict

pickle_dump(perf_ranks_dict, path_data_dump, f_pickled_perf_ranks_dict)
print(f'perf_ranks_dict:\n{perf_ranks_dict}\n')

perf_ranks_dict:
{'period-15': {'r_CAGR/UI': array(['AXON', 'NRIM', 'PKOH', 'DMRC', 'VCYT', 'AMKR', 'RCKY', 'LGND',
       'CMPR', 'ABMD', 'WYNN', 'LOPE', 'RUN', 'YUMC', 'MLAB', 'TSM',
       'TSBK', 'TGLS', 'NYT', 'RMBS', 'AAON', 'JD', 'TSE', 'LVS', 'THRM',
       'CIR', 'ACLS', 'LSCC', 'PKX', 'AE', 'AU', 'HEES', 'BVH', 'PDFS',
       'NEOG', 'SAIA', 'HOFT', 'DD', 'WNC', 'SMG', 'XPO', 'CROX', 'BA',
       'ATNI', 'AVNS', 'UFPT', 'NTES', 'MAC', 'SPWR', 'CHGG', 'OSPN',
       'GILD', 'GPRE', 'PTMN', 'MTSI', 'HLI', 'CRAI', 'SKX', 'ASPS',
       'PUK', 'EXAS', 'NBHC', 'QNST', 'CNK', 'PRDO', 'MCY', 'GS', 'NOMD',
       'SLAB', 'PIPR', 'APTV', 'MOD', 'ACRS', 'AMG', 'SONY', 'LMNR',
       'ELF', 'IVZ', 'HTHT', 'GDO', 'PMT', 'SEIC', 'TEN', 'MDRX', 'SCCO',
       'APAM', 'PZZA', 'FIZZ', 'RETA', 'PDS', 'STRA', 'AZTA', 'SEDG',
       'DIOD', 'OPY', 'OMAB', 'APD', 'WB', 'FICO', 'RNR'], dtype=object), 'r_CAGR/Std': array(['DMRC', 'VCYT', 'LGND', 'AMKR', 'CMPR', 'RCKY', 'PKOH', 'WYNN',
       'AXON

In [152]:
# Order symbols by how often they occur across all the top-100 lists collected
# in syms_perf_rank (a list of lists of symbols), then pickle and print the result.
l_syms_perf_rank = [val for sublist in syms_perf_rank for val in sublist]  # flatten list of lists

# Counter is imported in the notebook's import cell.
cnt_symbol_freq = Counter(l_syms_perf_rank)  # symbol -> number of occurrences
# most_common() with no argument returns every (symbol, count) pair, highest
# count first, e.g. [('AKRO', 6), ('IMVT', 4), ... ('ADEA', 3)]; symbols with
# equal counts keep the order in which they were first encountered.
l_tuples = cnt_symbol_freq.most_common()
# keep just the symbols, dropping the frequency counts
symbols_ranked_perf_ranks = [symbol for symbol, _count in l_tuples]

ranked_perf_ranks_dict = {}
# key name embeds the list of periods, e.g. 'ranked_perf_ranks_period[-15, -30, -60, -120, -240]'
f_name = f'ranked_perf_ranks_period{_periods}'
# value: list of most common symbols in all performance ranks in descending order
ranked_perf_ranks_dict[f_name] = symbols_ranked_perf_ranks
pickle_dump(ranked_perf_ranks_dict, path_data_dump, f_pickled_ranked_perf_ranks_dict)
print(f'ranked_perf_ranks_dict:\n{ranked_perf_ranks_dict}\n')

ranked_perf_ranks_dict:
{'ranked_perf_ranks_period[-15, -30, -60, -120, -240]': ['TSBK', 'ELF', 'AVEO', 'ASC', 'TPL', 'AXON', 'NRIM', 'AAON', 'UFPT', 'GILD', 'MOD', 'SANM', 'INSW', 'LOPE', 'AE', 'ABMD', 'RMBS', 'WNC', 'CHGG', 'HLI', 'CRAI', 'CHUY', 'LANC', 'EME', 'RETA', 'OMAB', 'TR', 'EURN', 'GPC', 'ADEA', 'ERIE', 'HFWA', 'TWNK', 'TNK', 'AMKR', 'THRM', 'DD', 'ATNI', 'MAC', 'TEN', 'PDS', 'STRA', 'FICO', 'DENN', 'IBA', 'AIMC', 'RENN', 'PRTA', 'ANIK', 'MYOV', 'FTI', 'NTNX', 'MODN', 'FCBC', 'AGYS', 'NFLX', 'PBT', 'DXCM', 'STNG', 'DMRC', 'HOFT', 'CROX', 'BA', 'NBHC', 'PRDO', 'GS', 'PMT', 'RNR', 'NPO', 'PLOW', 'AEHR', 'OSBC', 'MCRI', 'BBSI', 'SLB', 'BIIB', 'EVI', 'TTC', 'HTBK', 'STBA', 'TBBK', 'ORLY', 'CCRN', 'FSLR', 'DINO', 'VRTX', 'MGI', 'NBIX', 'BAH', 'DGII', 'CPRX', 'RELL', 'TNP', 'LW', 'SWIR', 'VCYT', 'CIR', 'PKX', 'HEES', 'OSPN', 'QNST', 'PIPR', 'FIZZ', 'UVV', 'UNFI', 'MGRC', 'CAT', 'OII', 'NVEC', 'RCL', 'AER', 'ARGO', 'CLR', 'RNST', 'FBNC', 'ODP', 'TDW', 'NBTB', 'MEDP', 'ISEE', 'WWE'