In [14]:
# https://towardsdatascience.com/pandas-groupby-a-simple-but-detailed-tutorial-314b8f37005d
# https://towardsdatascience.com/accessing-data-in-a-multiindex-dataframe-in-pandas-569e8767201d
# https://towardsdatascience.com/summarizing-data-with-pandas-crosstab-efc8b9abecf
# https://towardsdatascience.com/how-to-flatten-multiindex-columns-and-rows-in-pandas-f5406c50e569
# https://datascientyst.com/list-aggregation-functions-aggfunc-groupby-pandas/
# https://stackoverflow.com/questions/25929319/how-to-iterate-over-pandas-multiindex-dataframe-using-index
# https://stackoverflow.com/questions/24495695/pandas-get-unique-multiindex-level-values-by-label
# https://stackoverflow.com/questions/55706391/pandas-crosstab-on-multiple-columns-then-groupby

# https://matplotlib.org/stable/gallery/pyplots/pyplot_text.html#sphx-glr-gallery-pyplots-pyplot-text-py

In [15]:
from collections import Counter

import numpy as np
import pandas as pd

from myUtils import pickle_load, pickle_dump, symb_perf_stats_vectorized

# --- Locations ---------------------------------------------------------------
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
path_dir = 'C:/Users/ping/MyDrive/stocks/yfinance/'
path_data_dump = f'{path_dir}VSCode_dump/'  # folder holding the pickled objects

# --- Names of the pickled objects inside path_data_dump -----------------------
# adjusted OHLCV frame; symbols with no volume and close were dropped
filename_pickled_df_a = "df_OHLCV_clean"
# close-price frame; symbols with no volume and close were dropped
filename_pickled_df_c = "df_close_clean"
# dictionary of performance ranks
filename_pickled_perf_ranks_dict = "perf_ranks_dict"
# most common symbols across all performance ranks, in ranked order
filename_pickled_ranked_all_perf_ranks = "ranked_all_perf_ranks"

# --- Run options --------------------------------------------------------------
look_back_days = -(6 * 250)  # negative offset intended for an iloc row subset
verbose = False  # set to True for more printed output

In [16]:
# Read the cleaned close-price frame, echoing where it is read from.
print("Full path to pickled df Close cleaned:  " + path_data_dump + filename_pickled_df_c)
df_c = pickle_load(path_data_dump, filename_pickled_df_c, verbose=verbose)

# Read the cleaned OHLCV frame the same way.
print("Full path to pickled df OHLCV cleaned:  " + path_data_dump + filename_pickled_df_a)
df_a = pickle_load(path_data_dump, filename_pickled_df_a, verbose=verbose)

Full path to pickled df Close cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_close_clean
Full path to pickled df OHLCV cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_OHLCV_clean


In [17]:
# Rank every symbol over several trailing windows and keep the best performers.
#
# For each look-back window this cell:
#   1. takes the last |_period| rows of df_c,
#   2. gets per-symbol performance statistics from symb_perf_stats_vectorized,
#   3. ranks the symbols on three ratios (rank 1 = largest value), and
#   4. records the `top_n` best-ranked symbols for each ratio.
#
# Results:
#   perf_ranks_dict - {'period-15': {'r_CAGR/UI': array of symbols, ...}, ...},
#                     also pickled to path_data_dump
#   syms_perf_rank  - list of lists, one top-symbol list per (period, ratio)
perf_ranks_dict = {}
syms_perf_rank = []  # list of lists, each storing the top `top_n` ranked symbols
top_n = 100  # how many best-ranked symbols to keep per ranking column
_periods = [-15, -30, -60, -120, -240]  # negative: counted back from the last row

# These do not change between periods, so define them once.
column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
cols_sort = ['r_CAGR/UI', 'r_CAGR/Std', 'r_Std/UI']

for _period in _periods:
    f_name = 'period' + str(_period)

    # Explicit positional slice: the last |_period| rows of the close frame.
    _df_c = df_c.iloc[_period:]
    # max_drawdown and returns_std are returned by the helper but not used below.
    symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
        symb_perf_stats_vectorized(_df_c)

    # The date span is identical for every symbol in this window; format it once.
    date_first = drawdown.index[0].strftime('%Y-%m-%d')
    date_last = drawdown.index[-1].strftime('%Y-%m-%d')

    # One tuple of performance data per symbol, in column_names order.
    caches_perf_stats_vect = [
        (symbol, date_first, date_last, period_yr, CAGR[symbol],
         UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
        for symbol in symbols
    ]

    # write symbols' performance stats to dataframe
    df_ps = pd.DataFrame(caches_perf_stats_vect, columns=column_names)
    # ascending=False gives rank 1 to the largest value of each ratio
    df_ps['r_CAGR/UI'] = df_ps['CAGR/UI'].rank(ascending=False)
    df_ps['r_CAGR/Std'] = df_ps['CAGR/Std'].rank(ascending=False)
    df_ps['r_Std/UI'] = df_ps['Std/UI'].rank(ascending=False)

    _dict = {}
    print(f'{f_name} top {top_n} symbols')
    for col in cols_sort:
        # Best ranks first; variable name kept from the original cell.
        symbols_top_100 = df_ps.sort_values(by=[col]).head(top_n).symbol.values
        syms_perf_rank.append(list(symbols_top_100))
        print(f'{col}: {symbols_top_100}')
        _dict[col] = symbols_top_100
    # Store this period's results once, after all ranking columns are filled in.
    perf_ranks_dict[f_name] = _dict
    print(' ')

pickle_dump(perf_ranks_dict, path_data_dump, filename_pickled_perf_ranks_dict)
print(f'pickled perf_ranks_dict to: {path_data_dump}{filename_pickled_perf_ranks_dict}\n')
print(f'perf_ranks_dict:\n{perf_ranks_dict}\n')


period-15 top 100 symbols
r_CAGR/UI: ['NRIM' 'VCYT' 'PKOH' 'LOPE' 'AXON' 'AMKR' 'DMRC' 'RMBS' 'MLAB' 'GBX'
 'PKX' 'RCKY' 'LGND' 'ABMD' 'WNC' 'BA' 'RUN' 'AIMC' 'WYNN' 'ETD' 'LSCC'
 'AE' 'CIR' 'TSM' 'AAON' 'HEES' 'THRM' 'TSE' 'MTSI' 'XPO' 'NEOG' 'ACLS'
 'DD' 'SRI' 'ATNI' 'HOFT' 'HLI' 'SMG' 'DAN' 'GPRE' 'IVZ' 'SHOP' 'TSBK'
 'AU' 'SMCI' 'CHGG' 'PIPR' 'KLAC' 'SEIC' 'OSPN' 'LVS' 'NOMD' 'AEIS' 'SAIA'
 'BVH' 'YUMC' 'MOD' 'FICO' 'NYT' 'UFPT' 'NVDA' 'PZZA' 'CZR' 'TEX' 'MAC'
 'SKX' 'TPL' 'OII' 'NPO' 'RNR' 'CNK' 'NBHC' 'TEN' 'JD' 'GS' 'PRDO' 'LRCX'
 'AVNS' 'AMAT' 'SPWR' 'APTV' 'MYE' 'GVA' 'MCY' 'AMG' 'SONY' 'PDFS' 'PLNT'
 'PTMN' 'PRG' 'AMD' 'AXSM' 'HVT' 'GE' 'FOXF' 'DENN' 'PUK' 'QNST' 'GDO' 'E']
r_CAGR/Std: ['VCYT' 'DMRC' 'PKOH' 'AMKR' 'MLAB' 'LGND' 'CIR' 'GBX' 'RUN' 'AXON' 'WYNN'
 'RCKY' 'LSCC' 'RMBS' 'WNC' 'PKX' 'ETD' 'BA' 'TSE' 'SRI' 'ACLS' 'AIMC'
 'THRM' 'XPO' 'SHOP' 'TSM' 'HEES' 'SMCI' 'AE' 'SMG' 'NEOG' 'FICO' 'MTSI'
 'JD' 'AAON' 'HOFT' 'SAIA' 'ATNI' 'ABMD' 'AU' 'AXSM' 'GPRE' 'DAN' 'CHGG'
 'O

In [19]:
# Order symbols by how often they appear across all of the top-ranked lists.
# Counter is imported in the notebook's import cell.

# syms_perf_rank is a list of lists; flatten it so every appearance is counted.
l_syms_perf_rank = [val for sublist in syms_perf_rank for val in sublist]

cnt_symbol_freq = Counter(l_syms_perf_rank)  # symbol -> number of appearances
# (symbol, count) pairs, highest count first, e.g. [('AKRO', 6), ('IMVT', 4), ...];
# symbols with equal counts stay in first-seen order.
l_tuples = cnt_symbol_freq.most_common()
# keep just the symbols, dropping the frequency counts
symbols_ranked_all_perf_ranks = [symbol for symbol, _count in l_tuples]
pickle_dump(symbols_ranked_all_perf_ranks, path_data_dump, filename_pickled_ranked_all_perf_ranks)
symbols_ranked_all_perf_ranks  # last expression: shown as the cell output

['TPL',
 'NRIM',
 'TSBK',
 'AVEO',
 'ASC',
 'INSW',
 'LOPE',
 'AXON',
 'AAON',
 'MOD',
 'UFPT',
 'SANM',
 'RMBS',
 'AE',
 'RELL',
 'ELF',
 'ABMD',
 'WNC',
 'AIMC',
 'HLI',
 'CHGG',
 'GILD',
 'OMAB',
 'CHUY',
 'EME',
 'LANC',
 'HFWA',
 'STNG',
 'NPO',
 'IBA',
 'EURN',
 'SLB',
 'FTI',
 'FCBC',
 'GPC',
 'ANIK',
 'ADEA',
 'TNK',
 'AMKR',
 'DMRC',
 'THRM',
 'SRI',
 'MAC',
 'OII',
 'TEN',
 'MYE',
 'DENN',
 'OSBC',
 'RETA',
 'PLOW',
 'TR',
 'ODP',
 'STBA',
 'TBBK',
 'MEDP',
 'PRTA',
 'NTNX',
 'MYOV',
 'MODN',
 'ERIE',
 'NFLX',
 'DXCM',
 'AGYS',
 'TWNK',
 'TRQ',
 'VCYT',
 'GBX',
 'PKX',
 'BA',
 'ETD',
 'CIR',
 'ATNI',
 'DAN',
 'SMCI',
 'PIPR',
 'CZR',
 'PRDO',
 'AXSM',
 'CRAI',
 'AEHR',
 'RCL',
 'HALO',
 'TDW',
 'BBSI',
 'NBTB',
 'BIIB',
 'TTC',
 'HTBK',
 'EVI',
 'FSLR',
 'TCBK',
 'CCRN',
 'CHCO',
 'WING',
 'VRTX',
 'NBIX',
 'MGI',
 'DGII',
 'CPRX',
 'CCBG',
 'TNP',
 'HEES',
 'XPO',
 'DD',
 'HOFT',
 'SMG',
 'OSPN',
 'FICO',
 'TEX',
 'RNR',
 'GE',
 'E',
 'WLFC',
 'MGRC',
 'RFP',
 'CAT',
 'UNFI'