In [1]:
# https://towardsdatascience.com/pandas-groupby-a-simple-but-detailed-tutorial-314b8f37005d
# https://towardsdatascience.com/accessing-data-in-a-multiindex-dataframe-in-pandas-569e8767201d
# https://towardsdatascience.com/summarizing-data-with-pandas-crosstab-efc8b9abecf
# https://towardsdatascience.com/how-to-flatten-multiindex-columns-and-rows-in-pandas-f5406c50e569
# https://datascientyst.com/list-aggregation-functions-aggfunc-groupby-pandas/
# https://stackoverflow.com/questions/25929319/how-to-iterate-over-pandas-multiindex-dataframe-using-index
# https://stackoverflow.com/questions/24495695/pandas-get-unique-multiindex-level-values-by-label
# https://stackoverflow.com/questions/55706391/pandas-crosstab-on-multiple-columns-then-groupby

# https://matplotlib.org/stable/gallery/pyplots/pyplot_text.html#sphx-glr-gallery-pyplots-pyplot-text-py

In [2]:
from collections import Counter

import numpy as np
import pandas as pd

from myUtils import pickle_load, pickle_dump, symb_perf_stats_vectorized

# Directories: root of the yfinance data and the folder holding pickled dumps.
path_dir = 'C:/Users/ping/MyDrive/stocks/yfinance/'
path_data_dump = f'{path_dir}VSCode_dump/'

# Pickle file names.
# - adjusted OHLCV frame, symbols with no volume and close dropped
f_pickled_df_a = "df_OHLCV_clean"
# - close-only frame, symbols with no volume and close dropped
f_pickled_df_c = "df_close_clean"
# - dict of performance ranks
f_pickled_perf_ranks_dict = "perf_ranks_dict"
# - dict with key: periods, value: ranked_perf_ranks
f_pickled_ranked_perf_ranks_dict = "ranked_perf_ranks_dict"

# Run options.
verbose = False  # set to True for more printed output
# Negative row offset for positional (iloc) subsetting of a frame;
# presumably about 6 years at 250 trading days per year - TODO confirm.
look_back_days = -(250 * 6)

In [3]:
# Cleaned close-price frame: echo the location it is read from, then load it.
print("Full path to pickled df Close cleaned:  {}{}".format(path_data_dump, f_pickled_df_c))
df_c = pickle_load(path_data_dump, f_pickled_df_c, verbose=verbose)

# Cleaned OHLCV frame: same two steps.
print("Full path to pickled df OHLCV cleaned:  {}{}".format(path_data_dump, f_pickled_df_a))
df_a = pickle_load(path_data_dump, f_pickled_df_a, verbose=verbose)

Full path to pickled df Close cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_close_clean
Full path to pickled df OHLCV cleaned:  C:/Users/ping/MyDrive/stocks/yfinance/VSCode_dump/df_OHLCV_clean


In [4]:
# For each trailing window of df_c, compute per-symbol performance stats, rank
# the symbols on three ratio columns, and keep the top 100 symbols per ranking.
perf_ranks_dict = {}  # key: 'period<N>', value: {rank column: array of top-100 symbols}
syms_perf_rank = []  # one list of top-100 symbols per (period, rank column) pair
_periods = [-15, -30, -60, -120, -240]  # negative offsets: keep the last N rows of df_c

# Loop-invariant column layouts, defined once instead of once per period.
column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
cols_sort = ['r_CAGR/UI', 'r_CAGR/Std', 'r_Std/UI']

for _period in _periods:
    f_name = 'period' + str(_period)

    # Explicit positional slice: the last abs(_period) rows of df_c.
    _df_c = df_c.iloc[_period:]
    symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
        symb_perf_stats_vectorized(_df_c)

    # First/last index labels of `drawdown` do not depend on the symbol, so
    # format them once per period rather than once per symbol.
    date_first = drawdown.index[0].strftime('%Y-%m-%d')
    date_last = drawdown.index[-1].strftime('%Y-%m-%d')

    # One tuple of performance stats per symbol, in `column_names` order.
    caches_perf_stats_vect = [
        (symbol, date_first, date_last, period_yr, CAGR[symbol],
         UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
        for symbol in symbols
    ]

    # Write symbols' performance stats to a dataframe and add rank columns
    # (ascending=False: the largest value gets rank 1).
    df_ps = pd.DataFrame(caches_perf_stats_vect, columns=column_names)
    df_ps['r_CAGR/UI'] = df_ps['CAGR/UI'].rank(ascending=False)
    df_ps['r_CAGR/Std'] = df_ps['CAGR/Std'].rank(ascending=False)
    df_ps['r_Std/UI'] = df_ps['Std/UI'].rank(ascending=False)

    _dict = {}
    for col in cols_sort:
        # Best-ranked 100 symbols for this rank column.
        symbols_top_100 = df_ps.sort_values(by=[col]).head(100).symbol.values
        syms_perf_rank.append(list(symbols_top_100))
        _dict[col] = symbols_top_100
    # Register this period's rankings once, after all rank columns are filled.
    perf_ranks_dict[f_name] = _dict

pickle_dump(perf_ranks_dict, path_data_dump, f_pickled_perf_ranks_dict)
print(f'perf_ranks_dict:\n{perf_ranks_dict}\n')

perf_ranks_dict:
{'period-15': {'r_CAGR/UI': array(['JD', 'GDS', 'ABMD', 'AXON', 'WB', 'AMKR', 'ASPS', 'NRIM', 'PKOH',
       'RUN', 'DMRC', 'YUMC', 'RCKY', 'TGLS', 'LGND', 'TSM', 'HTHT',
       'WYNN', 'MLAB', 'CROX', 'VCYT', 'BABA', 'THRM', 'NTES', 'ZTO',
       'NYT', 'CHN', 'SPWR', 'RMBS', 'TSBK', 'PUK', 'ACLS', 'LSCC',
       'AAON', 'HOFT', 'UFPT', 'AU', 'PKX', 'CMPR', 'BHP', 'DD', 'BIDU',
       'LVS', 'WNC', 'OSPN', 'PDFS', 'SEDG', 'SCCO', 'TSE', 'RIO', 'MCY',
       'TCOM', 'SMCI', 'MDRX', 'AZTA', 'CHGG', 'BA', 'AE', 'QNST', 'SKX',
       'PTMN', 'GDO', 'CIR', 'PRDO', 'AMG', 'CRAI', 'AVNS', 'BVH', 'FSLR',
       'ELF', 'SONY', 'MAC', 'Z', 'FTI', 'FMX', 'IFN', 'NEOG', 'DK',
       'IVZ', 'WLFC', 'DIOD', 'EMF', 'EXAS', 'WPP', 'MYI', 'VALE', 'APAM',
       'NOMD', 'GPRE', 'PIPR', 'GS', 'WOR', 'OMAB', 'SMG', 'MTSI', 'SAIA',
       'CNK', 'CNHI', 'TCPC', 'UFCS'], dtype=object), 'r_CAGR/Std': array(['GDS', 'JD', 'ASPS', 'WB', 'AMKR', 'RUN', 'DMRC', 'HTHT', 'ABMD',
       'LGND', 'PK

In [5]:
# Flatten the list of top-100 lists into one list of symbols; a symbol
# appears once for every ranking it made.
l_syms_perf_rank = [val for sublist in syms_perf_rank for val in sublist]

# Count how often each symbol appears across all rankings.
cnt_symbol_freq = Counter(l_syms_perf_rank)
# most_common() gives (symbol, count) pairs in descending count order,
# e.g. [('AKRO', 6), ('IMVT', 4), ... ('ADEA', 3)]; equal counts keep
# first-encountered order.
l_tuples = cnt_symbol_freq.most_common()
# Keep just the symbols, dropping the frequency counts.
symbols_ranked_perf_ranks = [symbol for symbol, _count in l_tuples]

# Single-entry dict: the key embeds the list of periods, the value is the list
# of symbols ordered from most to least frequent across all performance ranks.
f_name = 'ranked_perf_ranks_period' + str(_periods)
ranked_perf_ranks_dict = {f_name: symbols_ranked_perf_ranks}
pickle_dump(ranked_perf_ranks_dict, path_data_dump, f_pickled_ranked_perf_ranks_dict)
print(f'ranked_perf_ranks_dict:\n{ranked_perf_ranks_dict}\n')

ranked_perf_ranks_dict:
{'ranked_perf_ranks_period[-15, -30, -60, -120, -240]': ['ELF', 'TSBK', 'UFPT', 'AVEO', 'ASC', 'TPL', 'AXON', 'NRIM', 'AAON', 'INSW', 'RMBS', 'WNC', 'CHGG', 'FTI', 'ABMD', 'AE', 'LOPE', 'MOD', 'CHUY', 'GILD', 'LANC', 'EME', 'HFWA', 'STNG', 'AMKR', 'ASPS', 'SMCI', 'CRAI', 'FSLR', 'FMX', 'OMAB', 'FTSM', 'IBA', 'RENN', 'OSBC', 'EURN', 'ADEA', 'ERIE', 'TNK', 'SANM', 'TWNK', 'CROX', 'THRM', 'DD', 'CIR', 'MAC', 'PIPR', 'PLOW', 'AIMC', 'TR', 'PRTA', 'MYOV', 'ANIK', 'NTNX', 'MODN', 'FCBC', 'ORLY', 'AGYS', 'CHCO', 'DXCM', 'MGI', 'TNP', 'GPC', 'DMRC', 'HOFT', 'OSPN', 'PRDO', 'GS', 'NPO', 'MCRI', 'STRA', 'ODP', 'EXTR', 'LPG', 'MEDP', 'ROL', 'BIIB', 'ISEE', 'TTC', 'EVI', 'HTBK', 'STBA', 'TBBK', 'NFLX', 'PBT', 'VRTX', 'NBIX', 'NOC', 'LLY', 'DGII', 'CPRX', 'CCBG', 'RELL', 'HURN', 'SWIR', 'VCYT', 'QNST', 'DK', 'SMG', 'AXSM', 'NBHC', 'UNFI', 'ITGR', 'AFL', 'TRQ', 'OII', 'PDS', 'PARR', 'RNR', 'DENN', 'SAP', 'NVEC', 'FRO', 'TDW', 'SLB', 'HALO', 'NBTB', 'FIX', 'MRK', 'FBIZ', 'LHCG