In [1]:
def symb_perf_stats_vectorized_v1(df_symbols_close):
    """Compute per-symbol drawdown, Ulcer Index and CAGR statistics.

       Takes dataframe of symbols' close and returns symbols, period_yr,
       drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI
       https://stackoverflow.com/questions/36750571/calculate-max-draw-down-with-a-vectorized-solution-in-python
       http://www.tangotools.com/ui/ui.htm
       Calculation CHECKED against: http://www.tangotools.com/ui/UlcerIndex.xls
       Calculation VERIFIED in: symb_perf_stats_vectorized.ipynb

    Args:
        df_symbols_close(dataframe): dataframe with date as index,
          symbol's close in columns, and symbols as column names.
          Must contain at least one row (the first row of returns is
          overwritten with 0).

    Return:
        symbols(pandas.core.indexes.base.Index): stock symbols
        period_yr(float): years, (rows in dataframe) / 252
        drawdown(numpy array): 2-D array of drawdown from the running peak;
            rows follow the dataframe's rows and columns follow `symbols`.
            It carries no index or column labels. For positive prices the
            values are <= 0, e.g. -0.05 means 5% below the peak.
        UI(pandas.series float64): ulcer-index, the root-mean-square of
            each symbol's drawdown column
        max_drawdown(pandas series float64): per-symbol maximum drawdown
            from peak (the most negative value in that symbol's column)
        returns_std(pandas series float64): standard deviation of daily returns
        Std_UI(pandas series float64): returns_std / UI
        CAGR(pandas series float64): compounded annual growth rate
        CAGR_Std(pandas series float64): CAGR / returns_std
        CAGR_UI(pandas series float64): CAGR / UI
    """
    # v1 convert drawdown from pandas dataframe to numpy array

    import numpy as np
    import pandas as pd

    symbols = df_symbols_close.columns
    df_symbols_returns = df_symbols_close / df_symbols_close.shift(1) - 1
    # standard deviation divisor is N - ddof; computed before the first row
    # is zeroed so the artificial 0 return does not enter the statistic
    returns_std = df_symbols_returns.std(ddof=1)

    # +++ SET RETURNS OF FIRST ROW = 0,
    #  otherwise drawdown calculation starts with the second row
    df_symbols_returns.iloc[0] = 0
    cum_returns = (1 + df_symbols_returns).cumprod()

    drawdown = cum_returns.div(cum_returns.cummax()) - 1
    # convert from pandas DataFrame into a NumPy array
    drawdown = np.array(drawdown)

    # Reduce along the rows (axis=0) so every symbol keeps its own value.
    # A bare .min() on the 2-D array would collapse all symbols into a
    # single scalar, contradicting the documented per-symbol Series.
    max_drawdown = pd.Series(drawdown.min(axis=0), index=symbols)

    UI = np.sqrt(np.sum(np.square(drawdown), axis=0) / len(drawdown))
    UI = pd.Series(UI, index=symbols)

    Std_UI = returns_std / UI
    period_yr = len(df_symbols_close) / 252  # 252 trading days per year
    CAGR = (df_symbols_close.iloc[-1] / df_symbols_close.iloc[0]) \
        ** (1 / period_yr) - 1
    CAGR_Std = CAGR / returns_std
    CAGR_UI = CAGR / UI

    return symbols, period_yr, drawdown, UI, max_drawdown, \
        returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI

In [2]:
import pandas as pd
# from myUtils import pickle_load, symb_perf_stats_vectorized, symb_perf_stats_vectorized_v1
from myUtils import pickle_load, symb_perf_stats_vectorized

# Name of the pickled close-price dataframe and the directories it is read from.
# NOTE(review): path_dir is an absolute, machine-specific path; the cell will
# not run elsewhere without editing it.
file_close = 'df_close_clean'
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance"
path_data_dump = path_dir + "/VSCode_dump/"

# pickle_load comes from myUtils; presumably it unpickles `file_close` from
# `path_data_dump` -- confirm against myUtils.
df_c = pickle_load(path_data_dump, file_close)
# Keep only the last 252 rows (the stats code treats 252 rows as one year).
# This rebinds df_c, so the full history is no longer reachable from this name.
df_c = df_c[-252::]
# Preview string (row count, first 3 rows, last 3 rows); only consumed by the
# commented-out print below.
format1 = 'df_c({}):\n{}\n{}'.format(len(df_c), df_c.head(3), df_c.tail(3))
# print(format1)

In [3]:
%%timeit -n 20 -r 17
# Baseline timing: the myUtils implementation on df_c, 20 loops x 17 runs.
symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
    symb_perf_stats_vectorized(df_c)

91 ms ± 17.5 ms per loop (mean ± std. dev. of 17 runs, 20 loops each)


In [4]:
%%timeit -n 20 -r 17
# Same timing settings as the baseline cell, but for the v1 function defined
# in this notebook (drawdown returned as a NumPy array).
symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
    symb_perf_stats_vectorized_v1(df_c)

71.7 ms ± 1.68 ms per loop (mean ± std. dev. of 17 runs, 20 loops each)


In [5]:
def perf_eval_0(df_close):
  '''
  Build a per-symbol performance table and three group-level mean/std ratios.

  Uses symb_perf_stats_vectorized (imported from myUtils) for the statistics.
  The first/last dates are read from drawdown.index, so that function must
  return drawdown as a pandas object with a date index; the v1 variant in this
  notebook returns a NumPy array and is handled by perf_eval_1 instead.

  Args:
    df_close: dataframe with date index, columns of symbols' closing price,
      and symbol as column name.

  Returns:
    df_perf: dataframe with one row per symbol and columns
      'symbol', 'first date', 'last date', 'Year', 'CAGR', 'UI',
      'Std/UI', 'CAGR/Std', 'CAGR/UI'.
    grp_StdUI_mean_std: mean of 'Std/UI' across symbols divided by its std.
    grp_CAGRStd_mean_std: mean of 'CAGR/Std' across symbols divided by its std.
    grp_CAGRUI_mean_std: mean of 'CAGR/UI' across symbols divided by its std.
  '''
  symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
      symb_perf_stats_vectorized(df_close)

  # The date range is identical for every symbol, so format it once instead
  # of on every loop iteration.
  date_first = drawdown.index[0].strftime('%Y-%m-%d')
  date_last = drawdown.index[-1].strftime('%Y-%m-%d')

  # one tuple of performance data per symbol
  caches_perf_stats_vect = [
      (symbol, date_first, date_last, period_yr, CAGR[symbol],
       UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
      for symbol in symbols
  ]
  column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                  'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
  # write symbols' performance stats to dataframe
  df_perf = pd.DataFrame(caches_perf_stats_vect, columns=column_names)

  def _mean_over_std(column):
    # Group-level ratio: column mean divided by column sample std.
    values = df_perf[column]
    return values.mean() / values.std()

  grp_CAGRUI_mean_std = _mean_over_std('CAGR/UI')
  grp_CAGRStd_mean_std = _mean_over_std('CAGR/Std')
  grp_StdUI_mean_std = _mean_over_std('Std/UI')

  return df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std

In [6]:
# Run the baseline evaluation on every symbol in df_c, print the three
# group-level mean/std ratios, and show the first rows of the table.
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_0(df_c)
print(f'grp_StdUI_mean_std :{grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std :{grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std :{grp_CAGRUI_mean_std:.6f}')
df_perf.head()

grp_StdUI_mean_std :2.037027
grp_CAGRStd_mean_std :-0.282575
grp_CAGRUI_mean_std :0.069782


Unnamed: 0,symbol,first date,last date,Year,CAGR,UI,Std/UI,CAGR/Std,CAGR/UI
0,A,2021-12-17,2022-12-16,1.0,-0.007686,0.182331,0.122173,-0.345043,-0.042155
1,AA,2021-12-17,2022-12-16,1.0,-0.199027,0.399799,0.104572,-4.760511,-0.497818
2,AAL,2021-12-17,2022-12-16,1.0,-0.244248,0.2503,0.141526,-6.894994,-0.975819
3,AAON,2021-12-17,2022-12-16,1.0,-0.013452,0.266528,0.09235,-0.546513,-0.05047
4,AAP,2021-12-17,2022-12-16,1.0,-0.376316,0.214052,0.109728,-16.021879,-1.758053


In [7]:
def perf_eval_1(df_close):
  '''
  Build a per-symbol performance table and three group-level mean/std ratios.

  Uses symb_perf_stats_vectorized_v1 for the statistics. That function returns
  drawdown as a NumPy array (no index), so the first/last dates are taken from
  df_close.index.

  Args:
    df_close: dataframe with date index, columns of symbols' closing price,
      and symbol as column name.

  Returns:
    df_perf: dataframe with one row per symbol and columns
      'symbol', 'first date', 'last date', 'Year', 'CAGR', 'UI',
      'Std/UI', 'CAGR/Std', 'CAGR/UI'.
    grp_StdUI_mean_std: mean of 'Std/UI' across symbols divided by its std.
    grp_CAGRStd_mean_std: mean of 'CAGR/Std' across symbols divided by its std.
    grp_CAGRUI_mean_std: mean of 'CAGR/UI' across symbols divided by its std.
  '''
  symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
      symb_perf_stats_vectorized_v1(df_close)

  # The date range is identical for every symbol, so format it once instead
  # of on every loop iteration.
  date_first = df_close.index[0].strftime('%Y-%m-%d')
  date_last = df_close.index[-1].strftime('%Y-%m-%d')

  # one tuple of performance data per symbol
  caches_perf_stats_vect = [
      (symbol, date_first, date_last, period_yr, CAGR[symbol],
       UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
      for symbol in symbols
  ]
  column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                  'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
  # write symbols' performance stats to dataframe
  df_perf = pd.DataFrame(caches_perf_stats_vect, columns=column_names)

  def _mean_over_std(column):
    # Group-level ratio: column mean divided by column sample std.
    values = df_perf[column]
    return values.mean() / values.std()

  grp_CAGRUI_mean_std = _mean_over_std('CAGR/UI')
  grp_CAGRStd_mean_std = _mean_over_std('CAGR/Std')
  grp_StdUI_mean_std = _mean_over_std('Std/UI')

  return df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std

In [8]:
# Same run as the perf_eval_0 cell but through the v1 statistics function;
# the printed ratios and table are expected to match the baseline output.
# This rebinds df_perf and the grp_* names from the earlier cell.
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_1(df_c)
print(f'grp_StdUI_mean_std :{grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std :{grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std :{grp_CAGRUI_mean_std:.6f}')
df_perf.head()

grp_StdUI_mean_std :2.037027
grp_CAGRStd_mean_std :-0.282575
grp_CAGRUI_mean_std :0.069782


Unnamed: 0,symbol,first date,last date,Year,CAGR,UI,Std/UI,CAGR/Std,CAGR/UI
0,A,2021-12-17,2022-12-16,1.0,-0.007686,0.182331,0.122173,-0.345043,-0.042155
1,AA,2021-12-17,2022-12-16,1.0,-0.199027,0.399799,0.104572,-4.760511,-0.497818
2,AAL,2021-12-17,2022-12-16,1.0,-0.244248,0.2503,0.141526,-6.894994,-0.975819
3,AAON,2021-12-17,2022-12-16,1.0,-0.013452,0.266528,0.09235,-0.546513,-0.05047
4,AAP,2021-12-17,2022-12-16,1.0,-0.376316,0.214052,0.109728,-16.021879,-1.758053


In [15]:
# Four-symbol subset used by the timing and comparison cells that follow.
_cols2 = ['AE', 'ABMD', 'RMBS', 'WNC']

In [19]:
%%timeit -n 30 -r 20
# Times the full recomputation route: slice the four columns out of df_c and
# rerun perf_eval_1 on them.
# NOTE(review): names assigned inside a %%timeit cell are local to the timing
# run, so df_perf and the grp_* values are NOT updated in the notebook
# namespace by this cell -- later cells see whatever an earlier cell left.
df_temp = df_c[_cols2].copy()
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_1(df_temp)

6.56 ms ± 695 µs per loop (mean ± std. dev. of 20 runs, 30 loops each)


In [21]:
# Prints the group ratios and table currently held in the notebook namespace.
# NOTE(review): the 4-row output shown for this cell implies perf_eval_1 was
# run on the _cols2 subset outside of %%timeit before this cell executed;
# on a fresh Restart & Run All these names would still hold the all-symbol
# results -- confirm.
print(f'grp_StdUI_mean_std: {grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std: {grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std: {grp_CAGRUI_mean_std:.6f}')
df_perf.head()

grp_StdUI_mean_std: 4.385780
grp_CAGRStd_mean_std: 1.804749
grp_CAGRUI_mean_std: 1.442305


Unnamed: 0,symbol,first date,last date,Year,CAGR,UI,Std/UI,CAGR/Std,CAGR/UI
0,AE,2021-12-17,2022-12-16,1.0,0.517507,0.136025,0.184495,20.621272,3.804512
1,ABMD,2021-12-17,2022-12-16,1.0,0.207416,0.225783,0.187439,4.901061,0.918649
2,RMBS,2021-12-17,2022-12-16,1.0,0.354357,0.202122,0.127407,13.760481,1.753179
3,WNC,2021-12-17,2022-12-16,1.0,0.273579,0.242853,0.122113,9.225254,1.12652


In [20]:
%%timeit -n 30 -r 20
# Times the cheaper route: filter the already-computed df_perf down to the
# _cols2 symbols and take mean / std of the three ratio columns.
# NOTE(review): names assigned inside a %%timeit cell are local to the timing
# run, so _df, _means, _stds and _mean_std are not left in the notebook
# namespace by this cell.
_df = df_perf.loc[df_perf['symbol'].isin(_cols2)]
_means = _df[['Std/UI',	'CAGR/Std', 'CAGR/UI']].mean()
_stds = _df[['Std/UI',	'CAGR/Std', 'CAGR/UI']].std()
_mean_std = _means / _stds

3.15 ms ± 507 µs per loop (mean ± std. dev. of 20 runs, 30 loops each)


In [23]:
# print(_mean_std, _mean_std[0], _mean_std[1], _mean_std[2])
# Recompute the ratios in this cell: the %%timeit cell that assigns _mean_std
# does not leave it in the notebook namespace, so reading it here would fail
# on a fresh kernel.
_ratio_cols = ['Std/UI', 'CAGR/Std', 'CAGR/UI']
_perf_subset = df_perf.loc[df_perf['symbol'].isin(_cols2), _ratio_cols]
_mean_std = _perf_subset.mean() / _perf_subset.std()
# Index by column label; integer keys on a label-indexed Series are a
# deprecated positional fallback in pandas.
print(f'grp_StdUI_mean_std: {_mean_std["Std/UI"]:.6f}')
print(f'grp_CAGRStd_mean_std: {_mean_std["CAGR/Std"]:.6f}')
print(f'grp_CAGRUI_mean_std: {_mean_std["CAGR/UI"]:.6f}')

grp_StdUI_mean_std: 4.385780
grp_CAGRStd_mean_std: 1.804749
grp_CAGRUI_mean_std: 1.442305


In [None]:
# Evaluate SPY on its own as a one-column dataframe.
df_SPY = df_c[['SPY']].copy()
# A bare expression in the middle of a cell is not displayed; only the last
# expression of the cell is.
df_SPY
# NOTE(review): perf_eval is not defined in the visible cells (only
# perf_eval_0 and perf_eval_1 are) -- confirm it exists before running.
# NOTE(review): if perf_eval mirrors perf_eval_0/1, df_perf has a single row,
# so each column's std() is NaN and the three ratios print as nan -- verify.
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval(df_SPY)
print(f'grp_StdUI_mean_std: {grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std: {grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std: {grp_CAGRUI_mean_std:.6f}')
df_perf.head()