In [106]:
def symb_perf_stats_vectorized_v1(df_symbols_close):
    """Compute per-symbol drawdown, Ulcer Index, volatility and CAGR statistics.

    Returns, cumulative growth and the running peak are computed with pandas;
    the drawdown matrix is then reduced with NumPy and every result is wrapped
    back into a pandas object labelled by symbol.

    References:
        https://stackoverflow.com/questions/36750571/calculate-max-draw-down-with-a-vectorized-solution-in-python
        http://www.tangotools.com/ui/ui.htm
        Calculation CHECKED against: http://www.tangotools.com/ui/UlcerIndex.xls
        Calculation VERIFIED in: symb_perf_stats_vectorized.ipynb

    Args:
        df_symbols_close(dataframe): closing prices, one row per date (index)
          and one column per symbol (column names are the symbols).

    Return:
        symbols(pandas.core.indexes.base.Index): the input's column labels
        period_yr(float): number of rows in the input / 252
        drawdown(pandas dataframe): fractional distance below the running
            peak; values are <= 0, e.g. -0.05 is 5% below the peak. Same index
            and columns as the input.
        UI(pandas series float64): ulcer-index, the root-mean-square of the
            drawdown column
        max_drawdown(pandas series float64): most negative drawdown value
        returns_std(pandas series float64): sample standard deviation (ddof=1)
            of the row-to-row returns
        Std_UI(pandas series float64): returns_std / UI
        CAGR(pandas series float64): (last close / first close)
            ** (1 / period_yr) - 1
        CAGR_Std(pandas series float64): CAGR / returns_std
        CAGR_UI(pandas series float64): CAGR / UI
    """
    import numpy as np
    import pandas as pd

    tickers = df_symbols_close.columns
    n_rows = len(df_symbols_close)

    # Row-to-row simple returns; the first row has no predecessor and is NaN,
    # which pandas' std() skips.
    daily_ret = df_symbols_close / df_symbols_close.shift(1) - 1
    returns_std = daily_ret.std(ddof=1)  # divisor is N - ddof

    # Give the first row a zero return so cumulative growth starts at 1 on the
    # first date instead of the drawdown series starting on the second row.
    daily_ret.iloc[0] = 0
    growth = (1 + daily_ret).cumprod()

    # Reduce the drawdown matrix as a plain NumPy array.
    dd_values = np.array(growth.div(growth.cummax()) - 1)
    worst_dd = dd_values.min(axis=0)
    ulcer = np.sqrt(np.sum(np.square(dd_values), axis=0) / len(dd_values))

    # Re-attach the date index / symbol labels.
    drawdown = pd.DataFrame(dd_values, index=df_symbols_close.index,
                            columns=tickers)
    max_drawdown = pd.Series(worst_dd, index=tickers)
    UI = pd.Series(ulcer, index=tickers)

    period_yr = n_rows / 252  # 252 trading days per year
    total_growth = df_symbols_close.iloc[-1] / df_symbols_close.iloc[0]
    CAGR = total_growth ** (1 / period_yr) - 1

    return (tickers, period_yr, drawdown, UI, max_drawdown,
            returns_std, returns_std / UI, CAGR, CAGR / returns_std, CAGR / UI)

In [107]:
def symb_perf_stats_vectorized_v2(df_symbols_close):
    """NumPy implementation of the per-symbol performance statistics.

    The close prices are pulled out as a 2-D array (rows = dates, columns =
    symbols), every statistic is computed on arrays, and the results are
    labelled with the input's index / columns only at the end.

    References:
        https://stackoverflow.com/questions/36750571/calculate-max-draw-down-with-a-vectorized-solution-in-python
        http://www.tangotools.com/ui/ui.htm
        Calculation CHECKED against: http://www.tangotools.com/ui/UlcerIndex.xls
        Calculation VERIFIED in: symb_perf_stats_vectorized.ipynb

    Args:
        df_symbols_close(dataframe): closing prices, one row per date (index)
          and one column per symbol (column names are the symbols).

    Return:
        symbols(pandas.core.indexes.base.Index): the input's column labels
        period_yr(float): number of rows in the input / 252
        drawdown(pandas dataframe): fractional distance below the running
            peak; values are <= 0, e.g. -0.05 is 5% below the peak. Same index
            and columns as the input.
        UI(pandas series float64): ulcer-index, the root-mean-square of the
            drawdown column
        max_drawdown(pandas series float64): most negative drawdown value
        returns_std(pandas series float64): sample standard deviation (ddof=1)
            of the row-to-row returns
        Std_UI(pandas series float64): returns_std / UI
        CAGR(pandas series float64): (last close / first close)
            ** (1 / period_yr) - 1
        CAGR_Std(pandas series float64): CAGR / returns_std
        CAGR_UI(pandas series float64): CAGR / UI
    """
    import numpy as np
    import pandas as pd

    index, tickers = df_symbols_close.index, df_symbols_close.columns
    prices = df_symbols_close.to_numpy()

    # np.roll shifts rows down by one and wraps the last row to the top, so
    # row 0 of `rets` is not a real return.
    rets = prices / np.roll(prices, 1, axis=0) - 1
    # Volatility uses rows 1.. only, i.e. it skips the wrapped row.
    vol = np.std(rets[1:, :], axis=0, ddof=1)

    # Zero the first row so cumulative growth starts at 1 on the first date.
    rets[0] = 0
    growth = (1 + rets).cumprod(axis=0)
    running_peak = np.maximum.accumulate(growth, axis=0)
    dd = growth / running_peak - 1

    ulcer = np.sqrt(np.sum(np.square(dd), axis=0) / len(dd))
    period_yr = len(df_symbols_close) / 252  # 252 trading days per year
    cagr = (prices[-1] / prices[0]) ** (1 / period_yr) - 1

    def by_symbol(values):
        # Label a per-column array with the symbol names.
        return pd.Series(values, index=tickers)

    drawdown = pd.DataFrame(dd, index=index, columns=tickers)
    UI = by_symbol(ulcer)
    max_drawdown = by_symbol(dd.min(axis=0))
    returns_std = by_symbol(vol)
    Std_UI = by_symbol(vol / ulcer)
    CAGR = by_symbol(cagr)
    CAGR_Std = by_symbol(cagr / vol)
    CAGR_UI = by_symbol(cagr / ulcer)

    return (tickers, period_yr, drawdown, UI, max_drawdown,
            returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI)

In [108]:
import pandas as pd
import numpy as np
# from myUtils import pickle_load, symb_perf_stats_vectorized, symb_perf_stats_vectorized_v1
from myUtils import pickle_load, symb_perf_stats_vectorized

# Location of the pickled close-price frame.
# NOTE(review): path_dir is an absolute, machine-specific path; the notebook
# will not run elsewhere without editing it.
file_close = 'df_close_clean'
path_dir = "C:/Users/ping/MyDrive/stocks/yfinance"
path_data_dump = f"{path_dir}/VSCode_dump/"

# Load through the myUtils helper and keep only the last 252 rows.
df_c = pickle_load(path_data_dump, file_close)[-252:]

# Text preview (row count, first and last three rows); printing is disabled.
format1 = f'df_c({len(df_c)}):\n{df_c.head(3)}\n{df_c.tail(3)}'
# print(format1)

In [None]:
# Work on a copy of df_c and keep its row / column labels for the cells below.
df_symbols_close = df_c.copy()
dates, symbols = df_symbols_close.index, df_symbols_close.columns

In [None]:
# Display the loaded close-price frame as this cell's output.
df_c

In [None]:
%%timeit -n 20 -r 17
# Benchmark body: inline NumPy version of the statistics pipeline, the same
# steps as symb_perf_stats_vectorized_v2. Reads df_symbols_close, dates and
# symbols from an earlier cell.
# NOTE(review): %%timeit runs this body in its own scope, so the names assigned
# here are presumably not available to later cells — confirm before relying on
# them in the print cell that follows.
arr = df_symbols_close.to_numpy()
# np.roll wraps the last row to the top, so row 0 of arr_returns is not a real return
arr_returns = arr / np.roll(arr, 1, axis=0) - 1
# sample std (ddof=1) over rows 1.., skipping the wrapped row 0
arr_returns_std = np.std(arr_returns[1:, :], axis=0, ddof=1) 
# zero row 0 so the cumulative product starts at 1
arr_returns[0] = 0 
arr_cum_returns = (1 + arr_returns).cumprod(axis=0)
# drawdown = cumulative growth relative to its running maximum, minus 1 (<= 0)
arr_drawdown = arr_cum_returns / np.maximum.accumulate(arr_cum_returns, axis=0) -1
arr_max_drawdown = arr_drawdown.min(axis=0)    
# bare expression statement; it assigns nothing
arr_max_drawdown
# ulcer index: root-mean-square of each drawdown column
arr_UI = np.sqrt(np.sum(np.square(arr_drawdown), axis=0) / len(arr_drawdown))    
arr_Std_UI = arr_returns_std / arr_UI
# bare expression statement; it assigns nothing
arr_Std_UI
arr_Std_UI = pd.Series(arr_Std_UI, index=symbols)
arr_period_yr = len(df_symbols_close) / 252  # 252 trading days per year
arr_CAGR = (arr[-1] / arr[0]) ** (1 / arr_period_yr) - 1
arr_CAGR_Std = arr_CAGR / arr_returns_std
arr_CAGR_UI = arr_CAGR / arr_UI

# add date index and symbols as column names
arr_drawdown = pd.DataFrame(arr_drawdown, index=dates, columns=symbols)
arr_UI = pd.Series(arr_UI, index=symbols)
arr_max_drawdown = pd.Series(arr_max_drawdown, index=symbols)
arr_returns_std = pd.Series(arr_returns_std, index=symbols)
arr_CAGR = pd.Series(arr_CAGR, index=symbols)
arr_CAGR_Std = pd.Series(arr_CAGR_Std, index=symbols)
arr_CAGR_UI = pd.Series(arr_CAGR_UI, index=symbols)

In [None]:
# Names assigned inside a %%timeit cell are not kept in the notebook namespace,
# so compute the NumPy-based statistics here (outside the timer) before
# printing them. symb_perf_stats_vectorized_v2 runs the same steps as the
# benchmarked cell.
(symbols, arr_period_yr, arr_drawdown, arr_UI, arr_max_drawdown,
 arr_returns_std, arr_Std_UI, arr_CAGR, arr_CAGR_Std, arr_CAGR_UI) = \
    symb_perf_stats_vectorized_v2(df_symbols_close)

print(f'symbols:\n{symbols}')
print(f'arr_period_yr:\n{arr_period_yr}')
print(f'arr_drawdown:\n{arr_drawdown}')
print(f'arr_UI:\n{arr_UI}')
print(f'arr_max_drawdown:\n{arr_max_drawdown}')
print(f'arr_returns_std:\n{arr_returns_std}')
print(f'arr_Std_UI:\n{arr_Std_UI}')
print(f'arr_CAGR:\n{arr_CAGR}')
print(f'arr_CAGR_Std:\n{arr_CAGR_Std}')
print(f'arr_CAGR_UI:\n{arr_CAGR_UI}')

In [None]:
%%timeit -n 20 -r 17
# Benchmark body: inline pandas version of the statistics pipeline, the same
# steps as symb_perf_stats_vectorized_v1 without the final re-labelling.
# Reads df_symbols_close from an earlier cell.
# NOTE(review): %%timeit runs this body in its own scope, so the names assigned
# here are presumably not available to later cells — confirm.
df_symbols_returns = df_symbols_close / df_symbols_close.shift(1) - 1
# sample std (ddof=1) of the returns
returns_std = df_symbols_returns.std(ddof=1)
# returns_std
# zero the first row so the cumulative product starts at 1
df_symbols_returns.iloc[0] = 0
# df_symbols_returns
cum_returns = (1 + df_symbols_returns).cumprod()
# cum_returns.cummax()
# drawdown = cumulative growth relative to its running maximum, minus 1 (<= 0)
drawdown = cum_returns.div(cum_returns.cummax()) - 1
# from here on drawdown is a NumPy array, not a DataFrame
drawdown = np.array(drawdown)
max_drawdown = drawdown.min(axis=0)    
# ulcer index: root-mean-square of each drawdown column
UI = np.sqrt(np.sum(np.square(drawdown), axis=0) / len(drawdown))
Std_UI = returns_std / UI
period_yr = len(df_symbols_close) / 252  # 252 trading days per year
CAGR = (df_symbols_close.iloc[-1] / df_symbols_close.iloc[0]) \
    ** (1 / period_yr) - 1
CAGR_Std = CAGR / returns_std
CAGR_UI = CAGR / UI

In [None]:
# Names assigned inside a %%timeit cell are not kept in the notebook namespace,
# so compute the pandas-based statistics here (outside the timer) before
# printing them. symb_perf_stats_vectorized_v1 runs the same steps as the
# benchmarked cell and additionally labels drawdown / UI / max_drawdown.
(symbols, period_yr, drawdown, UI, max_drawdown,
 returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI) = \
    symb_perf_stats_vectorized_v1(df_symbols_close)

print(f'symbols:\n{symbols}')
print(f'period_yr:\n{period_yr}')
print(f'drawdown:\n{drawdown}')
print(f'UI:\n{UI}')
print(f'max_drawdown:\n{max_drawdown}')
print(f'returns_std:\n{returns_std}')
print(f'Std_UI:\n{Std_UI}')
print(f'CAGR:\n{CAGR}')
print(f'CAGR_Std:\n{CAGR_Std}')
print(f'CAGR_UI:\n{CAGR_UI}')

In [113]:
%%timeit -n 20 -r 17
# Baseline timing: the implementation imported from myUtils, run on df_c
# (17 runs of 20 loops each).
symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
    symb_perf_stats_vectorized(df_c)

91.2 ms ± 16.2 ms per loop (mean ± std. dev. of 17 runs, 20 loops each)


In [114]:
%%timeit -n 20 -r 17
# Timing of the v1 variant (pandas returns, NumPy drawdown reduction) on df_c
# (17 runs of 20 loops each).
symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
    symb_perf_stats_vectorized_v1(df_c)

73.9 ms ± 4.88 ms per loop (mean ± std. dev. of 17 runs, 20 loops each)


In [115]:
%%timeit -n 20 -r 17
# Timing of the v2 variant (NumPy throughout) on df_c
# (17 runs of 20 loops each).
symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
    symb_perf_stats_vectorized_v2(df_c)

26.6 ms ± 1.82 ms per loop (mean ± std. dev. of 17 runs, 20 loops each)


In [None]:
# The timing cells discard what they assign (names created inside %%timeit are
# not kept in the notebook namespace), so run the v2 function once more outside
# the timer and print its results.
(symbols, period_yr, drawdown, UI, max_drawdown,
 returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI) = \
    symb_perf_stats_vectorized_v2(df_c)

print(f'symbols:\n{symbols}')
print(f'period_yr:\n{period_yr}')
print(f'drawdown:\n{drawdown}')
print(f'UI:\n{UI}')
print(f'max_drawdown:\n{max_drawdown}')
print(f'returns_std:\n{returns_std}')
print(f'Std_UI:\n{Std_UI}')
print(f'CAGR:\n{CAGR}')
print(f'CAGR_Std:\n{CAGR_Std}')
print(f'CAGR_UI:\n{CAGR_UI}')

In [None]:
def perf_eval_0(df_close):
  '''
  Build a per-symbol performance table and three group-level mean/std ratios.

  Statistics come from symb_perf_stats_vectorized (imported from myUtils).

  Args:
    df_close: dataframe with date index, columns of symbols' closing price,
      and symbol as column name.

  Returns:
    df_perf: dataframe with one row per symbol and columns 'symbol',
      'first date', 'last date', 'Year', 'CAGR', 'UI', 'Std/UI', 'CAGR/Std',
      'CAGR/UI'. The two dates are the first and last index entries of the
      drawdown frame, formatted as YYYY-MM-DD.
    grp_StdUI_mean_std: mean of the 'Std/UI' column divided by its std.
    grp_CAGRStd_mean_std: mean of the 'CAGR/Std' column divided by its std.
    grp_CAGRUI_mean_std: mean of the 'CAGR/UI' column divided by its std.

  Note:
    The std is the pandas default (sample std), so with a single symbol it is
    NaN and all three ratios are NaN.
  '''
  symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
      symb_perf_stats_vectorized(df_close)

  # The date range is the same for every symbol, so format it once.
  date_first = drawdown.index[0].strftime('%Y-%m-%d')
  date_last = drawdown.index[-1].strftime('%Y-%m-%d')

  # one tuple of performance data per symbol
  caches_perf_stats_vect = [
      (symbol, date_first, date_last, period_yr, CAGR[symbol],
       UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
      for symbol in symbols
  ]
  column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                  'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
  # write symbols' performance stats to dataframe
  df_perf = pd.DataFrame(caches_perf_stats_vect, columns=column_names)

  def _mean_over_std(column):
    # group mean of a df_perf column divided by its standard deviation
    values = df_perf[column]
    return values.mean() / values.std()

  grp_CAGRUI_mean_std = _mean_over_std('CAGR/UI')
  grp_CAGRStd_mean_std = _mean_over_std('CAGR/Std')
  grp_StdUI_mean_std = _mean_over_std('Std/UI')

  return df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std

In [None]:
# Evaluate every symbol in df_c with the myUtils-based evaluator and print the
# three group-level mean/std ratios to six decimals.
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_0(df_c)
print(f'grp_StdUI_mean_std :{grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std :{grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std :{grp_CAGRUI_mean_std:.6f}')
# cell output: first rows of the per-symbol table
df_perf.head()

In [None]:
def perf_eval_1(df_close):
  '''
  Build a per-symbol performance table and three group-level mean/std ratios.

  Statistics come from symb_perf_stats_vectorized_v1.

  Args:
    df_close: dataframe with date index, columns of symbols' closing price,
      and symbol as column name.

  Returns:
    df_perf: dataframe with one row per symbol and columns 'symbol',
      'first date', 'last date', 'Year', 'CAGR', 'UI', 'Std/UI', 'CAGR/Std',
      'CAGR/UI'. The two dates are the first and last index entries of
      df_close, formatted as YYYY-MM-DD.
    grp_StdUI_mean_std: mean of the 'Std/UI' column divided by its std.
    grp_CAGRStd_mean_std: mean of the 'CAGR/Std' column divided by its std.
    grp_CAGRUI_mean_std: mean of the 'CAGR/UI' column divided by its std.

  Note:
    The std is the pandas default (sample std), so with a single symbol it is
    NaN and all three ratios are NaN.
  '''
  symbols, period_yr, drawdown, UI, max_drawdown, returns_std, Std_UI, CAGR, CAGR_Std, CAGR_UI = \
      symb_perf_stats_vectorized_v1(df_close)

  # The date range is the same for every symbol, so format it once.
  date_first = df_close.index[0].strftime('%Y-%m-%d')
  date_last = df_close.index[-1].strftime('%Y-%m-%d')

  # one tuple of performance data per symbol
  caches_perf_stats_vect = [
      (symbol, date_first, date_last, period_yr, CAGR[symbol],
       UI[symbol], Std_UI[symbol], CAGR_Std[symbol], CAGR_UI[symbol])
      for symbol in symbols
  ]
  column_names = ['symbol', 'first date', 'last date', 'Year', 'CAGR',
                  'UI', 'Std/UI', 'CAGR/Std', 'CAGR/UI']
  # write symbols' performance stats to dataframe
  df_perf = pd.DataFrame(caches_perf_stats_vect, columns=column_names)

  def _mean_over_std(column):
    # group mean of a df_perf column divided by its standard deviation
    values = df_perf[column]
    return values.mean() / values.std()

  grp_CAGRUI_mean_std = _mean_over_std('CAGR/UI')
  grp_CAGRStd_mean_std = _mean_over_std('CAGR/Std')
  grp_StdUI_mean_std = _mean_over_std('Std/UI')

  return df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std

In [None]:
# Evaluate every symbol in df_c with the v1-based evaluator and print the
# three group-level mean/std ratios to six decimals.
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_1(df_c)
print(f'grp_StdUI_mean_std :{grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std :{grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std :{grp_CAGRUI_mean_std:.6f}')
# cell output: first rows of the per-symbol table
df_perf.head()

In [None]:
# Subset of symbols used by the group-statistics cells below;
# each must be a column of df_c.
_cols2 = ['AE', 'ABMD', 'RMBS', 'WNC']

In [None]:
%%timeit -n 30 -r 20
# Approach A: recompute all statistics from the prices of the _cols2 subset.
# NOTE(review): %%timeit runs this body in its own scope, so df_temp, df_perf
# and the grp_* values assigned here are presumably not visible to later
# cells — confirm.
df_temp = df_c[_cols2].copy()
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_1(df_temp)

In [None]:
# The %%timeit cell above discards its results, so df_perf / grp_* at this
# point still hold the values from the earlier run over every symbol in df_c.
# Compute the _cols2 subset explicitly, under separate names so the
# full-universe df_perf stays available for the cells that filter it.
df_perf_sub, sub_StdUI_mean_std, sub_CAGRStd_mean_std, sub_CAGRUI_mean_std = \
    perf_eval_1(df_c[_cols2].copy())
print(f'grp_StdUI_mean_std:   {sub_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std: {sub_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std:  {sub_CAGRUI_mean_std:.6f}')
df_perf_sub.head()

In [None]:
%%timeit -n 30 -r 20
# Approach B: reuse an existing df_perf, keep only the _cols2 rows and take
# mean / std of the three ratio columns.
# NOTE(review): %%timeit runs this body in its own scope, so _mean_std is
# presumably not visible to the print cell that follows — confirm.
_df = df_perf.loc[df_perf['symbol'].isin(_cols2)]
_means = _df[['Std/UI',	'CAGR/Std', 'CAGR/UI']].mean()
_stds = _df[['Std/UI',	'CAGR/Std', 'CAGR/UI']].std()
_mean_std = _means / _stds

In [None]:
# _mean_std assigned inside the %%timeit cell is not kept in the notebook
# namespace, so compute it here before printing.
_ratio_cols = ['Std/UI', 'CAGR/Std', 'CAGR/UI']
_df = df_perf.loc[df_perf['symbol'].isin(_cols2)]
_mean_std = _df[_ratio_cols].mean() / _df[_ratio_cols].std()

# Index by label: integer keys on a label-indexed Series are deprecated.
print(f'grp_StdUI_mean_std:   {_mean_std["Std/UI"]:.6f}')
print(f'grp_CAGRStd_mean_std: {_mean_std["CAGR/Std"]:.6f}')
print(f'grp_CAGRUI_mean_std:  {_mean_std["CAGR/UI"]:.6f}')

In [None]:
# Single-symbol run on SPY. perf_eval_1 is called because no plain `perf_eval`
# is defined in the visible cells of this notebook.
# With one symbol the group std is NaN (sample std of a single value), so the
# three mean/std ratios print as nan; the SPY row in df_perf is unaffected.
df_SPY = df_c[['SPY']].copy()
df_perf, grp_StdUI_mean_std, grp_CAGRStd_mean_std, grp_CAGRUI_mean_std = perf_eval_1(df_SPY)
print(f'grp_StdUI_mean_std: {grp_StdUI_mean_std:.6f}')
print(f'grp_CAGRStd_mean_std: {grp_CAGRStd_mean_std:.6f}')
print(f'grp_CAGRUI_mean_std: {grp_CAGRUI_mean_std:.6f}')
df_perf.head()