In [13]:
import pandas as pd
import numpy as np
import os

In [14]:
# Raise pandas' display limits so wide/long frames are printed without truncation.
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 2000
pd.options.display.width = 2000
# Silence the chained-assignment (SettingWithCopy) warning; pandas' default is 'warn'.
pd.set_option('mode.chained_assignment', None)

In [15]:
# Directory holding the model's CSV inputs, and the ETF feature file inside it.
data_path = '/home/mvkrein/etf_model/data'
etf_data_file = os.path.join(data_path, 'etf_new_var_20180910.csv')
etf_data = pd.read_csv(etf_data_file, index_col=0)

# Unique dates are collected before sorting, so they follow the order in which
# they first appear in the file.
dates = list(etf_data['Date'].unique())

# Order rows by date then symbol and renumber the index from 0.
etf_data = (
    etf_data
    .sort_values(['Date', 'sym'], ascending=True)
    .reset_index(drop=True)
)

In [16]:
# Read the ETF study list from the same data directory; the file's first
# column becomes the index.
etf_study_file = os.path.join(data_path, 'ETF_list_min_6yr_history.csv')
etf_list = pd.read_csv(
    etf_study_file,
    index_col=0,
)

In [17]:
def monthly_sharpe(returns, N=12):
    """Annualised Sharpe ratio of a stream of periodic returns.

    Parameters
    ----------
    returns : object exposing ``.mean()`` and ``.std()`` (e.g. a pandas Series)
        Per-period returns. They are treated as already being in excess of
        whatever benchmark is relevant; no risk-free rate is subtracted here.
    N : int or float, default 12
        Number of return periods per year used for annualisation. The default
        of 12 corresponds to monthly returns; pass 252 for daily returns.

    Returns
    -------
    float
        ``sqrt(N) * mean(returns) / std(returns)``.
    """
    annualisation = np.sqrt(N)
    avg_rtn = returns.mean()
    volatility = returns.std()
    return annualisation * avg_rtn / volatility

In [18]:
def cagr(cum_rtns, N=12):
    """Compound annual growth rate of the market and model return streams.

    Parameters
    ----------
    cum_rtns : 2-D numpy array
        One row per period. Column 0 holds the market's cumulative growth
        factor and column 1 the model's (as built by the caller with
        ``np.cumprod(1 + returns)``). Only the last row is read.
    N : int or float, default 12
        Number of periods per year. The default of 12 matches the original
        hard-coded monthly assumption and mirrors ``monthly_sharpe``'s ``N``.

    Returns
    -------
    (mkt_cagr, model_cagr) : tuple of float
        ``final_growth ** (1 / years) - 1`` for each column, where
        ``years = len(cum_rtns) / N``.
    """
    # float() keeps this a true division regardless of the type of N.
    yrs = len(cum_rtns) / float(N)
    final_mkt = cum_rtns[-1, 0]
    final_model = cum_rtns[-1, 1]
    mkt_cagr = final_mkt ** (1 / yrs) - 1
    model_cagr = final_model ** (1 / yrs) - 1
    return mkt_cagr, model_cagr

In [19]:
# NOTE(review): leftover scratch cell that only echoes the loop counter `i`.
# No earlier visible cell assigns `i`, so this presumably relies on kernel
# state from a previous run of the back-test loop and would fail on a fresh
# Restart & Run All -- confirm and consider deleting.
i

20

In [23]:
# Back-test each saved lasso prediction set (files 0..20): every date, average
# the 'delta_p_L-21' value of the symbols ranked in the top 5 by prediction and
# compare that stream against IVV's 'delta_p_L-21' on the same dates.
# NOTE(review): 'target' is merged in below but never read afterwards -- confirm
# that 'delta_p_L-21' is the intended return column for the assessment.
output = []
for i in range(21):
    etf_predict_file = os.path.join(
        data_path, 'etf_pred_linear_lasso_20181002_' + str(i) + '.csv'
    )
    etf_predict = pd.read_csv(etf_predict_file, index_col=0)

    # Rank within each date, 1 = highest prediction. Dense ranking lets ties
    # share a rank, so more than five symbols can pass the <= 5 filter.
    etf_predict['rank_predict'] = etf_predict.groupby('Date')['predict'].rank(
        method='dense', ascending=False
    )

    # Attach predictions and ranks to the matching (Date, sym) rows of etf_data.
    output_df = etf_data.loc[:, ['Date', 'sym', 'delta_p_L-21', 'target']].merge(
        etf_predict.loc[:, ['Date', 'sym', 'predict', 'rank_predict']],
        on=['Date', 'sym'],
        how='inner',
    )

    # Keep the top-ranked picks dated strictly before the 2018-07-26 cutoff.
    is_top_pick = output_df['rank_predict'] <= 5.0
    is_before_cutoff = output_df['Date'] < '2018-07-26'
    assess_df = output_df.loc[is_top_pick & is_before_cutoff, :]

    # Equal-weighted average across the picks, one row per date.
    return_assess = (
        assess_df.groupby('Date')['delta_p_L-21']
        .mean()
        .to_frame('top_5_avg_rtn')
        .reset_index()
    )

    # Line the model stream up against IVV (used as the market series) by date.
    return_assess_vs_mkt = etf_data.loc[
        etf_data['sym'] == 'IVV', ['Date', 'sym', 'delta_p_L-21']
    ].merge(return_assess, on='Date', how='inner')

    mkt_sharpe = monthly_sharpe(return_assess_vs_mkt['delta_p_L-21'])
    model_sharpe = monthly_sharpe(return_assess_vs_mkt['top_5_avg_rtn'])

    # Cumulative growth factors: column 0 = market, column 1 = model.
    cum_rtn = np.column_stack((
        np.cumprod(1 + return_assess_vs_mkt['delta_p_L-21']),
        np.cumprod(1 + return_assess_vs_mkt['top_5_avg_rtn']),
    ))
    mkt_cagr, model_cagr = cagr(cum_rtn)

    port_output = [i, mkt_sharpe, model_sharpe, mkt_cagr, model_cagr, len(cum_rtn)]
    output.append(port_output)

In [24]:
# One row per portfolio collected in `output`. np.array() on the list of
# six-element rows is already 2-D, so the row count follows len(output) instead
# of a hard-coded reshape(21, 6) that had to be kept in sync with the loop range.
df_ports = pd.DataFrame(
    np.array(output),
    columns=['Port_#', 'mkt_sharpe', 'model_sharpe', 'mkt_cagr', 'model_cagr', 'months'],
)
df_ports

Unnamed: 0,Port_#,mkt_sharpe,model_sharpe,mkt_cagr,model_cagr,months
0,0.0,1.150681,0.781565,0.120036,0.127102,42.0
1,1.0,1.166491,0.630883,0.12096,0.094803,42.0
2,2.0,1.062045,0.360059,0.122051,0.046351,42.0
3,3.0,1.136558,0.326643,0.116329,0.039992,42.0
4,4.0,1.101932,0.312809,0.1151,0.038568,42.0
5,5.0,1.148064,0.505394,0.114183,0.068821,42.0
6,6.0,1.29034,0.627998,0.110207,0.085559,42.0
7,7.0,1.251754,0.348659,0.112367,0.043112,42.0
8,8.0,1.17645,0.317763,0.11347,0.037887,42.0
9,9.0,1.141078,0.161411,0.114501,0.012991,42.0


In [25]:
# Column-wise average of the Sharpe and CAGR metrics across all portfolios.
df_ports[['mkt_sharpe', 'model_sharpe', 'mkt_cagr', 'model_cagr']].mean()

mkt_sharpe      1.110651
model_sharpe    0.464454
mkt_cagr        0.114066
model_cagr      0.062176
dtype: float64