### Initialize Packages & Data

In [2]:
# load libraries
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from statsmodels.api import OLS, add_constant
import numpy as np
import pandas as pd

In [3]:
# load firm data
df = pd.read_csv('value_data_ml.csv')
df['Date'] = pd.to_datetime(df['Date'],format='%Y-%m-%d')
df['year'] = pd.to_datetime(df['Date'],format='%Y-%m-%d').dt.to_period('Y')
df['month'] = pd.to_datetime(df['month'],format='%Y-%m-%d').dt.to_period('M')
df = df.rename(columns={'Adj Price':'Price','Return on Equity':'ROE','Return on Assets':'ROA',
                        'Return on Capital Employed':'ROCE','PSales Ratio':'P/S','P/B Ratio':'P/B',
                        'PE Ratio':'P/E','DE Ratio':'D/E','Earnings Yield':'EY'})
df = df.dropna().reset_index(drop=True)

# Load Fama-French factors & shift
ff3 = pd.read_csv('ff3_clean.csv')
ff3cols = ['Mkt-RF','SMB','HML','RF']
ff3['month'] = pd.to_datetime(ff3['date_month']).dt.to_period('M')
ff3 = ff3[['month','Mkt-RF','SMB','HML','RF']]

# Load S&P 500 data
sp = pd.read_csv('sp500_data.csv')
sp['month'] = pd.to_datetime(sp['caldt'],format='%Y%m%d').dt.to_period('M')
sp = sp[['month','sprtrn']].rename(columns={'sprtrn':'sp_return'})

In [4]:
# define common column lists
ind_cols = ['Price','ROE','ROA','P/S','WC KPI','Current Ratio','D/E','P/E','P/B','EY','ROCE','MKTCAP']
dep_cols = ['RET_hold', 'RET_hold_rank', 'RET_hold_top', 'beat_market']
good_metrics = ['ROE','ROA','WC KPI','Current Ratio','D/E','ROCE']
cheap_metrics = ['P/E','P/B','P/S','EY']

### Define Back Test Object & Functions

In [5]:
# define backtest class
class strategy:
    # define portfolio build loop function as init
    def __init__(self, df, start, end, cols, method='simple', value=True, dep='RET_hold'):
        # run build for first time
        self.const = reconstitute(df, start, end, cols, method=method, dep=dep)
        self.data, self.start, self.end, self.method = df, start, end, method
        self.cols, self.dep = cols, dep
        self.rebalance(value=value)
        self.performace()
        self.test()
        print('Portfolios Built\n')
            
    # define build ports function for later iterations
    def reconstitute(self, df, start, end, cols, method='simple', dep='RET_hold'):
        # run for later builds
        self.const = reconstitute(df, start, end, cols=cols, method=method, dep=dep)
        self.data, self.start, self.end, self.method = df, start, end, method
        self.cols, self.dep = cols, dep
        print('Portfolios Balanced')
        
    # define weight function
    def rebalance(self, value=True):
        self.ports, self.const = rebalance(self.const, value=value)
        self.value = value
        print('Returns Weighted')
    
    # define performance evaluation & comparision function
    def performace(self):
        self.main = performace(self.ports)
        print('Portfolios Compiled')
    
    # define CAPM & Fama-French tests
    def test(self):
        self.alphas = test(self.ports)
        print('Test Complete')

In [8]:
# define function to recontsitute portfolio each month with top 5 firms by ranking
def constitute(df, month, cols, method='simple', dep='RET_hold'):
    # Getting month data on quarter cycle & define columns
    X_test = df[df['month'] == month]

    # Rank through target features and define rank score
    if method == 'simple':
        rank = X_test[cols[0]].rank(ascending=False)
        for c in range(1,len(cols[:])): rank += (X_test[cols[c]].rank(ascending=False))
        X_test['score'] = rank
        
    # Rank via ML score
    elif method in ['ML','RF']:
        # isolate train data & scale
        o = df[df['month'].between(month-134, month-12)] # 10 year train window
        X_train, y_train = o[cols], o[dep]
        scl = StandardScaler()
        X_train = scl.fit_transform(X_train)
        X_test_scl = scl.transform(X_test[cols])
        
        # run model
        if method == 'ML': mdl = LinearRegression()
        else: mdl = RandomForestRegressor()
        mdl.fit(X_train, y_train)
        X_test['score'] = pd.Series(mdl.predict(X_test_scl)).rank(ascending=False)

    # Get the top 5 based on best score
    top5_permnos = X_test.sort_values(by='score')[:5]['PERMNO'].tolist()
    mask = (df['PERMNO'].isin(top5_permnos)) & (df['month'].between(month + 1,month + 12))
    top5_data = df[mask]
    return top5_data

# define function to complie portfolio constituients over whole period range
def reconstitute(df, start_date, end_date, cols, method='simple', dep='RET_hold'):
    top5_stocks = pd.DataFrame()
    while start_date <= end_date:
        top5 = constitute(df, start_date, cols=cols, method=method, dep=dep)
        top5_stocks = top5_stocks.append(top5)
        start_date += 3
    return top5_stocks.reset_index(drop=True) 

# define function to weight and compile constituents
def rebalance(constituent_data, value=True):
    mrk_mon = constituent_data.groupby(['month'])['MKTCAP'].sum() # get total market cap for month
    constituent_data = constituent_data.merge(mrk_mon, on='month')
    constituent_data['weight'] = constituent_data['MKTCAP_x']/constituent_data['MKTCAP_y'] # get value-weight
    constituent_data['w_ret'] = constituent_data['weight']*constituent_data['RET']
    constituent_data = constituent_data.rename(columns={'MKTCAP_x':'MKTCAP','MKTCAP_y':'MKTCAP_total'})
    if value: # value-weight returns
        portfolio_data = pd.DataFrame(constituent_data.groupby('month')['w_ret'].sum())
        portfolio_data = portfolio_data.reset_index().rename(columns={'w_ret':'return'})
    else: # equal-weight returns
        portfolio_data = pd.DataFrame(constituent_data.groupby('month')['RET'].mean())
        portfolio_data = portfolio_data.reset_index().rename(columns={'RET':'return'})
    
    # add premium and Fama-French features
    portfolio_data = portfolio_data.merge(ff3, on='month')
    portfolio_data['premium'] = portfolio_data['return'] - portfolio_data['RF']
    portfolio_data = portfolio_data.merge(sp, on='month')
    portfolio_data['sp_premium'] = portfolio_data['sp_return'] - portfolio_data['RF']
    return portfolio_data, constituent_data

# define function to get performance
def performace(portfolio_data):
    # get portfolio performance
    performace_data = pd.DataFrame((1+portfolio_data[['return','premium']].mean())**12-1).T
    performace_data['risk'] = portfolio_data['return'].std()*(12**0.5)
    performace_data['Sharpe'] = performace_data['premium']/performace_data['risk']
    
    # get S&P500 performance
    sp_performace_data = pd.DataFrame((1+portfolio_data[['sp_return','sp_premium']].mean())**12-1).T
    sp_performace_data.columns = ['return','premium']
    sp_performace_data['risk'] = portfolio_data['sp_return'].std()*(12**0.5)
    sp_performace_data['Sharpe'] = sp_performace_data['premium']/sp_performace_data['risk']
    
    # combine performance
    performace_data = performace_data.append(sp_performace_data)
    performace_data.index = ['Strategy','S&P500']
    return performace_data

# define function for FF3F alpha test
def test(portfolio_data):
    # run FF3F regression
    X, y = portfolio_data[['Mkt-RF','SMB','HML']], portfolio_data['premium']
    X = add_constant(X)
    ols = OLS(y.values, X).fit()
    
    # organize results & get r2
    out = pd.DataFrame(ols.summary2().tables[1][0:1][['Coef.','t']])
    out = out.rename(columns={'Coef.':'alpha','t':'t-stat'}).reset_index(drop=True)
    out['R2'] = pd.Series(ols.summary2().tables[0][1][6])[0]
    out['Mkt-RF Beta'] = pd.Series(ols.summary2().tables[1][1:2]['Coef.'])[0]
    out['Mkt-RF t'] = (out['Mkt-RF Beta'] - 1)/pd.Series(ols.summary2().tables[1][1:2]['Std.Err.'])[0]
    out['SMB Beta'] = pd.Series(ols.summary2().tables[1][2:3]['Coef.'])[0]
    out['SMB t'] = (out['SMB Beta'] - 1)/pd.Series(ols.summary2().tables[1][2:3]['Std.Err.'])[0]
    out['HML Beta'] = pd.Series(ols.summary2().tables[1][3:4]['Coef.'])[0]
    out['HML t'] = (out['HML Beta'] - 1)/pd.Series(ols.summary2().tables[1][3:4]['Std.Err.'])[0]
    
    # run CAPM regression
    X, y = portfolio_data[['Mkt-RF']], portfolio_data['premium']
    X = add_constant(X)
    ols = OLS(y.values, X).fit()
    
    # organize results & get r2
    out2 = pd.DataFrame(ols.summary2().tables[1][0:1][['Coef.','t']])
    out2 = out2.rename(columns={'Coef.':'alpha','t':'t-stat'}).reset_index(drop=True)
    out2['R2'] = pd.Series(ols.summary2().tables[0][1][6])[0]
    out2['Mkt-RF Beta'] = pd.Series(ols.summary2().tables[1][1:2]['Coef.'])[0]
    out2['Mkt-RF t'] = (out2['Mkt-RF Beta'] - 1)/pd.Series(ols.summary2().tables[1][1:2]['Std.Err.'])[0]
    for c in ['SMB Beta','SMB t','HML Beta','HML t']: out2[c] = '-'
    
    # combine tests & annualize alpha
    out = out.append(out2)
    out.index = ['Fama-French','CAPM']
    out['alpha'] = (1 + out.alpha)**12 - 1
    return out

### Back Test Strategies

#### Whole Period w/ Agg Rank & ML Strategies

In [165]:
# try simple & ML, full period strategy test
start_date, end_date = df['month'].min() + 12, df['month'].max()
cols, y_col, method = ['ROCE','EY'], 'RET_hold', 'ML'
whole = strategy(df, start_date, end_date, cols)
whole_ml = strategy(df, start_date, end_date, cols, method=method, dep=y_col)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



In [152]:
whole.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.211831,0.179773,0.230793,0.778937
S&P500,0.09166,0.062532,0.14632,0.427362


In [153]:
whole.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.096989,2.818072,0.378,0.871629,-2.001754,0.126546,-9.61693,-0.24557,-13.6427
CAPM,0.093044,2.67752,0.36,0.913157,-1.382754,-,-,-,-


In [154]:
whole_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.29505,0.260977,0.290965,0.896935
S&P500,0.09166,0.062532,0.14632,0.427362


In [155]:
whole_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.15295,3.492838,0.394,0.973603,-0.331136,0.75275,-2.19001,0.257432,-6.54308
CAPM,0.152097,3.291613,0.321,1.085337,1.047328,-,-,-,-


#### Joel vs the Future w/ Agg Rank & ML Strategies on ROCE, EY pair

In [45]:
# intialize constants & structures for period tests
start_date, end_date = df['month'].min() + 12, df['month'].max()
mid_date = df['month'].unique()[199] # '2005-01'
cols, y_col, method = ['ROCE','EY'], 'RET_hold', 'ML'

# run past, future Joel period strategy test on Agg method
joel = strategy(df, start_date, mid_date, cols, value=value)
new = strategy(df, mid_date, end_date, cols, value=value)

# run past, future Joel period strategy test on ML method
joel_ml = strategy(df, start_date, mid_date, cols, method=method, dep=y_col)
new_ml = strategy(df, mid_date, end_date, cols, method=method, dep=y_col)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



In [156]:
joel.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.265118,0.214308,0.262641,0.815973
S&P500,0.097045,0.052468,0.141283,0.371367


In [157]:
joel.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.122501,2.15549,0.357,1.022003,0.190305,0.252445,-5.81462,0.023303,-5.85957
CAPM,0.124816,2.242922,0.343,1.054173,0.523976,-,-,-,-


In [158]:
joel_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.423393,0.366774,0.320638,1.143887
S&P500,0.097045,0.052468,0.141283,0.371367


In [159]:
joel_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.237485,3.205802,0.33,1.070806,0.491951,0.758524,-1.50881,0.260963,-3.56168
CAPM,0.262662,3.435133,0.252,1.101718,0.756252,-,-,-,-


In [160]:
new.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.142508,0.128937,0.174686,0.738109
S&P500,0.086145,0.073189,0.14862,0.492459


In [161]:
new.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.03127,0.915025,0.462,0.800427,-2.926405,0.00534191,-7.87548,-0.340817,-13.1237
CAPM,0.05122,1.472322,0.429,0.741148,-4.12914,-,-,-,-


In [162]:
new_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.208413,0.194126,0.250425,0.775187
S&P500,0.086145,0.073189,0.14862,0.492459


In [163]:
new_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.106597,2.181345,0.499,0.856374,-1.521723,0.732896,-1.52811,0.336929,-4.68939
CAPM,0.078247,1.552037,0.43,1.064314,0.716298,-,-,-,-


#### Whole, Joel, & Future w/ Full Features and Agg Rank & ML Strategies

In [62]:
# intialize constants & structures for period tests
start_date, end_date, value = df['month'].min() + 12, df['month'].max(), True
mid_date = df['month'].unique()[199] # '2005-01'
cols, y_col, method = ind_cols, 'RET_hold', 'ML'

# run past, future Joel period strategy test
joel_full = strategy(df, start_date, mid_date, cols)
new_full = strategy(df, mid_date, end_date, cols)
whole_full = strategy(df, start_date, end_date, cols)

# run past, future Joel period strategy test
joel_full_ml = strategy(df, start_date, mid_date, cols, method=method, dep=y_col)
new_full_ml = strategy(df, mid_date, end_date, cols, method=method, dep=y_col)
whole_full_ml = strategy(df, start_date, end_date, cols, method=method, dep=y_col)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [116]:
whole_full.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.123953,0.094035,0.222963,0.421751
S&P500,0.09166,0.062532,0.14632,0.427362


In [117]:
whole_full.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.012265,0.434489,0.518,0.989786,-0.187099,-0.272217,-16.4549,-0.547269,-19.9084
CAPM,0.00698,0.232349,0.448,0.985142,-0.26347,-,-,-,-


In [118]:
whole_full_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.284663,0.250841,0.303751,0.825811
S&P500,0.09166,0.062532,0.14632,0.427362


In [119]:
whole_full_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.134032,3.052825,0.433,1.082878,1.028928,0.766026,-2.05101,0.100671,-7.84251
CAPM,0.130985,2.83043,0.365,1.209283,2.542456,-,-,-,-


In [120]:
joel_full.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.093041,0.048614,0.24703,0.196793
S&P500,0.097045,0.052468,0.141283,0.371367


In [121]:
joel_full.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.023783,0.518118,0.481,0.885782,-1.171937,-0.415967,-13.0658,-0.722812,-12.2616
CAPM,-0.030605,-0.657427,0.401,1.068447,0.738862,-,-,-,-


In [122]:
joel_full_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.395429,0.339832,0.339276,1.001639
S&P500,0.097045,0.052468,0.141283,0.371367


In [123]:
joel_full_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.206139,2.859295,0.42,1.268495,1.89362,0.762031,-1.50935,0.0803973,-4.4988
CAPM,0.214514,2.880844,0.344,1.362142,2.714108,-,-,-,-


In [124]:
new_full.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.158763,0.145015,0.188155,0.770723
S&P500,0.086145,0.073189,0.14862,0.492459


In [125]:
new_full.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.025139,0.759473,0.565,0.960155,-0.601541,-0.0152838,-8.27652,-0.467544,-14.7889
CAPM,0.052677,1.514852,0.512,0.87426,-2.007907,-,-,-,-


In [126]:
new_full_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.217531,0.203145,0.24769,0.82016
S&P500,0.086145,0.073189,0.14862,0.492459


In [127]:
new_full_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.116871,2.403195,0.496,0.837197,-1.740767,0.739813,-1.50222,0.334265,-4.75154
CAPM,0.088371,1.758287,0.426,1.046064,0.516877,-,-,-,-


#### Whole, Joel, & Future w/ RF Strategy

In [9]:
start_date, end_date = df['month'].min() + 12, df['month'].max()
mid_date = df['month'].unique()[199] # '2005-01'
cols, y_col, method = ['ROCE','EY'], 'RET_hold', 'RF'

# run past, future Joel period strategy test
joel_rf = strategy(df, start_date, mid_date, cols, method=method, dep=y_col)
new_rf = strategy(df, mid_date, end_date, cols, method=method, dep=y_col)
#whole_rf = strategy(df, start_date, end_date, cols, method=method, dep=y_col)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



In [19]:
whole_rf_ports = joel_rf.ports.append(new_rf.ports)
whole_rf_main = performace(whole_rf_ports)
whole_rf_main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.27101,0.237291,0.302916,0.783354
S&P500,0.091764,0.062436,0.144695,0.4315


In [20]:
whole_rf_alphas = test(whole_rf_ports)
whole_rf_alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.117391,2.740026,0.434,1.127153,1.587793,0.661135,-3.00689,0.00449577,-8.7366
CAPM,0.114666,2.571891,0.382,1.246525,3.056723,-,-,-,-


In [10]:
joel_rf.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.322234,0.269321,0.342394,0.786582
S&P500,0.097045,0.052468,0.141283,0.371367


In [11]:
joel_rf.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.150662,2.140835,0.432,1.309508,2.187045,0.606573,-2.50013,-0.0922066,-5.35341
CAPM,0.143772,2.020294,0.376,1.437837,3.337637,-,-,-,-


In [12]:
new_rf.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.21826,0.203866,0.254362,0.801481
S&P500,0.086145,0.073189,0.14862,0.492459


In [13]:
new_rf.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.115025,2.282518,0.486,0.859432,-1.449338,0.737965,-1.45886,0.330678,-4.60651
CAPM,0.086806,1.674685,0.42,1.06729,0.731624,-,-,-,-


####  Whole, Joel, & Future w/ PC1 cheap & good features  and Agg Rank & ML Strategies

In [99]:
# load PC data
pca_all = pd.read_csv('pca_all', delimiter = ",")
pca_good = pd.read_csv('pca_good', delimiter = ",")
pca_cheap = pd.read_csv('pca_cheap', delimiter = ",")

# combine good & cheap to full dataset for easy use
df_pca = df.merge(pca_good, right_index=True, left_index=True)
df_pca = df_pca.merge(pca_cheap, right_index=True, left_index=True)
df_pca = df_pca.merge(pca_all, right_index=True, left_index=True)

In [95]:
# intialize constants & structures for period tests
start_date, end_date = df['month'].min() + 12, df['month'].max()
mid_date = df['month'].unique()[199] # '2005-01'
cols, y_col, method = ['PC1_g','PC1_c'], 'RET_hold', 'ML'

# run past, future Joel period on Agg strategy & PCA data
joel_pca = strategy(df_pca, start_dat, mid_date, cols)
new_pca = strategy(df_pca, mid_date, end_date, cols)
whole_pca = strategy(df_pca, start_date, end_date, cols)

# run past, future Joel period on ML strategy & PCA data
joel_pca_ml = strategy(df_pca, start_date, mid_date, cols, method=method, dep=y_col)
new_pca_ml = strategy(df_pca, mid_date, end_date, cols, method=method, dep=y_col)
whole_pca_ml = strategy(df_pca, start_date, end_date, cols, method=method, dep=y_col)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [128]:
whole_pca.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.142227,0.111863,0.463365,0.241415
S&P500,0.09166,0.062532,0.14632,0.427362


In [129]:
whole_pca.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,-0.012479,-0.171121,0.239,1.432926,3.035187,0.0215723,-4.84343,-0.409915,-6.94311
CAPM,-0.017492,-0.239825,0.23,1.468987,3.387031,-,-,-,-


In [130]:
whole_pca_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.266769,0.233379,0.288782,0.808152
S&P500,0.09166,0.062532,0.14632,0.427362


In [131]:
whole_pca_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.12704,2.982208,0.406,1.00111,0.014157,0.68313,-2.85447,0.0359737,-8.63914
CAPM,0.123611,2.775361,0.345,1.118072,1.485826,-,-,-,-


In [132]:
joel_pca.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.08361,0.039535,0.321769,0.122868
S&P500,0.097045,0.052468,0.141283,0.371367


In [133]:
joel_pca.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.002775,0.044557,0.43,1.046101,0.345332,0.0116049,-6.65832,-0.831095,-9.5142
CAPM,-0.05773,-0.934063,0.366,1.334131,2.681901,-,-,-,-


In [134]:
joel_pca_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.355422,0.301291,0.317156,0.949977
S&P500,0.097045,0.052468,0.141283,0.371367


In [135]:
joel_pca_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.210866,2.988297,0.366,1.035746,0.258051,0.554758,-2.8906,-0.203372,-6.02586
CAPM,0.193756,2.722795,0.302,1.194104,1.509852,-,-,-,-


In [136]:
new_pca.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.114765,0.101497,0.501075,0.202557
S&P500,0.086145,0.073189,0.14862,0.492459


In [137]:
new_pca.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,-0.060341,-0.528931,0.207,1.535736,2.255245,-0.0968316,-2.49319,-0.285503,-3.6122
CAPM,-0.044076,-0.390528,0.204,1.465583,2.191646,-,-,-,-


In [138]:
new_pca_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.205554,0.191298,0.244958,0.780942
S&P500,0.086145,0.073189,0.14862,0.492459


In [139]:
new_pca_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.10499,2.235403,0.515,0.842099,-1.739472,0.755326,-1.45544,0.331365,-4.91674
CAPM,0.076783,1.573151,0.441,1.053591,0.616141,-,-,-,-


####  Whole, Joel, & Future w/ All PC all features and Agg Rank & ML Strategies

In [107]:
# intialize constants & structures for period tests
start_date, end_date = df['month'].min() + 12, df['month'].max()
mid_date = df['month'].unique()[199] # '2005-01'
cols, y_col, method = pca_all.columns, 'RET_hold', 'ML'

# run past, future Joel period strategy test
joel_pca_full = strategy(df_pca, start_date, mid_date, cols)
new_pca_full = strategy(df_pca, mid_date, end_date, cols)
whole_pca_full = strategy(df_pca, start_date, end_date, cols)

# run past, future Joel period strategy test
joel_pca_full_ml = strategy(df_pca, start_date, mid_date, cols, method=method, dep=y_col)
new_pca_full_ml = strategy(df_pca, mid_date, end_date, cols, method=method, dep=y_col)
whole_pca_full_ml = strategy(df_pca, start_date, end_date, cols, method=method, dep=y_col)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Returns Weighted
Portfolios Compiled
Test Complete
Portfolios Built



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [140]:
whole_pca_full.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.197968,0.166247,0.371721,0.447236
S&P500,0.09166,0.062532,0.14632,0.427362


In [141]:
whole_pca_full.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.052809,1.072434,0.49,1.166072,1.775768,0.971617,-0.214287,-0.86468,-14.005
CAPM,0.036688,0.655717,0.328,1.404543,3.903576,-,-,-,-


In [142]:
whole_pca_full_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.28289,0.249111,0.303028,0.822073
S&P500,0.09166,0.062532,0.14632,0.427362


In [143]:
whole_pca_full_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.133671,3.035453,0.427,1.070717,0.875183,0.760838,-2.08988,0.0987448,-7.83452
CAPM,0.130629,2.817209,0.359,1.196364,2.380497,-,-,-,-


In [144]:
joel_pca_full.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.258024,0.207475,0.443848,0.467446
S&P500,0.097045,0.052468,0.141283,0.371367


In [145]:
joel_pca_full.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.141529,1.729843,0.541,1.270656,1.638936,1.05856,0.318927,-1.02012,-8.48518
CAPM,0.059878,0.656334,0.347,1.790474,4.53634,-,-,-,-


In [146]:
joel_pca_full_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.394727,0.339156,0.336237,1.008683
S&P500,0.097045,0.052468,0.141283,0.371367


In [147]:
joel_pca_full_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.20806,2.898403,0.415,1.248247,1.759703,0.74662,-1.61525,0.0714388,-4.56569
CAPM,0.215622,2.913473,0.34,1.342524,2.583944,-,-,-,-


In [148]:
new_pca_full.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.18715,0.173094,0.294683,0.58739
S&P500,0.086145,0.073189,0.14862,0.492459


In [149]:
new_pca_full.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.018285,0.310812,0.43,1.145895,1.235452,0.620147,-1.73689,-0.643408,-9.28934
CAPM,0.049623,0.806838,0.369,1.157406,1.419274,-,-,-,-


In [150]:
new_pca_full_ml.main

Unnamed: 0,return,premium,risk,Sharpe
Strategy,0.217756,0.203367,0.244322,0.832375
S&P500,0.086145,0.073189,0.14862,0.492459


In [151]:
new_pca_full_ml.alphas

Unnamed: 0,alpha,t-stat,R2,Mkt-RF Beta,Mkt-RF t,SMB Beta,SMB t,HML Beta,HML t
Fama-French,0.117018,2.437532,0.495,0.832309,-1.816483,0.721343,-1.6299,0.316178,-4.94446
CAPM,0.089813,1.813587,0.427,1.034236,0.390116,-,-,-,-
