In [47]:
import pandas as pd
import numpy as np
import dynamic_portfolio.preprocess as prep
import dynamic_portfolio.utils as utils
import dynamic_portfolio.cross_validate as cv
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error
import warnings

%load_ext autoreload
%autoreload 2

# Show every column when a wide feature frame is displayed.
pd.set_option('display.max_columns', None)

# NOTE(review): this silences *all* warnings for the session, including any
# deprecation or data-conversion warnings raised by the libraries used below —
# consider narrowing the filter to specific categories.
warnings.filterwarnings(action='ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Hyper-parameters for the PCA -> gradient-boosting pipeline. Keys follow the
# `<step name>__<parameter>` convention, where the step names are the lowercase
# class names generated by `make_pipeline`.
params = dict(
    pca__n_components=0.9,
    gradientboostingregressor__max_depth=2,
    gradientboostingregressor__criterion='friedman_mse',
    gradientboostingregressor__n_estimators=100,
    gradientboostingregressor__learning_rate=0.1,
)

# Build the pipeline with default steps, then apply the settings above.
# The final expression is left bare so the configured pipeline is displayed.
pipe_gdb = make_pipeline(PCA(), GradientBoostingRegressor())
pipe_gdb.set_params(**params)

In [None]:
# Fit the gradient-boosting pipeline on each ticker's training frame and record
# the RMSE obtained on that ticker's test frame.
tickers = utils.return_tickers()
scores = []
for idx, ticker in enumerate(tickers):

    # The same `pipe_gdb` object is reused for every ticker (this does not create
    # a new instance); each `fit` call below re-trains it on the current ticker.
    model = pipe_gdb

    train_df = prep.ready_to_train_df(ticker)  # loading train df
    # `ready_to_test_df` is the loader name used by the other cells of this notebook.
    test_df = prep.ready_to_test_df(ticker)  # loading test df

    # Keep the target out of the feature matrix: fitting/predicting on frames that
    # still contain 'return' would let the model read the answer directly.
    X_train, y_train = train_df.drop(columns=['return']), train_df['return']
    X_test, y_test = test_df.drop(columns=['return']), test_df['return']

    model.fit(X_train, y_train)
    error = mean_squared_error(y_test, model.predict(X_test)) ** 0.5  # RMSE
    scores.append(error)
    print(f"error for {ticker}: {error}, index # {idx}")

In [2]:
# Load the prepared training frame for a single ticker (AAPL) and preview its
# first rows to inspect the available columns.
apple = prep.ready_to_train_df('AAPL')
apple.head()

Unnamed: 0_level_0,volume,dividend_amount,split_coefficient,reportedEPS,surprisePercentage,10Y_yield,2Y_yield,10_2_spread,oil_price,orders,fed_funds,unemployment_rate,inf_exp,non_farm_payroll,cpi,retail_sales,gdp_per_capita,gold_price,usd_price,return,high/low,volatility_5days,volatility_10days,volatility_20days,momentum_5days,momentum_10days,momentum_20days,distance_5days,distance_10days,distance_20days,volume_5days,volume_10days,volume_20days,volume_momentum_5days,volume_momentum_10days,volume_momentum_20days,price/eps,momentum_5days/eps,momentum_10days/eps,momentum_20days/eps,gold_return,10Y_return,2Y_return,spread_return,oil_return,usd_return,unemployement_return,cpi_return,non_farm_payroll_return,gdp_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
2001-01-18,-0.10537,0.0,0.0,-0.375034,0.0,1.580927,1.903384,-1.342731,-1.252546,-0.934743,2.606178,-0.555556,0.2,-0.610978,-1.052896,-1.238462,-1.597627,-1.491827,1.864062,0.111838,1.842193,2.69592,2.923183,1.994286,-0.478714,1.685335,1.643434,-0.719437,-1.442745,-1.240978,-1.190488,-1.117295,-1.379262,-0.812372,-0.848085,-0.994603,-0.817374,0.046891,-0.565027,-0.625177,0.418402,-0.689312,-0.581609,0.752685,0.879385,-1.530922,0.0,0.673519,-2.405975,-0.036933
2001-01-19,-0.545803,0.0,0.0,-0.375034,0.0,1.643851,1.923358,-1.293905,-1.189135,-0.934743,2.600313,-0.555556,0.2,-0.610978,-1.052896,-1.238462,-1.597627,-1.492268,1.928136,0.043339,0.944023,3.980977,3.129773,2.609049,2.468842,1.849609,2.622334,4.175244,4.443153,4.337933,-1.559069,-1.634087,-1.748852,-0.671165,-0.801449,-0.94154,-0.83049,-0.764047,-0.609493,-0.904996,-0.104653,0.693065,0.125781,0.528113,2.282965,1.285421,0.0,0.673519,-2.405975,-0.036933
2001-01-22,-0.797475,0.0,0.0,-0.375034,0.0,1.697786,1.923358,-1.220667,-1.185778,-0.934743,2.647238,-0.555556,0.2,-0.610978,-1.052896,-1.238462,-1.597627,-1.487638,1.890759,-0.012821,1.835024,3.398468,3.136038,2.585941,1.603004,1.866289,3.228468,1.256646,1.306832,1.1456,-0.498298,-0.798034,-1.04531,-0.67708,-0.797209,-0.917831,-0.826442,-0.525836,-0.614008,-1.078259,0.668578,0.585609,-0.01856,0.718659,0.094866,-0.737712,0.0,0.673519,-2.405975,-0.036933
2001-01-23,-0.445201,0.0,0.0,-0.375034,0.0,1.742732,1.969965,-1.24508,-1.206294,-0.934743,2.629641,-0.555556,0.2,-0.610978,-1.052896,-1.238462,-1.597627,-1.48852,1.890759,0.064935,3.604759,2.843761,2.90904,2.630055,2.241785,2.254032,2.830601,-1.799599,-1.386286,-1.271107,1.070367,0.376331,-0.162741,-0.656114,-0.785039,-0.92054,-0.846683,-0.701579,-0.718961,-0.964529,-0.170887,0.481934,0.316109,-0.205489,-0.724499,0.004588,0.0,0.673519,-2.405975,-0.036933
2001-01-24,-0.604064,0.0,0.0,-0.375034,0.0,1.769699,1.963307,-1.196254,-1.213754,-0.934743,2.676566,-0.555556,0.2,-0.610978,-1.052896,-1.238462,-1.597627,-1.492047,2.018907,0.0,1.517272,2.830914,3.095191,2.664827,3.611615,3.002549,3.698629,1.323002,1.92392,2.003887,-0.356491,-0.869106,-1.189121,-0.530819,-0.727494,-0.890353,-0.846683,-1.07845,-0.921569,-1.212655,-0.571964,0.28529,-0.065675,0.440779,-0.281032,2.559282,0.0,0.673519,-2.405975,-0.036933


In [46]:
# Positional (0-based) lookup of column 29 in the full frame, i.e. with the
# 'return' column still present; the recorded output names it `distance_20days`.
apple.iloc[: , 29]

date
2001-01-18   -1.240978
2001-01-19    4.337933
2001-01-22    1.145600
2001-01-23   -1.271107
2001-01-24    2.003887
                ...   
2018-06-28   -0.025803
2018-06-29    0.340464
2018-07-02   -0.033096
2018-07-03    0.553387
2018-07-05   -0.666650
Name: distance_20days, Length: 4347, dtype: float64

In [34]:
# Target frame: the 'return' column moved down by one row, so row t holds the
# return recorded on row t-1; the NaN created in the first row is replaced by 0.
# NOTE(review): this pairs the features of row t with the *previous* row's
# return — confirm the alignment is intended, since features derived from recent
# prices may already encode that value (possible target leakage).
y = apple[['return']].shift(1).replace(np.nan, 0)
y.head()

Unnamed: 0_level_0,return
date,Unnamed: 1_level_1
2001-01-18,0.0
2001-01-19,0.111838
2001-01-22,0.043339
2001-01-23,-0.012821
2001-01-24,0.064935


In [35]:
# Random forests are stochastic (bootstrap samples, feature sub-sampling); fixing
# the seed makes the score and feature importances below reproducible on re-run.
model = RandomForestRegressor(random_state=42)

In [36]:
# Fit on every column except the target. `y` is a single-column DataFrame, so it
# is flattened to 1-D: that is the shape scikit-learn expects for a single-output
# regression target (a column vector triggers a data-conversion warning, which
# the global warnings filter in this notebook would hide).
model.fit(apple.drop(columns=['return']), np.ravel(y))

In [37]:
# Load the AAPL test frame once and derive both the target and the features from
# that single copy instead of preparing the frame twice.
apple_test = prep.ready_to_test_df('AAPL')
# Same alignment as the training target: 'return' moved down one row, first NaN -> 0.
# NOTE(review): confirm that pairing row t's features with row t-1's return is intended.
y_test = apple_test['return'].shift(1).replace(np.nan, 0)
x_test = apple_test.drop(columns=['return'])

In [38]:
# Score the fitted model on the held-out AAPL frame (for scikit-learn regressors
# `score` reports the R^2 coefficient of determination).
# NOTE(review): the recorded value is close to 1.0 — verify this is not an
# artefact of how the target was shifted relative to the features.
model.score(x_test, y_test)

0.9975258240437974

In [41]:
# Position of the most important feature. The index refers to the training
# matrix passed to `fit`, i.e. the frame *without* the 'return' column, so it
# does not line up one-to-one with positions in the full `apple` frame.
model.feature_importances_.argmax()

28

In [49]:
# Cross-validate a default gradient-boosting regressor on every ticker and keep
# whatever `cv.cross_validate_ml` returns, keyed by ticker symbol.
dict_score = {}
tickers = utils.return_tickers()
for idx, ticker in enumerate(tickers):
    # A new, default-configured estimator is handed to the CV helper each time.
    dict_score[ticker] = cv.cross_validate_ml(prep.ready_to_train_df(ticker), GradientBoostingRegressor())
    # `enumerate` supplies the position directly instead of re-scanning the
    # list with `tickers.index` on every iteration.
    print(f"done for ticker {ticker} index # {idx}")

done for ticker AAPL index # 0
done for ticker MSFT index # 1
done for ticker GOOG index # 2
done for ticker AMZN index # 3
done for ticker TSLA index # 4
done for ticker UNH index # 5
done for ticker XOM index # 6
done for ticker JNJ index # 7
done for ticker WMT index # 8
done for ticker NVDA index # 9
done for ticker JPM index # 10
done for ticker V index # 11
done for ticker CVX index # 12
done for ticker PG index # 13
done for ticker LLY index # 14
done for ticker MA index # 15
done for ticker HD index # 16
done for ticker META index # 17
done for ticker BAC index # 18
done for ticker ABBV index # 19
done for ticker PFE index # 20
done for ticker KO index # 21
done for ticker MRK index # 22
done for ticker PEP index # 23
done for ticker COST index # 24
done for ticker ORCL index # 25
done for ticker AVGO index # 26
done for ticker TMO index # 27
done for ticker MCD index # 28
done for ticker CSCO index # 29
done for ticker ACN index # 30
done for ticker DHR index # 31
done for tic

done for ticker BKR index # 259
done for ticker GLW index # 260
done for ticker LYB index # 261
done for ticker ES index # 262
done for ticker BAX index # 263
done for ticker STT index # 264
done for ticker VRSK index # 265
done for ticker TROW index # 266
done for ticker WBD index # 267
done for ticker AWK index # 268
done for ticker IT index # 269
done for ticker GPN index # 270
done for ticker HRL index # 271
done for ticker FANG index # 272
done for ticker WTW index # 273
done for ticker RJF index # 274
done for ticker GPC index # 275
done for ticker IFF index # 276
done for ticker CDW index # 277
done for ticker TSCO index # 278
done for ticker FITB index # 279
done for ticker ARE index # 280
done for ticker URI index # 281
done for ticker ZBH index # 282
done for ticker K index # 283
done for ticker LEN index # 284
done for ticker EBAY index # 285
done for ticker EIX index # 286
done for ticker CBRE index # 287
done for ticker EFX index # 288
done for ticker VMC index # 289
done 

In [None]:
def custom_gridsearch(df, model, max_depth=[2,3,4], criterion = ['friedman_mse', 'squared_error', 'mse'], n_estimator=[50, 75, 100], learning_rate=[0.08, 0.1, 0.12], loss=['squared_error', 'absolute_error', 'huber']):
    """Cross-validate every hyper-parameter combination and return the best one.

    Parameters
    ----------
    df : object
        Data passed unchanged to ``cv.cross_validate_ml`` (presumably a prepared
        training DataFrame — confirm against that helper).
    model : callable
        Estimator class/factory, called with the keyword arguments ``max_depth``,
        ``criterion``, ``n_estimators``, ``learning_rate`` and ``loss``.
    max_depth, criterion, n_estimator, learning_rate, loss : iterable
        Candidate values for each hyper-parameter. The full Cartesian product is
        evaluated; the default lists are only iterated, never mutated.

    Returns
    -------
    best_params : tuple
        ``(max_depth, criterion, n_estimators, learning_rate, loss)`` of the
        combination with the lowest score.
    rmse : list
        First element of each ``cv.cross_validate_ml`` result, in evaluation order.
    params : list of tuple
        Evaluated combinations, aligned index-by-index with ``rmse``.

    Raises
    ------
    ValueError
        If any candidate list is empty, so that no combination was evaluated.

    Notes
    -----
    NOTE(review): the default ``criterion`` list contains both 'squared_error' and
    'mse'; recent scikit-learn releases no longer accept 'mse' — confirm against
    the installed version before relying on the defaults.
    """
    from itertools import product  # local import: only this helper needs it

    rmse = []
    params = []
    # `product` walks the grid in the same order as nested loops would
    # (max_depth outermost, loss innermost).
    grid = product(max_depth, criterion, n_estimator, learning_rate, loss)
    for counter, combo in enumerate(grid, start=1):
        max_depth_i, criterion_i, n_estimator_i, learning_rate_i, loss_i = combo
        # The CV helper is only available through the `cv` module alias.
        test = cv.cross_validate_ml(df = df, model = model(max_depth=max_depth_i,
                                                           criterion = criterion_i,
                                                           n_estimators = n_estimator_i,
                                                           learning_rate = learning_rate_i,
                                                           loss = loss_i))
        rmse.append(test[0])
        # Record the complete combination, loss included, so the winning entry
        # fully identifies the model that produced the best score.
        params.append(combo)
        print(f'model {counter} done with parameters: max_depth = {max_depth_i}, criterion = {criterion_i}, estimators = {n_estimator_i}, learning rate = {learning_rate_i}, loss = {loss_i}, rmse = {test[0]}')

    if not rmse:
        raise ValueError("custom_gridsearch: every hyper-parameter list must contain at least one value")

    idx_min = np.argmin(rmse)
    best_params = params[idx_min]

    return best_params, rmse, params