In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
from dynamic_portfolio.utils import load_csv
from dynamic_portfolio.utils import features_creation, clean_data
from dynamic_portfolio.preprocess import scaler

# Loading data and creating clean dataframe

In [None]:
df = features_creation('META')

In [None]:
pd.set_option('display.max_columns', None)

# Cross vals

We use sklearn's time series split to break up the data in different folds
We use a sklearn example to run the first tests.

Metrics used : 
 - rmse
 - mae
 - R2

In [None]:
from sklearn.model_selection import TimeSeriesSplit

In [None]:
ts_cv = TimeSeriesSplit(
    n_splits=20,
    gap=0,
    max_train_size=252,
    test_size=45,
)

In [None]:
all_splits = list(ts_cv.split(X, y))

In [None]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error

In [None]:
model = HistGradientBoostingRegressor()

In [None]:
rmse = []
for train_index, test_index in ts_cv.split(X):
    cv_train, cv_test = X.iloc[train_index], X.iloc[test_index]
    model = model.fit(X,y)
    predictions = model.predict(cv_test)
    true_values = cv_test['return']
    rmse.append(np.sqrt(mean_squared_error(true_values, predictions)))

In [None]:
np.mean(rmse)*100

In [None]:
def evaluate(model, X, y, cv):
    cv_results = cross_validate(
        model,
        X,
        y,
        cv=cv,
        scoring=["neg_mean_absolute_error", "neg_root_mean_squared_error"],
    )
    mae = -cv_results["test_neg_mean_absolute_error"]
    rmse = -cv_results["test_neg_root_mean_squared_error"]
    print(
        f"Mean Absolute Error:     {mae.mean():.6f} +/- {mae.std():.6f}\n"
        f"Root Mean Squared Error: {rmse.mean():.6f} +/- {rmse.std():.6f}"
    )


In [None]:
import seaborn as sns

In [None]:
evaluate(model, X, y, ts_cv)

# PCA Analysis

In [25]:
from sklearn.decomposition import PCA

In [None]:
pca = PCA(n_components=0.9)
pca.fit(prep.ready_to_train_df('IBM'))

In [None]:
n_pcs= pca.n_components_ # get number of component
# get the index of the most important feature on EACH component
most_important = [np.abs(pca.components_[i]).argmax() for i in range(n_pcs)]
initial_feature_names = ibm.columns
# get the most important feature names
most_important_names = [initial_feature_names[most_important[i]] for i in range(n_pcs)]

## Using PCA analysis on all stocks

In [23]:
from sklearn.pipeline import make_pipeline

In [29]:
pipe.get_params()

{'memory': None,
 'steps': [('pca', PCA()),
  ('gradientboostingregressor', GradientBoostingRegressor())],
 'verbose': False,
 'pca': PCA(),
 'gradientboostingregressor': GradientBoostingRegressor(),
 'pca__copy': True,
 'pca__iterated_power': 'auto',
 'pca__n_components': None,
 'pca__n_oversamples': 10,
 'pca__power_iteration_normalizer': 'auto',
 'pca__random_state': None,
 'pca__svd_solver': 'auto',
 'pca__tol': 0.0,
 'pca__whiten': False,
 'gradientboostingregressor__alpha': 0.9,
 'gradientboostingregressor__ccp_alpha': 0.0,
 'gradientboostingregressor__criterion': 'friedman_mse',
 'gradientboostingregressor__init': None,
 'gradientboostingregressor__learning_rate': 0.1,
 'gradientboostingregressor__loss': 'squared_error',
 'gradientboostingregressor__max_depth': 3,
 'gradientboostingregressor__max_features': None,
 'gradientboostingregressor__max_leaf_nodes': None,
 'gradientboostingregressor__min_impurity_decrease': 0.0,
 'gradientboostingregressor__min_samples_leaf': 1,
 'gradi

In [30]:
pipe = make_pipeline(PCA(), GradientBoostingRegressor())

params = {'pca__n_components':0.9,
                'gradientboostingregressor__max_depth':3,
                'gradientboostingregressor__criterion':'friedman_mse',
                'gradientboostingregressor__n_estimators':100,
                'gradientboostingregressor__learning_rate':0.08}
pipe.set_params(**params)

In [31]:
pca_dict_score = {}
tickers = utils.return_tickers()
for ticker in tickers:
        dict_pca_score[ticker] = cross_validate_ml(prep.ready_to_train_df(ticker), pipe)
        print(f"done for ticker {ticker} index # {tickers.index(ticker)}")

done for ticker AAPL index # 0
done for ticker MSFT index # 1
done for ticker GOOG index # 2
done for ticker AMZN index # 3
done for ticker TSLA index # 4
done for ticker UNH index # 5
done for ticker XOM index # 6
done for ticker JNJ index # 7
done for ticker WMT index # 8
done for ticker NVDA index # 9
done for ticker JPM index # 10
done for ticker V index # 11
done for ticker CVX index # 12
done for ticker PG index # 13
done for ticker LLY index # 14
done for ticker MA index # 15
done for ticker HD index # 16
done for ticker META index # 17
done for ticker BAC index # 18
done for ticker ABBV index # 19
done for ticker PFE index # 20
done for ticker KO index # 21
done for ticker MRK index # 22
done for ticker PEP index # 23
done for ticker COST index # 24
done for ticker ORCL index # 25
done for ticker AVGO index # 26
done for ticker TMO index # 27
done for ticker MCD index # 28
done for ticker CSCO index # 29
done for ticker ACN index # 30
done for ticker DHR index # 31
done for tic

done for ticker BKR index # 259
done for ticker GLW index # 260
done for ticker LYB index # 261
done for ticker ES index # 262
done for ticker BAX index # 263
done for ticker STT index # 264
done for ticker VRSK index # 265
done for ticker TROW index # 266
done for ticker WBD index # 267
done for ticker AWK index # 268
done for ticker IT index # 269
done for ticker GPN index # 270
done for ticker HRL index # 271
done for ticker FANG index # 272
done for ticker WTW index # 273
done for ticker RJF index # 274
done for ticker GPC index # 275
done for ticker IFF index # 276
done for ticker CDW index # 277
done for ticker TSCO index # 278
done for ticker FITB index # 279
done for ticker ARE index # 280
done for ticker URI index # 281
done for ticker ZBH index # 282
done for ticker K index # 283
done for ticker LEN index # 284
done for ticker EBAY index # 285
done for ticker EIX index # 286
done for ticker CBRE index # 287
done for ticker EFX index # 288
done for ticker VMC index # 289
done 

In [33]:
pca_dict_score = {}
tickers = utils.return_tickers()
for ticker in tickers:
    pca = PCA(n_components=0.9)
    ticker_df = prep.ready_to_train_df(ticker)
    pca.fit(ticker_df)
    n_pcs= pca.n_components_
    most_important = [np.abs(pca.components_[i]).argmax() for i in range(n_pcs)]
    initial_feature_names = ticker_df.columns
    most_important_names = [initial_feature_names[most_important[i]] for i in range(n_pcs)]
    pca_df = ticker_df[list(np.unique(most_important_names))] 
    returns = ticker_df[['return']]
    final_pca = pd.merge(pca_df, returns, how='outer', left_index=True, right_index=True)
    pca_dict_score[ticker] = cross_validate_ml(final_pca, GradientBoostingRegressor(max_depth = 3, 
                                                                                    criterion='friedman_mse',
                                                                                    learning_rate=0.08,
                                                                                    n_estimators = 100))
    print(f"done for ticker {ticker} index # {tickers.index(ticker)}")

done for ticker AAPL index # 0
done for ticker MSFT index # 1
done for ticker GOOG index # 2
done for ticker AMZN index # 3
done for ticker TSLA index # 4
done for ticker UNH index # 5
done for ticker XOM index # 6
done for ticker JNJ index # 7
done for ticker WMT index # 8
done for ticker NVDA index # 9
done for ticker JPM index # 10
done for ticker V index # 11
done for ticker CVX index # 12
done for ticker PG index # 13
done for ticker LLY index # 14
done for ticker MA index # 15
done for ticker HD index # 16
done for ticker META index # 17
done for ticker BAC index # 18
done for ticker ABBV index # 19
done for ticker PFE index # 20
done for ticker KO index # 21
done for ticker MRK index # 22
done for ticker PEP index # 23
done for ticker COST index # 24
done for ticker ORCL index # 25
done for ticker AVGO index # 26
done for ticker TMO index # 27
done for ticker MCD index # 28
done for ticker CSCO index # 29
done for ticker ACN index # 30
done for ticker DHR index # 31
done for tic

done for ticker BKR index # 259
done for ticker GLW index # 260
done for ticker LYB index # 261
done for ticker ES index # 262
done for ticker BAX index # 263
done for ticker STT index # 264
done for ticker VRSK index # 265
done for ticker TROW index # 266
done for ticker WBD index # 267
done for ticker AWK index # 268
done for ticker IT index # 269
done for ticker GPN index # 270
done for ticker HRL index # 271
done for ticker FANG index # 272
done for ticker WTW index # 273
done for ticker RJF index # 274
done for ticker GPC index # 275
done for ticker IFF index # 276
done for ticker CDW index # 277
done for ticker TSCO index # 278
done for ticker FITB index # 279
done for ticker ARE index # 280
done for ticker URI index # 281
done for ticker ZBH index # 282
done for ticker K index # 283
done for ticker LEN index # 284
done for ticker EBAY index # 285
done for ticker EIX index # 286
done for ticker CBRE index # 287
done for ticker EFX index # 288
done for ticker VMC index # 289
done 

In [35]:
rmse_pca= []
baseline = []
for key in pca_dict_score.keys():
    rmse_pca.append(pca_dict_score[key][0])
    baseline.append(pca_dict_score[key][1])
print(np.mean(rmse_pca), np.mean(baseline) )

0.0033498792960944923 0.02096529727516623


In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.ylim(ymin=0)
plt.title('cumulated share of explained variance')
plt.xlabel('# of principal component used');

In [1]:
import pandas as pd
import numpy as np
import dynamic_portfolio.preprocess as prep
import dynamic_portfolio.utils as utils
import dynamic_portfolio.cross_validate as cv
import dynamic_portfolio.models as model
from sklearn.ensemble import GradientBoostingRegressor
import warnings
%load_ext autoreload
%autoreload 2
warnings.filterwarnings(action='ignore')
pd.set_option('display.max_columns', None)

### Cross validation functions

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
cross_val = {
    'fold_length' : 252, # Working days for 1 year
    'fold_stride' : 60, # Step between folds, here one quarter
    'train_test_ratio' : 0.7, # Split in fold
    'input_length' : 0, # Number of days to move back from last train_index, here 0
    'horizon' : 1, # Number of days ahead to make prediction, here 1
    'output_length' : 1, # Number of targets wanted
}
#Split the dataset by FOLDS
def get_folds(
    df: pd.DataFrame,
    fold_length = cross_val['fold_length'],
    fold_stride = cross_val['fold_stride']):
    '''
    This function slides through the Time Series dataframe of shape (n_timesteps, n_features) to create folds
    - of equal `fold_length`
    - using `fold_stride` between each fold
    Returns a list of folds, each as a DataFrame
    '''
    folds = []
    for idx in range(0, len(df), fold_stride):
        # Exits the loop as soon as the last fold index would exceed the last index
        if (idx + fold_length) > len(df):
            break
        fold = df.iloc[idx:idx + fold_length, :]
        folds.append(fold)
    return folds
#Split FOLDS by Train et Test
#### FOR ONE FOLD !!!!!
def train_test_split(fold: pd.DataFrame,
                     train_test_ratio = cross_val['train_test_ratio'],
                     input_length = cross_val['input_length']):
    '''
    Returns a train dataframe and a test dataframe (fold_train, fold_test)
    from which one can sample (X,y) sequences.
    df_train should contain all the timesteps until round(train_test_ratio * len(fold))
    '''
    # TRAIN SET
    # ======================
    last_train_idx = round(train_test_ratio * len(fold))
    fold_train = fold.iloc[0:last_train_idx, :]
    # TEST SET
    # ======================
    first_test_idx = last_train_idx - input_length 
    fold_test = fold.iloc[first_test_idx:, :]
    return (fold_train, fold_test)
def cross_validate_ml(df, model) :
    '''
    get_folds() create many FOLDS, train_test_split() create a split on ONE FOLDS.
    The goal of this function is to make splits and sequences on each FOLDS.
    Then, apply a model.
    '''
    folds = get_folds(df, fold_length = cross_val['fold_length'], fold_stride = cross_val['fold_stride']) # 1 - Creating FOLDS
    scores =[]
    baseline = []
    for fold in folds:
        # 2 - CHRONOLOGICAL TRAIN TEST SPLIT of the current FOLD
        (fold_train, fold_test) = train_test_split(fold = fold,
                                                train_test_ratio = cross_val['train_test_ratio'],
                                                input_length = cross_val['input_length'] ,
                                                )
        # 3 - Scanninng fold_train and fold_test for SEQUENCES
        X_train, y_train = fold_train, fold_train['return']
        X_test, y_test = fold_test, fold_test['return']
        model.fit(X_train, y_train)
        rmse_model = (mean_squared_error(y_test, model.predict(X_test)))**0.5
        scores.append(rmse_model)
        rmse_baseline = mean_squared_error(y_test.iloc[[0]], y_train.iloc[[-1]])**0.5
        baseline.append(rmse_baseline)
    return np.mean(scores), np.mean(baseline)

### Script to run for model on all stocks

### Models used

In [13]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor

In [18]:
dict_score = {}
tickers = utils.return_tickers()
for ticker in tickers:
        dict_score[ticker] = cross_validate_ml(prep.ready_to_train_df(ticker), XGBRegressor())
        print(f"done for ticker {ticker} index # {tickers.index(ticker)}")

done for ticker AAPL index # 0
done for ticker MSFT index # 1
done for ticker GOOG index # 2
done for ticker AMZN index # 3
done for ticker TSLA index # 4
done for ticker UNH index # 5
done for ticker XOM index # 6
done for ticker JNJ index # 7
done for ticker WMT index # 8
done for ticker NVDA index # 9
done for ticker JPM index # 10
done for ticker V index # 11
done for ticker CVX index # 12
done for ticker PG index # 13
done for ticker LLY index # 14
done for ticker MA index # 15
done for ticker HD index # 16
done for ticker META index # 17
done for ticker BAC index # 18
done for ticker ABBV index # 19
done for ticker PFE index # 20
done for ticker KO index # 21
done for ticker MRK index # 22
done for ticker PEP index # 23
done for ticker COST index # 24
done for ticker ORCL index # 25
done for ticker AVGO index # 26
done for ticker TMO index # 27
done for ticker MCD index # 28
done for ticker CSCO index # 29
done for ticker ACN index # 30
done for ticker DHR index # 31
done for tic

done for ticker BKR index # 259
done for ticker GLW index # 260
done for ticker LYB index # 261
done for ticker ES index # 262
done for ticker BAX index # 263
done for ticker STT index # 264
done for ticker VRSK index # 265
done for ticker TROW index # 266
done for ticker WBD index # 267
done for ticker AWK index # 268
done for ticker IT index # 269
done for ticker GPN index # 270
done for ticker HRL index # 271
done for ticker FANG index # 272
done for ticker WTW index # 273
done for ticker RJF index # 274
done for ticker GPC index # 275
done for ticker IFF index # 276
done for ticker CDW index # 277
done for ticker TSCO index # 278
done for ticker FITB index # 279
done for ticker ARE index # 280
done for ticker URI index # 281
done for ticker ZBH index # 282
done for ticker K index # 283
done for ticker LEN index # 284
done for ticker EBAY index # 285
done for ticker EIX index # 286
done for ticker CBRE index # 287
done for ticker EFX index # 288
done for ticker VMC index # 289
done 

### Cross val scoring

In [20]:
rmse= []
baseline = []
for key in dict_score.keys():
    rmse.append(dict_score[key][0])
    baseline.append(dict_score[key][1])
print(np.mean(rmse), np.mean(baseline) )

0.004796076311898096 0.02096529727516623


## Custom grid search

In [None]:
max_depth = [2, 5, 8]
criterion = ['friedman_mse', 'squared_error', 'mse']
learning_rate=[0.01, 0.1, 0.2] 
n_estimators=[100, 200, 500]

In [11]:
def custom_gridsearch(df, model, max_depth=[2,3,4], criterion = ['friedman_mse', 'squared_error', 'mse'], n_estimator=[50, 75, 100], learning_rate=[0.08, 0.1, 0.12], loss=['squared_error', 'absolute_error', 'huber']):
    counter = 0
    rmse = []
    baseline = []
    params = []
    for max_depth_i in max_depth:
        for criterion_i in criterion:
            for n_estimator_i in n_estimator:
                for learning_rate_i in learning_rate:
                    for loss_i in loss:
                        test = cross_validate_ml(df = df, model = model(max_depth=max_depth_i,
                                                                   criterion = criterion_i,
                                                                   n_estimators = n_estimator_i,
                                                                   learning_rate = learning_rate_i,
                                                                   loss = loss_i))
                        rmse.append(test[0])
                        baseline.append(test[1])
                        params.append((max_depth_i, criterion_i, n_estimator_i, learning_rate_i))
                        counter += 1
                        print(f'model {counter} done with parameters: max_depth = {max_depth_i}, criterion = {criterion_i}, estimators = {n_estimator_i}, learning rate = {learning_rate_i}, loss = {loss_i}, rmse = {test[0]}')
    idx_min = np.argmin(rmse)
    best_params = params[idx_min]
    
    return best_params, rmse, params

### Model results on one stock on a grid search

In [12]:
gs_IP = custom_gridsearch(prep.ready_to_train_df('IP'), model = GradientBoostingRegressor)

model 1 done with parameters: max_depth = 2, criterion = friedman_mse, estimators = 50, learning rate = 0.08, loss = squared_error, rmse = 0.002958185553002535
model 2 done with parameters: max_depth = 2, criterion = friedman_mse, estimators = 50, learning rate = 0.08, loss = absolute_error, rmse = 0.006407018407676127
model 3 done with parameters: max_depth = 2, criterion = friedman_mse, estimators = 50, learning rate = 0.08, loss = huber, rmse = 0.004428068812946768
model 4 done with parameters: max_depth = 2, criterion = friedman_mse, estimators = 50, learning rate = 0.1, loss = squared_error, rmse = 0.002776729479836416
model 5 done with parameters: max_depth = 2, criterion = friedman_mse, estimators = 50, learning rate = 0.1, loss = absolute_error, rmse = 0.005372760873905239
model 6 done with parameters: max_depth = 2, criterion = friedman_mse, estimators = 50, learning rate = 0.1, loss = huber, rmse = 0.003432291380610513
model 7 done with parameters: max_depth = 2, criterion = 

model 53 done with parameters: max_depth = 2, criterion = squared_error, estimators = 100, learning rate = 0.12, loss = absolute_error, rmse = 0.0038499484792597865
model 54 done with parameters: max_depth = 2, criterion = squared_error, estimators = 100, learning rate = 0.12, loss = huber, rmse = 0.002795539577918935
model 55 done with parameters: max_depth = 2, criterion = mse, estimators = 50, learning rate = 0.08, loss = squared_error, rmse = 0.0029099191595345383
model 56 done with parameters: max_depth = 2, criterion = mse, estimators = 50, learning rate = 0.08, loss = absolute_error, rmse = 0.006407991680854807
model 57 done with parameters: max_depth = 2, criterion = mse, estimators = 50, learning rate = 0.08, loss = huber, rmse = 0.004428402988000523
model 58 done with parameters: max_depth = 2, criterion = mse, estimators = 50, learning rate = 0.1, loss = squared_error, rmse = 0.0028373111145722184
model 59 done with parameters: max_depth = 2, criterion = mse, estimators = 50

model 106 done with parameters: max_depth = 3, criterion = friedman_mse, estimators = 100, learning rate = 0.12, loss = squared_error, rmse = 0.0025730424155033494
model 107 done with parameters: max_depth = 3, criterion = friedman_mse, estimators = 100, learning rate = 0.12, loss = absolute_error, rmse = 0.0036681061284255722
model 108 done with parameters: max_depth = 3, criterion = friedman_mse, estimators = 100, learning rate = 0.12, loss = huber, rmse = 0.0027251777091245326
model 109 done with parameters: max_depth = 3, criterion = squared_error, estimators = 50, learning rate = 0.08, loss = squared_error, rmse = 0.002702793151349547
model 110 done with parameters: max_depth = 3, criterion = squared_error, estimators = 50, learning rate = 0.08, loss = absolute_error, rmse = 0.005058232367804183
model 111 done with parameters: max_depth = 3, criterion = squared_error, estimators = 50, learning rate = 0.08, loss = huber, rmse = 0.003941806328485692
model 112 done with parameters: m

model 159 done with parameters: max_depth = 3, criterion = mse, estimators = 100, learning rate = 0.1, loss = huber, rmse = 0.002699105329518561
model 160 done with parameters: max_depth = 3, criterion = mse, estimators = 100, learning rate = 0.12, loss = squared_error, rmse = 0.002601949405101594
model 161 done with parameters: max_depth = 3, criterion = mse, estimators = 100, learning rate = 0.12, loss = absolute_error, rmse = 0.0036977284985453603
model 162 done with parameters: max_depth = 3, criterion = mse, estimators = 100, learning rate = 0.12, loss = huber, rmse = 0.0027447041132625722
model 163 done with parameters: max_depth = 4, criterion = friedman_mse, estimators = 50, learning rate = 0.08, loss = squared_error, rmse = 0.002758204626328357
model 164 done with parameters: max_depth = 4, criterion = friedman_mse, estimators = 50, learning rate = 0.08, loss = absolute_error, rmse = 0.004962315891742368
model 165 done with parameters: max_depth = 4, criterion = friedman_mse, 

model 211 done with parameters: max_depth = 4, criterion = squared_error, estimators = 100, learning rate = 0.1, loss = squared_error, rmse = 0.002822949349514258
model 212 done with parameters: max_depth = 4, criterion = squared_error, estimators = 100, learning rate = 0.1, loss = absolute_error, rmse = 0.003671004260923169
model 213 done with parameters: max_depth = 4, criterion = squared_error, estimators = 100, learning rate = 0.1, loss = huber, rmse = 0.0027898015141627564
model 214 done with parameters: max_depth = 4, criterion = squared_error, estimators = 100, learning rate = 0.12, loss = squared_error, rmse = 0.002789152927054379
model 215 done with parameters: max_depth = 4, criterion = squared_error, estimators = 100, learning rate = 0.12, loss = absolute_error, rmse = 0.003691289937380604
model 216 done with parameters: max_depth = 4, criterion = squared_error, estimators = 100, learning rate = 0.12, loss = huber, rmse = 0.0028131115634149615
model 217 done with parameters:

In [21]:
gs_IP

((3, 'friedman_mse', 100, 0.08),
 [0.002958185553002535,
  0.006407018407676127,
  0.004428068812946768,
  0.002776729479836416,
  0.005372760873905239,
  0.003432291380610513,
  0.0026067464673304175,
  0.0048061652599137505,
  0.0030851908081413497,
  0.0026039764209378246,
  0.00477931059886155,
  0.003132758828900319,
  0.0027137581776954262,
  0.0042761812198638905,
  0.002879525183277261,
  0.002675563304797706,
  0.004136672056708204,
  0.0028028874014932675,
  0.00263901736188292,
  0.004243777202286242,
  0.0028285475219040047,
  0.0026600684459853496,
  0.003929768715246046,
  0.002738377789027572,
  0.0026049910482777846,
  0.0038342459402143712,
  0.0027842983111944423,
  0.002946016742644336,
  0.006400529273369599,
  0.004428402988000523,
  0.00274092287895698,
  0.005369459505880697,
  0.0034536502342627138,
  0.0026201916948474895,
  0.00481556253048331,
  0.0030731467770259437,
  0.0026640675291778127,
  0.004781983704751462,
  0.0031020685649265803,
  0.00264106682594

In [17]:
np.mean(baseline)

0.018969723709451568

### Saving models

In [36]:
pipe = make_pipeline(PCA(), GradientBoostingRegressor())

params = {'pca__n_components':0.9,
                'gradientboostingregressor__max_depth':2,
                'gradientboostingregressor__criterion':'friedman_mse',
                'gradientboostingregressor__n_estimators':100,
                'gradientboostingregressor__learning_rate':0.1}
pipe.set_params(**params)

In [None]:
tickers = utils.return_tickers()
for ticker in tickers:
    model = pipe
    model.fit(prep.readytotrain(ticker))
    joblib.dump(model, f"../raw_data/models/{ticker}_GradientBoostingRegressor_PCA.joblib")

In [None]:
tickers = utils.return_tickers()
y_pred = []
for ticker in tickers:
    model = joblib.load(model, f"model/{ticker}_GradientBoostingRegressor_PCA.joblib")
    y_pred.append(model.predict(df))