# 05 Quantile forecasting using Gradient Boosted Trees

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
QUANTILE_LB, QUANTILE_UB = 0.1, 0.9

stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['DEEPAKFERT', 'HDFCBANK', 'ITBEES', 'JUBLFOOD']

In [4]:
STOCK_SYMBOL = stock_symbols[0]
STOCK_SYMBOL

'DEEPAKFERT'

## Data loading

### Stock data

In [5]:
stock_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
)

stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,Range,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2020-01-01,97.20,97.85,96.30,96.60,96.75,96.95,169.0,76.20,153261,1.485918e+07,3161,1.55,0,0,0,1,1,2020,2,1,1,1,96.75,96.75,96.75,96.75,1.55,1.55,1.55,1.55,96.95,96.95,96.95,96.95,153261,153261,153261,153261,14859179,14859179,14859179,14859179,3161,3161,3161,3161,96.10,106.20,109.70,107.00
1,2020-01-02,96.85,102.40,96.60,100.00,100.35,100.17,169.0,76.20,637039,6.381519e+07,7526,5.80,1,0,0,2,1,2020,3,2,1,1,98.55,98.55,98.55,98.55,3.68,3.68,3.68,3.68,98.56,98.56,98.56,98.56,395150,395150,395150,395150,39337186,39337186,39337186,39337186,5343,5343,5343,5343,100.40,109.65,111.55,103.30
2,2020-01-03,99.70,104.80,99.00,101.20,100.75,101.90,169.0,76.20,562843,5.735485e+07,7402,5.80,1,0,0,3,1,2020,4,3,1,1,99.28,99.28,99.28,99.28,4.38,4.38,4.38,4.38,99.67,99.67,99.67,99.67,451047,451047,451047,451047,45343075,45343075,45343075,45343075,6029,6029,6029,6029,98.95,115.30,114.85,99.45
3,2020-01-06,100.50,100.85,95.25,95.70,96.10,97.91,169.0,76.20,326011,3.192018e+07,5214,5.60,0,0,0,6,1,2020,0,6,1,3,98.49,98.49,98.49,98.49,4.69,4.69,4.69,4.69,99.23,99.23,99.23,99.23,419788,419788,419788,419788,41987352,41987352,41987352,41987352,5825,5825,5825,5825,106.30,119.85,121.25,99.95
4,2020-01-07,97.35,101.00,97.10,99.95,100.40,99.09,169.0,76.20,307107,3.043237e+07,4983,3.90,1,0,0,7,1,2020,1,7,1,1,98.87,98.87,98.87,98.87,4.53,4.53,4.53,4.53,99.20,99.20,99.20,99.20,397252,397252,397252,397252,39676356,39676356,39676356,39676356,5657,5657,5657,5657,106.20,118.20,116.60,96.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
976,2024-02-12,515.50,517.10,497.85,502.80,500.05,506.51,714.7,497.85,654659,3.315936e+08,26328,19.25,0,1,0,12,2,2024,0,43,1,3,532.51,581.46,623.67,634.67,22.49,23.74,21.57,21.98,538.22,586.38,627.50,637.57,928983,781001,679848,729576,506187770,450855191,420705230,467952521,35476,29835,24604,24325,507.90,,,
977,2024-02-13,500.10,503.80,483.35,498.95,495.05,493.89,714.7,483.35,827799,4.088438e+08,39023,20.45,0,1,0,13,2,2024,1,44,1,1,521.22,571.23,617.25,632.67,21.92,23.85,21.44,22.16,525.74,575.63,620.93,635.50,858821,808121,657226,739972,455081470,459721553,399636888,472665722,34396,31390,24655,24855,494.50,,,
978,2024-02-14,495.00,499.70,488.50,490.25,490.80,493.16,714.7,483.35,633883,3.126049e+08,31700,11.20,0,0,0,14,2,2024,2,45,1,1,514.09,562.03,610.53,630.67,17.86,22.40,21.00,22.15,517.33,566.06,614.19,633.49,735754,818019,640038,747111,381813171,459958157,383406355,475772685,30659,31969,24742,25240,,,,
979,2024-02-15,493.50,515.30,491.95,508.10,507.90,508.02,714.7,483.35,995334,5.056480e+08,49286,23.35,1,0,0,15,2,2024,3,46,1,1,509.79,553.37,604.57,629.15,18.63,22.59,21.34,22.26,512.59,557.76,608.17,631.92,711399,860316,656385,757426,363912813,478450515,388672600,480421170,32106,34057,25877,25807,,,,


### Standardized data

In [6]:
standardized_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
)

standardized_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,1.005,1.011,0.995,0.998,1.002,1.747,0.788,0,0,0,1,1,2020,2,1,1,1,1.000,1.000,1.000,1.000,0.016,0.016,0.016,0.016,1.002,1.002,1.002,1.002,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,0.993,1.098,1.134,1.106
1,0.965,1.020,0.963,0.997,0.998,1.684,0.759,1,0,0,2,1,2020,3,2,1,1,0.982,0.982,0.982,0.982,0.037,0.037,0.037,0.037,0.982,0.982,0.982,0.982,0.620,0.620,0.620,0.620,0.616,0.616,0.616,0.616,0.710,0.710,0.710,0.710,1.000,1.093,1.112,1.029
2,0.990,1.040,0.983,1.004,1.011,1.677,0.756,1,0,0,3,1,2020,4,3,1,1,0.985,0.985,0.985,0.985,0.043,0.043,0.043,0.043,0.989,0.989,0.989,0.989,0.801,0.801,0.801,0.801,0.791,0.791,0.791,0.791,0.815,0.815,0.815,0.815,0.982,1.144,1.140,0.987
3,1.046,1.049,0.991,0.996,1.019,1.759,0.793,0,0,0,6,1,2020,0,6,1,3,1.025,1.025,1.025,1.025,0.049,0.049,0.049,0.049,1.033,1.033,1.033,1.033,1.288,1.288,1.288,1.288,1.315,1.315,1.315,1.315,1.117,1.117,1.117,1.117,1.106,1.247,1.262,1.040
4,0.970,1.006,0.967,0.996,0.987,1.683,0.759,1,0,0,7,1,2020,1,7,1,1,0.985,0.985,0.985,0.985,0.045,0.045,0.045,0.045,0.988,0.988,0.988,0.988,1.294,1.294,1.294,1.294,1.304,1.304,1.304,1.304,1.135,1.135,1.135,1.135,1.058,1.177,1.161,0.962
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
976,1.031,1.034,0.996,1.005,1.013,1.429,0.996,0,1,0,12,2,2024,0,43,1,3,1.065,1.163,1.247,1.269,0.045,0.047,0.043,0.044,1.076,1.173,1.255,1.275,1.419,1.193,1.038,1.114,1.527,1.360,1.269,1.411,1.347,1.133,0.935,0.924,1.016,,,
977,1.010,1.018,0.976,1.008,0.998,1.444,0.976,0,1,0,13,2,2024,1,44,1,1,1.053,1.154,1.247,1.278,0.044,0.048,0.043,0.045,1.062,1.163,1.254,1.284,1.037,0.976,0.794,0.894,1.113,1.124,0.977,1.156,0.881,0.804,0.632,0.637,0.999,,,
978,1.009,1.018,0.995,0.999,1.005,1.456,0.985,0,0,0,14,2,2024,2,45,1,1,1.047,1.145,1.244,1.285,0.036,0.046,0.043,0.045,1.054,1.153,1.251,1.291,1.161,1.290,1.010,1.179,1.221,1.471,1.226,1.522,0.967,1.008,0.781,0.796,,,,
979,0.972,1.015,0.969,1.000,1.000,1.407,0.952,1,0,0,15,2,2024,3,46,1,1,1.004,1.090,1.190,1.239,0.037,0.044,0.042,0.044,1.009,1.098,1.197,1.244,0.715,0.864,0.659,0.761,0.720,0.946,0.769,0.950,0.651,0.691,0.525,0.524,,,,


## Modelling

### Target columns

In [7]:
target_cols = standardized_df.filter(regex = "Target.*").columns.to_list()
target_cols

['Target 3D', 'Target 7D', 'Target 15D', 'Target 30D']

In [8]:
pred_input_df = standardized_df.drop(columns = target_cols)
pred_input_df.shape

(981, 41)

In [9]:
standardized_df[target_cols].describe()

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
count,978.0,974.0,966.0,951.0
mean,1.006635,1.015753,1.034381,1.073529
std,0.05873,0.092773,0.138773,0.20192
min,0.803,0.711,0.688,0.645
25%,0.974,0.965,0.94025,0.931
50%,1.001,1.0055,1.016,1.028
75%,1.034,1.057,1.10175,1.1695
max,1.414,1.526,1.562,1.77


### Data processing

In [10]:
def get_training_data(target_col: str):
    print(f"Target: {target_col}")
    X_df = standardized_df[standardized_df[target_col].notnull()].drop(columns = target_cols).copy()
    y = standardized_df[standardized_df[target_col].notnull()][target_col].copy()
    print(f"X.shape: {X_df.shape}")
    print(f"y.shape: {y.shape}")

    return X_df, y

### Grid search parameters

In [11]:
param_dict = {
    "n_estimators": [100, 125, 150],
    "learning_rate": [0.05, 0.1, 0.2],
    "max_depth": [2, 3, 4],
    "max_features": ["log2", "sqrt", 0.25],
    "subsample": [0.75, 1.0]
}

feature_importances = pd.DataFrame(
    index = standardized_df.drop(columns = target_cols).columns.to_list()
)

### Model building

In [12]:
def get_model():
    gb_model = GradientBoostingRegressor(
        loss = "squared_error",
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        gb_model,
        param_dict,
        n_iter = 12,
        cv = 5,
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def get_quantile_model(quantile: float):
    gb_model = GradientBoostingRegressor(
        loss = "quantile",
        alpha = quantile,
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        gb_model,
        param_dict,
        n_iter = 12,
        cv = 5,
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def print_results(y, preds):
    print(f"Target std: {y.std():.3f}")
    print(f"R2: {r2_score(y, preds):.3f}")
    print(f"MSE: {mean_squared_error(y, preds, squared = False):.3f}")
    print(f"MAE: {mean_absolute_error(y, preds):.3f}")

## Training

### `Target 3D`

#### Forecasting model

In [13]:
target_col = target_cols[0]
X, y = get_training_data(target_col)

Target: Target 3D
X.shape: (978, 41)
y.shape: (978,)


In [14]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 125,
 'max_features': 'sqrt',
 'max_depth': 4,
 'learning_rate': 0.05}

In [15]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.593735,0.050043,0.006845,0.00169,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.080243,-0.072605,0.030347,0.052788,-0.202619,-0.054467,0.091257,1
2,0.492346,0.037271,0.009058,0.002904,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-0.080245,-0.088111,0.015186,0.111864,-0.255828,-0.059427,0.12226,2
10,0.371945,0.038826,0.006186,0.00152,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-0.059741,-0.055374,0.020023,0.033285,-0.322983,-0.076958,0.128724,3
11,0.545979,0.007975,0.004852,0.000791,0.75,150,sqrt,4,0.05,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",-0.100154,-0.072314,0.024434,0.055825,-0.385426,-0.095527,0.156141,4
4,0.504537,0.076628,0.006031,0.001053,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.071152,-0.024063,0.031002,0.007294,-0.606004,-0.132584,0.239167,5


In [16]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.059
R2: 0.634
MSE: 0.036
MAE: 0.026


#### Lower quantile model

In [17]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [18]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [19]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 3D,Pred Target 3D,LB Target 3D,UB Target 3D
968,2024-01-31,618.15,540.75,580.56,563.39,625.44
969,2024-02-01,579.75,537.95,544.94,535.98,594.84
970,2024-02-02,574.05,535.1,547.34,533.5,584.84
971,2024-02-05,540.75,524.35,521.9,508.53,556.42
972,2024-02-06,537.95,515.3,518.9,510.17,556.1
973,2024-02-07,535.1,500.05,514.27,499.87,552.15
974,2024-02-08,524.35,495.05,505.41,495.31,544.19
975,2024-02-09,515.3,490.8,493.54,485.14,531.44
976,2024-02-12,500.05,507.9,501.66,475.49,521.57
977,2024-02-13,495.05,494.5,496.33,481.1,514.55


### `Target 7D`

#### Forecasting model

In [20]:
target_col = target_cols[1]
X, y = get_training_data(target_col)

Target: Target 7D
X.shape: (974, 41)
y.shape: (974,)


In [21]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 100,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.05}

In [22]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.46462,0.054993,0.008127,0.002844,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-0.012542,-0.267734,-0.206816,0.072585,-0.238475,-0.130597,0.135258,1
10,0.316285,0.008413,0.006422,0.002864,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-0.124929,-0.21742,-0.097991,0.017626,-0.293195,-0.143182,0.106066,2
8,0.776614,0.193522,0.005987,0.000948,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.147867,-0.302397,-0.197025,-0.030876,-0.098069,-0.155247,0.091859,3
4,0.488584,0.0479,0.013401,0.008111,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.111685,-0.160459,-0.212551,0.035259,-0.673813,-0.22465,0.239302,4
1,0.519476,0.077883,0.00702,0.001577,0.75,125,sqrt,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",-0.3521,-0.405187,-0.188553,-0.020734,-0.409696,-0.275254,0.150439,5


In [23]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.093
R2: 0.628
MSE: 0.057
MAE: 0.040


#### Lower quantile model

In [24]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [25]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [26]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 7D,Pred Target 7D,LB Target 7D,UB Target 7D
964,2024-01-24,637.7,540.75,569.71,536.19,669.35
965,2024-01-25,637.4,537.95,572.17,542.72,666.04
966,2024-01-29,628.1,535.1,568.87,535.22,661.77
967,2024-01-30,615.9,524.35,557.39,523.48,637.69
968,2024-01-31,618.15,515.3,556.92,526.11,636.75
969,2024-02-01,579.75,500.05,530.03,501.52,589.26
970,2024-02-02,574.05,495.05,525.68,495.05,594.13
971,2024-02-05,540.75,490.8,501.51,485.91,555.34
972,2024-02-06,537.95,507.9,507.52,495.3,564.28
973,2024-02-07,535.1,494.5,510.59,493.39,552.25


### `Target 15D`

#### Forecasting model

In [27]:
target_col = target_cols[2]
X, y = get_training_data(target_col)

Target: Target 15D
X.shape: (966, 41)
y.shape: (966,)


In [28]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 0.75,
 'n_estimators': 100,
 'max_features': 'sqrt',
 'max_depth': 3,
 'learning_rate': 0.05}

In [29]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
10,0.36348,0.033771,0.004667,0.000473,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-0.007276,-0.510165,-0.320396,-0.081955,-0.235246,-0.231008,0.177882,1
8,0.635265,0.104986,0.006717,0.002018,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.017061,-0.608054,-0.349776,-0.002459,-0.181196,-0.231709,0.226608,2
2,0.494089,0.084431,0.009447,0.004505,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",0.029171,-0.592906,-0.349116,0.004809,-0.322964,-0.246201,0.234744,3
11,0.439197,0.020192,0.00312,0.00015,0.75,150,sqrt,4,0.05,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",0.13183,-0.591699,-0.417737,-0.078001,-0.493588,-0.289839,0.272643,4
4,0.474804,0.112467,0.007789,0.001471,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",0.092919,-0.537974,-0.331596,0.07864,-0.814045,-0.302411,0.352022,5


In [30]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.139
R2: 0.594
MSE: 0.088
MAE: 0.065


#### Lower quantile model

In [31]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [32]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [33]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 15D,Pred Target 15D,LB Target 15D,UB Target 15D
956,2024-01-12,655.7,540.75,584.89,540.92,671.16
957,2024-01-15,647.65,537.95,563.75,531.06,666.9
958,2024-01-16,662.25,535.1,590.71,534.93,676.84
959,2024-01-17,653.6,524.35,570.9,523.96,672.27
960,2024-01-18,647.8,515.3,559.13,512.51,667.48
961,2024-01-19,644.75,500.05,560.06,499.73,664.11
962,2024-01-20,648.4,495.05,567.29,496.41,680.08
963,2024-01-23,628.9,490.8,566.81,490.07,676.95
964,2024-01-24,637.7,507.9,574.34,505.65,675.69
965,2024-01-25,637.4,494.5,576.22,498.81,674.73


### `Target 30D`

#### Forecasting model

In [34]:
target_col = target_cols[3]
X, y = get_training_data(target_col)

Target: Target 30D
X.shape: (951, 41)
y.shape: (951,)


In [35]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 100,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.05}

In [36]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.549923,0.098545,0.00566,0.001748,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",0.071227,-0.561349,-0.435807,-0.078446,-0.470638,-0.295002,0.246022,1
9,0.612445,0.033721,0.004979,0.001015,1.0,150,log2,4,0.1,"{'subsample': 1.0, 'n_estimators': 150, 'max_f...",-0.039283,-0.656964,-0.560252,-0.186452,-0.373664,-0.363323,0.228647,2
7,0.710815,0.164784,0.005457,0.001058,0.75,125,0.25,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",-0.377073,-0.743552,-0.594123,-0.117136,-0.119397,-0.390256,0.250804,3
4,0.488437,0.117675,0.005434,0.001692,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",0.061473,-0.684413,-0.713019,-0.15415,-0.528446,-0.403711,0.30617,4
8,0.692572,0.141513,0.005005,0.000669,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",0.031091,-0.880079,-0.623679,-0.209365,-0.365757,-0.409558,0.317303,5


In [37]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.202
R2: 0.867
MSE: 0.073
MAE: 0.054


#### Lower quantile model

In [38]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [39]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [40]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 30D,Pred Target 30D,LB Target 30D,UB Target 30D
941,2023-12-21,663.85,540.75,603.58,536.52,689.95
942,2023-12-22,676.75,537.95,583.72,532.1,676.51
943,2023-12-26,673.95,535.1,575.2,533.42,679.79
944,2023-12-27,677.75,524.35,567.78,513.26,687.36
945,2023-12-28,675.8,515.3,572.42,528.62,682.1
946,2023-12-29,679.4,500.05,571.78,527.37,690.8
947,2024-01-01,687.8,495.05,611.39,494.88,831.48
948,2024-01-02,692.25,490.8,559.75,487.53,813.56
949,2024-01-03,686.8,507.9,563.41,506.96,797.46
950,2024-01-04,695.8,494.5,551.03,493.44,811.33


## Feature importances

In [41]:
feature_importances['Mean'] = feature_importances.mean(axis = 1)
(feature_importances.sort_values('Mean', ascending = False) * 100).round(1)

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D,Mean
DayOfYear,6.1,9.9,11.3,9.9,9.3
52W L,4.2,6.0,9.6,10.6,7.6
52W H,4.4,7.1,8.7,8.6,7.2
Year,1.9,5.5,7.9,12.9,7.0
Range 30MA,5.4,6.7,6.4,4.9,5.8
Range 60MA,3.4,5.7,6.1,5.9,5.3
Month,1.5,4.1,4.5,9.5,4.9
Range 15MA,4.0,5.1,6.2,3.5,4.7
Close 60MA,3.0,5.7,2.2,4.2,3.8
Range 7MA,2.5,2.5,6.2,3.3,3.6


## Forecasts

In [42]:
stock_df.filter(regex = "(Date)|(Close$)|(Pred.*)").iloc[-10:, :]

Unnamed: 0,Date,Close,Pred Target 3D,Pred Target 7D,Pred Target 15D,Pred Target 30D
971,2024-02-05,540.75,521.9,501.51,522.31,525.03
972,2024-02-06,537.95,518.9,507.52,520.32,525.93
973,2024-02-07,535.1,514.27,510.59,519.93,512.77
974,2024-02-08,524.35,505.41,501.16,517.38,511.7
975,2024-02-09,515.3,493.54,497.71,502.73,510.06
976,2024-02-12,500.05,501.66,481.44,486.11,503.27
977,2024-02-13,495.05,496.33,478.46,479.5,502.76
978,2024-02-14,490.8,486.87,476.31,474.31,484.62
979,2024-02-15,507.9,495.72,490.12,493.33,485.44
980,2024-02-16,494.5,487.74,473.82,475.61,468.51


In [43]:
latest_preds = stock_df.iloc[-1]
print(f"Date: {latest_preds['Date'].date()}")
print(f"Close: {latest_preds['Close']}")

for target_col in target_cols:
    print(f"{target_col}: {latest_preds[f'Pred {target_col}']}", end = " ")
    print(f"({latest_preds[f'LB {target_col}']} to {latest_preds[f'UB {target_col}']})")

Date: 2024-02-16
Close: 494.5
Target 3D: 487.74 (466.74 to 511.96)
Target 7D: 473.82 (459.78 to 528.47)
Target 15D: 475.61 (429.54 to 542.02)
Target 30D: 468.51 (386.22 to 545.21)
