# 05 Quantile forecasting using Gradient Boosted Trees

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
QUANTILE_LB, QUANTILE_UB = 0.1, 0.9

stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['HDFCBANK', 'ITBEES', 'JUBLFOOD']

In [4]:
STOCK_SYMBOL = stock_symbols[2]
STOCK_SYMBOL

'JUBLFOOD'

## Data loading

### Stock data

In [5]:
stock_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
)

stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,Range,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2020-01-01,1656.95,1673.00,1652.80,1664.6,1663.15,1664.37,1673.00,1077.9,1142694,1.901871e+09,27428,20.20,1,0,1,1,1,2020,2,1,1,1,1663.15,1663.15,1663.15,1663.15,20.20,20.20,20.20,20.20,1664.37,1664.37,1664.37,1664.37,1142694,1142694,1142694,1142694,1901871196,1901871196,1901871196,1901871196,27428,27428,27428,27428,1679.60,1727.80,1773.80,1825.55
1,2020-01-02,1665.00,1688.35,1655.50,1686.0,1682.70,1675.06,1688.35,1077.9,840514,1.407907e+09,26148,32.85,1,0,1,2,1,2020,3,2,1,1,1672.93,1672.93,1672.93,1672.93,26.52,26.52,26.52,26.52,1669.72,1669.72,1669.72,1669.72,991604,991604,991604,991604,1654889284,1654889284,1654889284,1654889284,26788,26788,26788,26788,1693.65,1731.35,1793.55,1820.00
2,2020-01-03,1681.00,1696.90,1668.05,1679.0,1682.75,1685.46,1696.90,1077.9,975751,1.644585e+09,26219,28.85,1,0,1,3,1,2020,4,3,1,1,1676.20,1676.20,1676.20,1676.20,27.30,27.30,27.30,27.30,1674.96,1674.96,1674.96,1674.96,986319,986319,986319,986319,1651454555,1651454555,1651454555,1651454555,26598,26598,26598,26598,1725.70,1720.50,1810.10,1835.10
3,2020-01-06,1670.30,1688.85,1664.25,1679.0,1679.60,1676.81,1696.90,1077.9,586899,9.841203e+08,20012,24.60,1,0,0,6,1,2020,0,6,1,3,1677.05,1677.05,1677.05,1677.05,26.62,26.62,26.62,26.62,1675.42,1675.42,1675.42,1675.42,886464,886464,886464,886464,1484620980,1484620980,1484620980,1484620980,24951,24951,24951,24951,1714.30,1753.45,1822.90,1812.60
4,2020-01-07,1688.00,1702.50,1684.05,1689.4,1693.65,1693.57,1702.50,1077.9,845385,1.431717e+09,23848,18.45,1,0,1,7,1,2020,1,7,1,1,1680.37,1680.37,1680.37,1680.37,24.99,24.99,24.99,24.99,1679.05,1679.05,1679.05,1679.05,878248,878248,878248,878248,1474040255,1474040255,1474040255,1474040255,24731,24731,24731,24731,1727.80,1746.70,1754.30,1833.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1027,2024-01-29,508.75,518.45,504.10,518.0,513.15,510.72,586.95,412.1,6212204,3.172690e+09,77868,14.35,1,0,0,29,1,2024,0,29,1,4,520.95,524.86,544.52,539.78,14.04,13.58,13.55,11.82,520.32,525.42,545.46,540.22,3169424,2587621,2427123,2134424,1640447875,1353563596,1320179854,1158761061,64065,56940,50662,42078,501.90,,,
1028,2024-01-30,517.15,521.90,508.05,513.0,510.15,513.73,586.95,412.1,2464481,1.266071e+09,34342,13.85,0,0,0,30,1,2024,1,30,1,1,518.93,523.25,542.67,539.93,14.17,12.90,13.50,11.96,519.10,523.67,543.66,540.42,3160963,2591987,2453664,2166395,1633013466,1351599355,1330811888,1175295089,58966,54449,50569,42443,494.85,,,
1029,2024-01-31,514.00,526.15,509.50,518.2,519.55,518.54,586.95,412.1,4259420,2.208667e+09,92424,16.65,1,0,0,31,1,2024,2,31,1,1,517.20,522.91,541.35,540.14,15.08,12.91,13.80,12.12,517.10,522.96,542.20,540.61,3445825,2697419,2544398,2211641,1776201812,1404377593,1375624151,1199049589,60480,58094,52034,43717,,,,
1030,2024-02-01,513.00,518.45,498.10,501.4,501.90,505.93,586.95,412.1,6644517,3.361652e+09,131585,20.35,0,0,0,1,2,2024,3,32,1,1,514.16,521.51,539.25,540.04,15.99,13.71,13.99,12.34,514.12,521.94,540.13,540.57,4301178,3028044,2686249,2305913,2206989109,1569930520,1442436250,1246708400,76467,64211,54870,45393,,,,


### Standardized data

In [6]:
standardized_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
)

standardized_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,0.996,1.006,0.994,1.001,1.001,1.006,0.648,1,0,1,1,1,2020,2,1,1,1,1.000,1.000,1.000,1.000,0.012,0.012,0.012,0.012,1.001,1.001,1.001,1.001,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.010,1.039,1.067,1.098
1,0.989,1.003,0.984,1.002,0.995,1.003,0.641,1,0,1,2,1,2020,3,2,1,1,0.994,0.994,0.994,0.994,0.016,0.016,0.016,0.016,0.992,0.992,0.992,0.992,1.180,1.180,1.180,1.180,1.175,1.175,1.175,1.175,1.024,1.024,1.024,1.024,1.007,1.029,1.066,1.082
2,0.999,1.008,0.991,0.998,1.002,1.008,0.641,1,0,1,3,1,2020,4,3,1,1,0.996,0.996,0.996,0.996,0.016,0.016,0.016,0.016,0.995,0.995,0.995,0.995,1.011,1.011,1.011,1.011,1.004,1.004,1.004,1.004,1.014,1.014,1.014,1.014,1.026,1.022,1.076,1.091
3,0.994,1.006,0.991,1.000,0.998,1.010,0.642,1,0,0,6,1,2020,0,6,1,3,0.998,0.998,0.998,0.998,0.016,0.016,0.016,0.016,0.998,0.998,0.998,0.998,1.510,1.510,1.510,1.510,1.509,1.509,1.509,1.509,1.247,1.247,1.247,1.247,1.021,1.044,1.085,1.079
4,0.997,1.005,0.994,0.997,1.000,1.005,0.636,1,0,1,7,1,2020,1,7,1,1,0.992,0.992,0.992,0.992,0.015,0.015,0.015,0.015,0.991,0.991,0.991,0.991,1.039,1.039,1.039,1.039,1.030,1.030,1.030,1.030,1.037,1.037,1.037,1.037,1.020,1.031,1.036,1.082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1027,0.991,1.010,0.982,1.009,0.995,1.144,0.803,1,0,0,29,1,2024,0,29,1,4,1.015,1.023,1.061,1.052,0.027,0.026,0.026,0.023,1.014,1.024,1.063,1.053,0.510,0.417,0.391,0.344,0.517,0.427,0.416,0.365,0.823,0.731,0.651,0.540,0.978,,,
1028,1.014,1.023,0.996,1.006,1.007,1.151,0.808,0,0,0,30,1,2024,1,30,1,1,1.017,1.026,1.064,1.058,0.028,0.025,0.026,0.023,1.018,1.027,1.066,1.059,1.283,1.052,0.996,0.879,1.290,1.068,1.051,0.928,1.717,1.585,1.473,1.236,0.970,,,
1029,0.989,1.013,0.981,0.997,0.998,1.130,0.793,1,0,0,31,1,2024,2,31,1,1,0.995,1.006,1.042,1.040,0.029,0.025,0.027,0.023,0.995,1.007,1.044,1.041,0.809,0.633,0.597,0.519,0.804,0.636,0.623,0.543,0.654,0.629,0.563,0.473,,,,
1030,1.022,1.033,0.992,0.999,1.008,1.169,0.821,0,0,0,1,2,2024,3,32,1,1,1.024,1.039,1.074,1.076,0.032,0.027,0.028,0.025,1.024,1.040,1.076,1.077,0.647,0.456,0.404,0.347,0.657,0.467,0.429,0.371,0.581,0.488,0.417,0.345,,,,


## Modelling

### Target columns

In [7]:
target_cols = standardized_df.filter(regex = "Target.*").columns.to_list()
target_cols

['Target 3D', 'Target 7D', 'Target 15D', 'Target 30D']

In [8]:
pred_input_df = standardized_df.drop(columns = target_cols)
pred_input_df.shape

(1032, 41)

In [9]:
standardized_df[target_cols].describe()

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
count,1029.0,1025.0,1017.0,1002.0
mean,0.999603,0.99888,0.997663,0.995811
std,0.057643,0.08587,0.125663,0.180719
min,0.197,0.187,0.169,0.166
25%,0.982,0.973,0.955,0.931
50%,1.001,1.004,1.004,1.019
75%,1.023,1.036,1.061,1.096
max,1.22,1.232,1.222,1.353


### Data processing

In [10]:
def get_training_data(target_col: str):
    print(f"Target: {target_col}")
    X_df = standardized_df[standardized_df[target_col].notnull()].drop(columns = target_cols).copy()
    y = standardized_df[standardized_df[target_col].notnull()][target_col].copy()
    print(f"X.shape: {X_df.shape}")
    print(f"y.shape: {y.shape}")

    return X_df, y

### Grid search parameters

In [11]:
param_dict = {
    "n_estimators": [100, 125, 150],
    "learning_rate": [0.05, 0.1, 0.2],
    "max_depth": [2, 3, 4],
    "max_features": ["log2", "sqrt", 0.25],
    "subsample": [0.75, 1.0]
}

feature_importances = pd.DataFrame(
    index = standardized_df.drop(columns = target_cols).columns.to_list()
)

### Model building

In [12]:
def get_model():
    gb_model = GradientBoostingRegressor(
        loss = "squared_error",
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        gb_model,
        param_dict,
        n_iter = 12,
        cv = 5,
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def get_quantile_model(quantile: float):
    gb_model = GradientBoostingRegressor(
        loss = "quantile",
        alpha = quantile,
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        gb_model,
        param_dict,
        n_iter = 12,
        cv = 5,
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def print_results(y, preds):
    print(f"Target std: {y.std():.3f}")
    print(f"R2: {r2_score(y, preds):.3f}")
    print(f"MSE: {mean_squared_error(y, preds, squared = False):.3f}")
    print(f"MAE: {mean_absolute_error(y, preds):.3f}")

## Training

### `Target 3D`

#### Forecasting model

In [13]:
target_col = target_cols[0]
X, y = get_training_data(target_col)

Target: Target 3D
X.shape: (1029, 41)
y.shape: (1029,)


In [14]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 100,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.05}

In [15]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.406167,0.01775,0.010526,0.003965,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-1.095898,-0.880904,-0.071186,-0.676791,-0.693353,-0.683626,0.341793,1
10,0.342399,0.046016,0.006146,0.00218,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-0.356781,-0.765592,-0.074743,-1.904843,-1.304923,-0.881376,0.6582,2
4,0.685896,0.083248,0.011437,0.00509,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-2.277586,-1.460971,-0.1114,-1.149002,-0.969657,-1.193723,0.702824,3
8,0.70587,0.149374,0.006222,0.001771,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.889887,-1.097139,-0.080894,-2.549252,-1.438902,-1.211215,0.804497,4
1,0.461542,0.032244,0.006565,0.00169,0.75,125,sqrt,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",-0.478071,-2.070754,-0.093368,-2.045528,-1.676387,-1.272821,0.826967,5


In [16]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.058
R2: 0.749
MSE: 0.029
MAE: 0.021


#### Lower quantile model

In [17]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'sqrt',
 'max_depth': 4,
 'learning_rate': 0.05}

#### Upper quantile model

In [18]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'sqrt',
 'max_depth': 3,
 'learning_rate': 0.1}

#### Predictions

In [19]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 3D,Pred Target 3D,LB Target 3D,UB Target 3D
1019,2024-01-16,530.4,531.65,526.66,508.86,544.16
1020,2024-01-17,527.7,523.2,523.08,512.55,539.88
1021,2024-01-18,524.3,520.6,519.56,509.57,538.27
1022,2024-01-19,531.65,525.5,526.06,513.47,545.75
1023,2024-01-20,523.2,508.25,515.58,506.15,538.62
1024,2024-01-23,520.6,513.15,515.52,505.01,533.59
1025,2024-01-24,525.5,510.15,519.23,508.59,538.01
1026,2024-01-25,508.25,519.55,501.99,494.66,519.72
1027,2024-01-29,513.15,501.9,503.43,495.82,530.42
1028,2024-01-30,510.15,494.85,501.96,491.5,527.86


### `Target 7D`

#### Forecasting model

In [20]:
target_col = target_cols[1]
X, y = get_training_data(target_col)

Target: Target 7D
X.shape: (1025, 41)
y.shape: (1025,)


In [21]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 125,
 'max_features': 'sqrt',
 'max_depth': 4,
 'learning_rate': 0.05}

In [22]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.51092,0.085497,0.005126,0.000536,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.688193,-0.982416,-0.117965,-0.514307,-3.813867,-1.22335,1.32513,1
2,0.427113,0.030489,0.008124,0.00259,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-0.664249,-1.004281,-0.102769,-1.72443,-4.906371,-1.68042,1.696316,2
11,0.599411,0.0266,0.004746,0.001164,0.75,150,sqrt,4,0.05,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",-1.296255,-1.085192,-0.122345,-1.32434,-4.855001,-1.736627,1.61978,3
4,0.379072,0.034666,0.004968,0.000536,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-1.981665,-1.306665,-0.122198,-3.365211,-1.987115,-1.752571,1.054993,4
10,0.432429,0.027774,0.008367,0.004226,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-0.679068,-1.023088,-0.10049,-2.081058,-6.099005,-1.996542,2.15015,5


In [23]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.086
R2: 0.865
MSE: 0.032
MAE: 0.024


#### Lower quantile model

In [24]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 1.0,
 'n_estimators': 125,
 'max_features': 'sqrt',
 'max_depth': 4,
 'learning_rate': 0.05}

#### Upper quantile model

In [25]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [26]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 7D,Pred Target 7D,LB Target 7D,UB Target 7D
1015,2024-01-10,522.9,531.65,512.52,497.85,536.35
1016,2024-01-11,527.7,523.2,517.01,505.96,540.68
1017,2024-01-12,525.65,520.6,514.82,499.75,542.89
1018,2024-01-15,532.95,525.5,524.51,505.19,553.14
1019,2024-01-16,530.4,508.25,516.25,485.09,550.17
1020,2024-01-17,527.7,513.15,518.65,502.03,544.77
1021,2024-01-18,524.3,510.15,514.9,499.29,541.59
1022,2024-01-19,531.65,519.55,520.17,499.79,555.6
1023,2024-01-20,523.2,501.9,505.29,497.05,542.18
1024,2024-01-23,520.6,494.85,506.38,490.73,538.79


### `Target 15D`

#### Forecasting model

In [27]:
target_col = target_cols[2]
X, y = get_training_data(target_col)

Target: Target 15D
X.shape: (1017, 41)
y.shape: (1017,)


In [28]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'sqrt',
 'max_depth': 4,
 'learning_rate': 0.05}

In [29]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
11,0.478718,0.028481,0.003772,0.000763,0.75,150,sqrt,4,0.05,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",-1.03886,-1.107709,-0.248284,-0.673239,-3.005512,-1.214721,0.946205,1
2,0.590048,0.067684,0.012193,0.002391,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-1.016801,-0.80756,-0.207229,-3.038786,-2.307407,-1.475557,1.039326,2
0,0.987827,0.297071,0.009779,0.002631,0.75,150,log2,4,0.1,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",-0.482919,-1.235471,-0.270876,-3.468768,-2.049888,-1.501584,1.16564,3
8,0.641283,0.022968,0.009337,0.002448,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.510084,-0.926635,-0.237166,-1.904339,-3.935117,-1.502668,1.341393,4
4,0.711044,0.279632,0.010597,0.004804,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-2.142986,-1.266432,-0.222914,-2.655502,-1.276389,-1.512844,0.834661,5


In [30]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.126
R2: 0.934
MSE: 0.032
MAE: 0.025


#### Lower quantile model

In [31]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [32]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [33]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 15D,Pred Target 15D,LB Target 15D,UB Target 15D
1007,2023-12-29,565.05,531.65,541.11,531.68,585.93
1008,2024-01-01,561.55,523.2,533.91,521.08,588.48
1009,2024-01-02,558.6,520.6,526.17,520.75,573.86
1010,2024-01-03,554.0,525.5,533.93,522.65,586.31
1011,2024-01-04,561.15,508.25,530.84,512.1,582.0
1012,2024-01-05,555.55,513.15,537.05,514.29,596.02
1013,2024-01-08,534.35,510.15,512.82,506.45,561.81
1014,2024-01-09,524.6,519.55,511.28,500.09,541.88
1015,2024-01-10,522.9,501.9,508.57,495.42,542.15
1016,2024-01-11,527.7,494.85,510.55,494.25,544.45


### `Target 30D`

#### Forecasting model

In [34]:
target_col = target_cols[3]
X, y = get_training_data(target_col)

Target: Target 30D
X.shape: (1002, 41)
y.shape: (1002,)


In [35]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 100,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.05}

In [36]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.416689,0.060586,0.010003,0.008805,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-2.003164,-1.862297,-0.695959,-2.639965,-3.525609,-2.145399,0.932827,1
7,0.827244,0.088611,0.012594,0.011789,0.75,125,0.25,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",-2.382123,-2.273997,-0.823396,-1.34894,-5.010461,-2.367783,1.443324,2
10,0.382069,0.031263,0.005262,0.000656,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-2.826931,-1.612098,-0.623383,-3.614736,-3.580484,-2.451526,1.1675,3
9,0.715627,0.038968,0.006497,0.002275,1.0,150,log2,4,0.1,"{'subsample': 1.0, 'n_estimators': 150, 'max_f...",-2.520483,-2.508609,-0.768002,-2.83535,-3.735147,-2.473518,0.962793,4
4,0.643569,0.064674,0.008182,0.006161,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-3.604657,-1.773585,-0.72527,-3.517768,-2.85375,-2.495006,1.100771,5


In [37]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.181
R2: 0.929
MSE: 0.048
MAE: 0.037


#### Lower quantile model

In [38]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [39]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'sqrt',
 'max_depth': 3,
 'learning_rate': 0.1}

#### Predictions

In [40]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 30D,Pred Target 30D,LB Target 30D,UB Target 30D
992,2023-12-07,561.0,531.65,531.64,509.07,563.8
993,2023-12-08,559.75,523.2,531.36,507.38,562.65
994,2023-12-11,560.55,520.6,532.12,509.19,563.75
995,2023-12-12,558.3,525.5,531.79,505.17,562.94
996,2023-12-13,556.15,508.25,525.77,503.79,563.01
997,2023-12-14,569.05,513.15,530.84,513.95,571.56
998,2023-12-15,565.45,510.15,526.85,508.58,569.45
999,2023-12-18,559.1,519.55,526.55,508.15,568.16
1000,2023-12-19,564.95,501.9,527.59,501.79,572.58
1001,2023-12-20,562.35,494.85,526.09,496.59,566.26


## Feature importances

In [41]:
feature_importances['Mean'] = feature_importances.mean(axis = 1)
(feature_importances.sort_values('Mean', ascending = False) * 100).round(1)

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D,Mean
52W H,2.4,13.3,5.2,17.1,9.5
Range 60MA,3.8,5.1,17.1,9.6,8.9
Range 15MA,6.3,12.9,7.2,5.4,8.0
DayOfYear,4.7,6.9,9.7,8.3,7.4
DaysSinceLastTradingSession,15.4,4.9,2.5,1.4,6.0
Month,1.9,6.6,5.3,8.5,5.6
VWAP 60MA,2.3,5.1,4.7,8.7,5.2
52W L,8.7,3.0,2.9,5.5,5.0
Range 30MA,2.7,7.0,2.9,5.3,4.5
Close 60MA,3.9,2.1,5.6,3.8,3.8


## Forecasts

In [42]:
stock_df.filter(regex = "(Date)|(Close$)|(Pred.*)").iloc[-10:, :]

Unnamed: 0,Date,Close,Pred Target 3D,Pred Target 7D,Pred Target 15D,Pred Target 30D
1022,2024-01-19,531.65,526.06,520.17,509.33,494.7
1023,2024-01-20,523.2,515.58,505.29,475.5,509.0
1024,2024-01-23,520.6,515.52,506.38,506.67,489.12
1025,2024-01-24,525.5,519.23,514.91,511.18,473.51
1026,2024-01-25,508.25,501.99,490.66,490.22,444.64
1027,2024-01-29,513.15,503.43,495.75,498.96,470.04
1028,2024-01-30,510.15,501.96,489.43,480.86,473.82
1029,2024-01-31,519.55,513.0,501.79,502.1,477.99
1030,2024-02-01,501.9,495.01,486.55,479.58,443.76
1031,2024-02-02,494.85,488.16,487.7,471.19,437.04


In [43]:
latest_preds = stock_df.iloc[-1]
print(f"Date: {latest_preds['Date'].date()}")
print(f"Close: {latest_preds['Close']}")

for target_col in target_cols:
    print(f"{target_col}: {latest_preds[f'Pred {target_col}']}", end = " ")
    print(f"({latest_preds[f'LB {target_col}']} to {latest_preds[f'UB {target_col}']})")

Date: 2024-02-02
Close: 494.85
Target 3D: 488.16 (476.01 to 506.77)
Target 7D: 487.7 (463.63 to 509.16)
Target 15D: 471.19 (435.53 to 515.82)
Target 30D: 437.04 (433.67 to 516.16)
