# 05 Quantile forecasting using Gradient Boosted Trees

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
QUANTILE_LB, QUANTILE_UB = 0.1, 0.9

stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['DEEPAKFERT', 'HDFCBANK', 'INDIGOPNTS', 'ITBEES', 'JUBLFOOD']

In [4]:
STOCK_SYMBOL = stock_symbols[2]
STOCK_SYMBOL

'INDIGOPNTS'

## Data loading

### Stock data

In [5]:
stock_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
)

stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,Range,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2021-02-02,2607.50,3129.00,2436.05,3129.00,3117.15,2684.35,3129.00,2436.05,12652036,3.396245e+10,812580,692.95,1,1,1,2,2,2021,1,33,1,1,3117.15,3117.15,3117.15,3117.15,692.95,692.95,692.95,692.95,2684.35,2684.35,2684.35,2684.35,12652036,12652036,12652036,12652036,33962446981,33962446981,33962446981,33962446981,812580,812580,812580,812580,2630.05,2649.40,2510.95,2367.80
1,2021-02-03,3239.00,3329.95,2831.25,2953.45,2924.25,3085.39,3329.95,2436.05,2955005,9.117329e+09,375333,498.70,0,0,1,3,2,2021,2,34,1,1,3020.70,3020.70,3020.70,3020.70,595.82,595.82,595.82,595.82,2884.87,2884.87,2884.87,2884.87,7803520,7803520,7803520,7803520,21539888009,21539888009,21539888009,21539888009,593956,593956,593956,593956,2677.05,2610.75,2487.95,2305.80
2,2021-02-04,2948.85,2967.95,2853.00,2866.00,2873.75,2902.67,3329.95,2436.05,476054,1.381829e+09,73464,114.95,0,0,0,4,2,2021,3,35,1,1,2971.72,2971.72,2971.72,2971.72,435.53,435.53,435.53,435.53,2890.80,2890.80,2890.80,2890.80,5361031,5361031,5361031,5361031,14820534938,14820534938,14820534938,14820534938,420459,420459,420459,420459,2704.85,2595.65,2499.70,2285.45
3,2021-02-05,2866.00,2900.00,2603.00,2621.00,2630.05,2694.77,3329.95,2436.05,587315,1.582680e+09,103675,297.00,0,0,0,5,2,2021,4,36,1,1,2886.30,2886.30,2886.30,2886.30,400.90,400.90,400.90,400.90,2841.80,2841.80,2841.80,2841.80,4167602,4167602,4167602,4167602,11511071310,11511071310,11511071310,11511071310,341263,341263,341263,341263,2666.25,2564.55,2522.20,2378.50
4,2021-02-08,2590.00,2736.00,2551.00,2700.00,2677.05,2666.44,3329.95,2436.05,507665,1.353659e+09,55033,185.00,1,0,0,8,2,2021,0,39,1,3,2844.45,2844.45,2844.45,2844.45,357.72,357.72,357.72,357.72,2806.72,2806.72,2806.72,2806.72,3435615,3435615,3435615,3435615,9479588872,9479588872,9479588872,9479588872,284017,284017,284017,284017,2649.40,2642.50,2506.15,2367.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,2024-02-19,1517.00,1517.00,1495.10,1512.85,1511.80,1507.80,1679.95,981.95,59787,9.014660e+07,7776,21.90,0,0,0,19,2,2024,0,50,1,3,1473.09,1449.51,1460.62,1472.00,55.49,47.82,45.23,36.15,1482.80,1455.74,1463.98,1474.14,229740,141834,134431,98886,338786942,207849552,197851208,145956748,24460,16612,13663,10986,1451.05,,,
756,2024-02-20,1521.00,1521.00,1492.25,1503.00,1503.50,1503.27,1679.95,981.95,48951,7.358659e+07,6424,28.75,0,0,0,20,2,2024,1,51,1,1,1484.54,1453.20,1461.24,1472.52,43.88,46.74,45.16,36.13,1490.49,1459.14,1464.43,1474.61,170199,142950,134887,98819,252862455,209635951,198552770,145880554,18176,16698,13675,10962,1443.50,,,
757,2024-02-21,1505.00,1515.95,1475.50,1490.10,1492.05,1496.00,1679.95,981.95,66167,9.898576e+07,9274,40.45,0,0,0,21,2,2024,2,52,1,1,1490.64,1456.44,1461.30,1472.68,40.30,47.90,45.71,36.48,1493.76,1462.89,1464.66,1474.92,104563,144848,135534,99479,156387430,212617675,199532276,146875939,12797,16863,13722,11043,,,,
758,2024-02-22,1487.00,1488.90,1446.05,1451.00,1451.05,1460.34,1679.95,981.95,98408,1.437093e+08,14285,42.85,0,0,0,22,2,2024,3,53,1,1,1487.30,1458.40,1460.31,1471.87,39.04,47.95,46.40,36.79,1491.52,1464.93,1464.15,1474.39,94564,148361,137455,100396,141408559,217840828,202316250,148192613,11945,17202,13916,11152,,,,


### Standardized data

In [6]:
standardized_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
)

standardized_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,0.837,1.004,0.781,1.004,0.861,1.004,0.781,1,1,1,2,2,2021,1,33,1,1,1.000,1.000,1.000,1.000,0.222,0.222,0.222,0.222,0.861,0.861,0.861,0.861,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,0.844,0.850,0.806,0.760
1,1.108,1.139,0.968,1.010,1.055,1.139,0.833,0,0,1,3,2,2021,2,34,1,1,1.033,1.033,1.033,1.033,0.204,0.204,0.204,0.204,0.987,0.987,0.987,0.987,2.641,2.641,2.641,2.641,2.363,2.363,2.363,2.363,1.582,1.582,1.582,1.582,0.915,0.893,0.851,0.789
2,1.026,1.033,0.993,0.997,1.010,1.159,0.848,0,0,0,4,2,2021,3,35,1,1,1.034,1.034,1.034,1.034,0.152,0.152,0.152,0.152,1.006,1.006,1.006,1.006,11.261,11.261,11.261,11.261,10.725,10.725,10.725,10.725,5.723,5.723,5.723,5.723,0.941,0.903,0.870,0.795
3,1.090,1.103,0.990,0.997,1.025,1.266,0.926,0,0,0,5,2,2021,4,36,1,1,1.097,1.097,1.097,1.097,0.152,0.152,0.152,0.152,1.081,1.081,1.081,1.081,7.096,7.096,7.096,7.096,7.273,7.273,7.273,7.273,3.292,3.292,3.292,3.292,1.014,0.975,0.959,0.904
4,0.967,1.022,0.953,1.009,0.996,1.244,0.910,1,0,0,8,2,2021,0,39,1,3,1.063,1.063,1.063,1.063,0.134,0.134,0.134,0.134,1.048,1.048,1.048,1.048,6.767,6.767,6.767,6.767,7.003,7.003,7.003,7.003,5.161,5.161,5.161,5.161,0.990,0.987,0.936,0.884
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,1.003,1.003,0.989,1.001,0.997,1.111,0.650,0,0,0,19,2,2024,0,50,1,3,0.974,0.959,0.966,0.974,0.037,0.032,0.030,0.024,0.981,0.963,0.968,0.975,3.843,2.372,2.248,1.654,3.758,2.306,2.195,1.619,3.146,2.136,1.757,1.413,0.960,,,
756,1.012,1.012,0.993,1.000,1.000,1.117,0.653,0,0,0,20,2,2024,1,51,1,1,0.987,0.967,0.972,0.979,0.029,0.031,0.030,0.024,0.991,0.970,0.974,0.981,3.477,2.920,2.756,2.019,3.436,2.849,2.698,1.982,2.829,2.599,2.129,1.706,0.960,,,
757,1.009,1.016,0.989,0.999,1.003,1.126,0.658,0,0,0,21,2,2024,2,52,1,1,0.999,0.976,0.979,0.987,0.027,0.032,0.031,0.024,1.001,0.980,0.982,0.989,1.580,2.189,2.048,1.503,1.580,2.148,2.016,1.484,1.380,1.818,1.480,1.191,,,,
758,1.025,1.026,0.997,1.000,1.006,1.158,0.677,0,0,0,22,2,2024,3,53,1,1,1.025,1.005,1.006,1.014,0.027,0.033,0.032,0.025,1.028,1.010,1.009,1.016,0.961,1.508,1.397,1.020,0.984,1.516,1.408,1.031,0.836,1.204,0.974,0.781,,,,


## Modelling

### Target columns

In [7]:
target_cols = standardized_df.filter(regex = "Target.*").columns.to_list()
target_cols

['Target 3D', 'Target 7D', 'Target 15D', 'Target 30D']

In [8]:
pred_input_df = standardized_df.drop(columns = target_cols)
pred_input_df.shape

(760, 41)

In [9]:
standardized_df[target_cols].describe()

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
count,757.0,753.0,745.0,730.0
mean,0.997703,0.995247,0.990046,0.981677
std,0.032368,0.046782,0.06778,0.10407
min,0.844,0.831,0.786,0.741
25%,0.981,0.967,0.949,0.913
50%,0.996,0.99,0.982,0.973
75%,1.012,1.018,1.026,1.03
max,1.13,1.206,1.246,1.399


### Data processing

In [10]:
def get_training_data(target_col: str):
    print(f"Target: {target_col}")
    X_df = standardized_df[standardized_df[target_col].notnull()].drop(columns = target_cols).copy()
    y = standardized_df[standardized_df[target_col].notnull()][target_col].copy()
    print(f"X.shape: {X_df.shape}")
    print(f"y.shape: {y.shape}")

    return X_df, y

### Grid search parameters

In [11]:
param_dict = {
    "n_estimators": [100, 125, 150],
    "learning_rate": [0.05, 0.1, 0.2],
    "max_depth": [2, 3, 4],
    "max_features": ["log2", "sqrt", 0.25],
    "subsample": [0.75, 1.0]
}

feature_importances = pd.DataFrame(
    index = standardized_df.drop(columns = target_cols).columns.to_list()
)

### Model building

In [12]:
def get_model():
    gb_model = GradientBoostingRegressor(
        loss = "squared_error",
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        gb_model,
        param_dict,
        n_iter = 12,
        cv = 5,
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def get_quantile_model(quantile: float):
    gb_model = GradientBoostingRegressor(
        loss = "quantile",
        alpha = quantile,
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        gb_model,
        param_dict,
        n_iter = 12,
        cv = 5,
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def print_results(y, preds):
    print(f"Target std: {y.std():.3f}")
    print(f"R2: {r2_score(y, preds):.3f}")
    print(f"MSE: {mean_squared_error(y, preds, squared = False):.3f}")
    print(f"MAE: {mean_absolute_error(y, preds):.3f}")

## Training

### `Target 3D`

#### Forecasting model

In [13]:
target_col = target_cols[0]
X, y = get_training_data(target_col)

Target: Target 3D
X.shape: (757, 41)
y.shape: (757,)


In [14]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 0.75,
 'n_estimators': 100,
 'max_features': 'sqrt',
 'max_depth': 3,
 'learning_rate': 0.05}

In [15]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
10,0.255051,0.021683,0.005257,0.001423,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",0.08216,-0.008352,-0.012773,-0.23665,-0.536088,-0.142341,0.223191,1
2,0.285559,0.024556,0.007287,0.003243,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-0.044,-0.025695,0.033709,-0.166105,-0.759279,-0.192274,0.290854,2
7,0.420454,0.060632,0.005493,0.00253,0.75,125,0.25,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",-0.087189,-0.161947,-0.053202,-0.234973,-0.634661,-0.234394,0.209744,3
8,0.418287,0.057876,0.005898,0.003039,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.050867,-0.105959,0.028201,-0.187713,-0.941417,-0.251551,0.352058,4
11,0.325731,0.036279,0.003032,0.000482,0.75,150,sqrt,4,0.05,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",-0.032211,0.003611,-0.022257,-0.17933,-1.203184,-0.286674,0.462698,5


In [16]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.032
R2: 0.484
MSE: 0.023
MAE: 0.017


#### Lower quantile model

In [17]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [18]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [19]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 3D,Pred Target 3D,LB Target 3D,UB Target 3D
747,2024-02-07,1436.45,1449.4,1428.96,1397.81,1461.8
748,2024-02-08,1442.3,1474.4,1433.19,1408.87,1476.15
749,2024-02-09,1423.3,1458.4,1421.1,1388.0,1459.36
750,2024-02-12,1449.4,1499.6,1442.28,1407.65,1498.56
751,2024-02-13,1474.4,1494.7,1462.92,1432.47,1496.21
752,2024-02-14,1458.4,1511.8,1455.47,1402.1,1508.15
753,2024-02-15,1499.6,1503.5,1481.85,1459.0,1511.36
754,2024-02-16,1494.7,1492.05,1485.88,1461.76,1516.24
755,2024-02-19,1511.8,1451.05,1496.09,1436.45,1531.29
756,2024-02-20,1503.5,1443.5,1483.02,1444.57,1524.53


### `Target 7D`

#### Forecasting model

In [20]:
target_col = target_cols[1]
X, y = get_training_data(target_col)

Target: Target 7D
X.shape: (753, 41)
y.shape: (753,)


In [21]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 100,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.05}

In [22]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.377934,0.085535,0.004932,0.000775,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-0.19101,-0.108745,0.044019,-0.410302,-1.630309,-0.459269,0.603632,1
10,0.312264,0.065592,0.004834,0.00079,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",-0.665272,-0.117592,-0.019887,-0.34478,-1.186562,-0.466819,0.422851,2
11,0.349964,0.018829,0.004258,0.001451,0.75,150,sqrt,4,0.05,"{'subsample': 0.75, 'n_estimators': 150, 'max_...",-0.400878,-0.22713,0.061148,-0.380515,-1.68926,-0.527327,0.603995,3
8,0.551532,0.031294,0.007417,0.00439,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.452472,-0.087588,0.004119,-0.365786,-1.781885,-0.536722,0.645132,4
5,1.012729,0.182063,0.009211,0.004126,1.0,150,0.25,4,0.1,"{'subsample': 1.0, 'n_estimators': 150, 'max_f...",-0.263729,-0.155332,0.0533,-0.299233,-2.11258,-0.555515,0.788172,5


In [23]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.047
R2: 0.768
MSE: 0.023
MAE: 0.017


#### Lower quantile model

In [24]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [25]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 0.75,
 'n_estimators': 125,
 'max_features': 0.25,
 'max_depth': 3,
 'learning_rate': 0.1}

#### Predictions

In [26]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 7D,Pred Target 7D,LB Target 7D,UB Target 7D
743,2024-02-01,1421.6,1449.4,1431.21,1368.85,1470.38
744,2024-02-02,1418.9,1474.4,1433.82,1363.44,1474.27
745,2024-02-05,1413.15,1458.4,1427.72,1356.09,1457.74
746,2024-02-06,1407.05,1499.6,1429.02,1355.28,1464.37
747,2024-02-07,1436.45,1494.7,1445.02,1376.1,1496.37
748,2024-02-08,1442.3,1511.8,1466.55,1386.11,1509.83
749,2024-02-09,1423.3,1503.5,1449.23,1361.19,1503.02
750,2024-02-12,1449.4,1492.05,1463.86,1389.66,1511.96
751,2024-02-13,1474.4,1451.05,1454.39,1425.64,1522.02
752,2024-02-14,1458.4,1443.5,1452.0,1414.78,1503.29


### `Target 15D`

#### Forecasting model

In [27]:
target_col = target_cols[2]
X, y = get_training_data(target_col)

Target: Target 15D
X.shape: (745, 41)
y.shape: (745,)


In [28]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 125,
 'max_features': 'sqrt',
 'max_depth': 4,
 'learning_rate': 0.05}

In [29]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.525901,0.059564,0.007726,0.004403,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",0.159213,-0.25391,-0.090033,0.154952,-0.670156,-0.139987,0.307592,1
5,0.850451,0.105637,0.005701,0.000977,1.0,150,0.25,4,0.1,"{'subsample': 1.0, 'n_estimators': 150, 'max_f...",0.123365,-0.06708,0.035857,0.222778,-1.205368,-0.17809,0.522488,2
10,0.357281,0.015589,0.005492,0.00206,0.75,100,sqrt,3,0.05,"{'subsample': 0.75, 'n_estimators': 100, 'max_...",0.000686,-0.203987,-0.133874,0.117611,-0.769361,-0.197785,0.306492,3
2,0.366483,0.043889,0.007447,0.003124,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",-0.004027,-0.338363,-0.071735,0.126918,-0.707635,-0.198968,0.296164,4
1,0.372994,0.078284,0.010785,0.005358,0.75,125,sqrt,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",0.017972,-0.257759,-0.216992,0.179994,-0.879161,-0.231189,0.361089,5


In [30]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.068
R2: 0.919
MSE: 0.019
MAE: 0.014


#### Lower quantile model

In [31]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'sqrt',
 'max_depth': 3,
 'learning_rate': 0.1}

#### Upper quantile model

In [32]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [33]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 15D,Pred Target 15D,LB Target 15D,UB Target 15D
735,2024-01-19,1469.25,1449.4,1434.56,1390.3,1448.39
736,2024-01-20,1460.5,1474.4,1472.26,1360.0,1474.99
737,2024-01-23,1416.0,1458.4,1438.19,1354.52,1459.08
738,2024-01-24,1431.1,1499.6,1475.32,1333.74,1500.01
739,2024-01-25,1450.0,1494.7,1493.98,1368.31,1495.61
740,2024-01-29,1465.4,1511.8,1482.41,1361.86,1511.59
741,2024-01-30,1448.15,1503.5,1498.07,1370.25,1503.59
742,2024-01-31,1443.45,1492.05,1481.32,1351.16,1491.28
743,2024-02-01,1421.6,1451.05,1432.36,1322.91,1451.36
744,2024-02-02,1418.9,1443.5,1420.34,1319.67,1442.13


### `Target 30D`

#### Forecasting model

In [34]:
target_col = target_cols[3]
X, y = get_training_data(target_col)

Target: Target 30D
X.shape: (730, 41)
y.shape: (730,)


In [35]:
model = get_model()
model.fit(X, y)
model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

In [36]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_subsample,param_n_estimators,param_max_features,param_max_depth,param_learning_rate,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
5,0.832713,0.082826,0.005426,0.00124,1.0,150,0.25,4,0.1,"{'subsample': 1.0, 'n_estimators': 150, 'max_f...",0.074329,-0.195863,0.027741,0.21501,-0.952308,-0.166218,0.41464,1
7,0.435884,0.041254,0.005051,0.00114,0.75,125,0.25,3,0.1,"{'subsample': 0.75, 'n_estimators': 125, 'max_...",-0.281127,-0.394108,-0.019161,0.201345,-0.762272,-0.251065,0.328984,2
8,0.43731,0.04562,0.005015,0.000947,1.0,125,sqrt,4,0.05,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.224227,-0.731985,-0.044094,0.170984,-0.463301,-0.258524,0.31551,3
4,0.427577,0.104461,0.009596,0.006469,1.0,125,sqrt,2,0.1,"{'subsample': 1.0, 'n_estimators': 125, 'max_f...",-0.395364,-0.819026,-0.036219,0.254783,-0.970567,-0.393279,0.460869,4
2,0.515787,0.153152,0.008033,0.001898,1.0,100,log2,4,0.05,"{'subsample': 1.0, 'n_estimators': 100, 'max_f...",0.066529,-1.148827,-0.354012,0.145723,-0.857143,-0.429546,0.506544,5


In [37]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.104
R2: 0.994
MSE: 0.008
MAE: 0.006


#### Lower quantile model

In [38]:
lb_model = get_quantile_model(QUANTILE_LB)
lb_model.fit(X, y)
lb_model.best_params_

{'subsample': 0.75,
 'n_estimators': 150,
 'max_features': 'log2',
 'max_depth': 4,
 'learning_rate': 0.1}

#### Upper quantile model

In [39]:
ub_model = get_quantile_model(QUANTILE_UB)
ub_model.fit(X, y)
ub_model.best_params_

{'subsample': 1.0,
 'n_estimators': 150,
 'max_features': 0.25,
 'max_depth': 4,
 'learning_rate': 0.1}

#### Predictions

In [40]:
pred_col_name = f'Pred {target_col}'
lb_col_name, ub_col_name = f'LB {target_col}', f'UB {target_col}'

stock_df[pred_col_name] = (model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[lb_col_name] = (lb_model.predict(pred_input_df) * stock_df['Close']).round(2)
stock_df[ub_col_name] = (ub_model.predict(pred_input_df) * stock_df['Close']).round(2)

stock_df[['Date', 'Close', target_col, pred_col_name, lb_col_name, ub_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 30D,Pred Target 30D,LB Target 30D,UB Target 30D
720,2023-12-29,1490.5,1449.4,1442.02,1433.16,1484.36
721,2024-01-01,1493.2,1474.4,1477.29,1342.06,1490.26
722,2024-01-02,1482.15,1458.4,1467.75,1339.52,1457.19
723,2024-01-03,1490.35,1499.6,1492.77,1338.47,1498.37
724,2024-01-04,1494.8,1494.7,1494.35,1384.46,1495.73
725,2024-01-05,1498.75,1511.8,1504.81,1377.54,1515.92
726,2024-01-08,1484.9,1503.5,1499.78,1366.31,1503.54
727,2024-01-09,1490.1,1492.05,1493.06,1368.81,1493.76
728,2024-01-10,1480.65,1451.05,1454.62,1351.3,1451.39
729,2024-01-11,1471.5,1443.5,1440.88,1316.34,1443.92


## Feature importances

In [41]:
feature_importances['Mean'] = feature_importances.mean(axis = 1)
(feature_importances.sort_values('Mean', ascending = False) * 100).round(1)

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D,Mean
DayOfYear,7.5,11.3,14.7,28.9,15.6
52W H,6.3,11.2,10.8,4.9,8.3
Range 60MA,4.3,5.1,9.5,12.2,7.8
Year,1.4,3.6,7.8,14.6,6.8
Month,2.4,6.2,10.5,7.5,6.7
52W L,3.0,4.9,4.0,7.3,4.8
Close 60MA,5.8,5.9,3.4,3.7,4.7
Range 15MA,6.3,3.1,4.4,2.9,4.2
Range 30MA,1.7,4.0,4.9,4.7,3.8
Range 7MA,6.9,3.2,1.9,0.7,3.2


## Forecasts

In [42]:
stock_df.filter(regex = "(Date)|(Close$)|(Pred.*)").iloc[-10:, :]

Unnamed: 0,Date,Close,Pred Target 3D,Pred Target 7D,Pred Target 15D,Pred Target 30D
750,2024-02-12,1449.4,1442.28,1463.86,1430.25,1379.54
751,2024-02-13,1474.4,1462.92,1454.39,1425.81,1425.76
752,2024-02-14,1458.4,1455.47,1452.0,1469.26,1396.13
753,2024-02-15,1499.6,1481.85,1480.47,1434.88,1470.97
754,2024-02-16,1494.7,1485.88,1499.53,1454.33,1474.09
755,2024-02-19,1511.8,1496.09,1522.76,1500.79,1555.96
756,2024-02-20,1503.5,1483.02,1508.45,1490.09,1566.93
757,2024-02-21,1492.05,1484.98,1495.9,1454.78,1520.61
758,2024-02-22,1451.05,1450.16,1463.1,1447.84,1501.81
759,2024-02-23,1443.5,1440.02,1446.83,1453.44,1510.61


In [43]:
latest_preds = stock_df.iloc[-1]
print(f"Date: {latest_preds['Date'].date()}")
print(f"Close: {latest_preds['Close']}")

for target_col in target_cols:
    print(f"{target_col}: {latest_preds[f'Pred {target_col}']}", end = " ")
    print(f"({latest_preds[f'LB {target_col}']} to {latest_preds[f'UB {target_col}']})")

Date: 2024-02-23
Close: 1443.5
Target 3D: 1440.02 (1418.42 to 1494.23)
Target 7D: 1446.83 (1415.02 to 1497.05)
Target 15D: 1453.44 (1363.96 to 1478.05)
Target 30D: 1510.61 (1335.18 to 1455.9)
