# 04 - Forecasting using Random Forests

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['HDFCBANK', 'ITBEES']

In [4]:
STOCK_SYMBOL = stock_symbols[0]
STOCK_SYMBOL

'HDFCBANK'

## Data loading

### Stock data

In [5]:
stock_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
)

stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2020-01-01,1276.10,1280.00,1270.60,1279.00,1278.60,1276.64,2503.3,1084.00,1836849,2.345001e+09,46625,1,0,0,1,1,2020,2,1,1,1,1278.60,1278.60,1278.60,1278.60,1276.64,1276.64,1276.64,1276.64,1836849,1836849,1836849,1836849,2345000988,2345000988,2345000988,2345000988,46625,46625,46625,46625,1240.95,1282.70,1240.85,1240.60
1,2020-01-02,1279.00,1288.00,1279.00,1286.00,1286.75,1284.56,2503.3,1084.00,3068583,3.941792e+09,104570,1,0,0,2,1,2020,3,2,1,1,1282.68,1282.68,1282.68,1282.68,1280.60,1280.60,1280.60,1280.60,2452716,2452716,2452716,2452716,3143396262,3143396262,3143396262,3143396262,75597,75597,75597,75597,1260.60,1286.00,1244.85,1249.00
2,2020-01-03,1282.20,1285.00,1263.60,1268.50,1268.40,1270.48,2503.3,1084.00,5427775,6.895886e+09,157066,0,0,0,3,1,2020,4,3,1,1,1277.92,1277.92,1277.92,1277.92,1277.23,1277.23,1277.23,1277.23,3444402,3444402,3444402,3444402,4394226092,4394226092,4394226092,4394226092,102753,102753,102753,102753,1257.30,1289.50,1244.55,1241.40
3,2020-01-06,1260.00,1261.80,1236.00,1240.25,1240.95,1247.24,2503.3,1084.00,5445093,6.791348e+09,155007,0,0,0,6,1,2020,0,6,1,3,1265.37,1268.68,1268.68,1268.68,1267.43,1269.73,1269.73,1269.73,4647150,3944575,3944575,3944575,5876341707,4993506527,4993506527,4993506527,138881,115817,115817,115817,1271.40,1284.25,1213.20,1219.35
4,2020-01-07,1258.90,1271.45,1252.25,1261.00,1260.60,1261.48,2503.3,1084.00,7362247,9.287302e+09,189026,1,0,0,7,1,2020,1,7,1,1,1256.65,1267.06,1267.06,1267.06,1259.73,1268.08,1268.08,1268.08,6078371,4628109,4628109,4628109,7658178376,5852265530,5852265530,5852265530,167033,130458,130458,130458,1282.70,1287.65,1223.20,1217.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,2024-01-16,1673.00,1683.65,1658.10,1678.00,1679.15,1672.44,1757.5,1460.25,12661250,2.117514e+10,347404,1,0,0,16,1,2024,1,16,1,1,1664.38,1658.86,1678.20,1664.56,1661.38,1660.48,1678.64,1664.72,12236068,10797971,12592121,16089685,20346476315,17939626497,21173698410,26756826044,346951,297593,324871,336519,1470.65,,,
1021,2024-01-17,1570.00,1596.80,1528.40,1542.15,1537.50,1565.65,1757.5,1460.25,85072618,1.331936e+11,2098772,0,0,0,17,1,2024,2,17,1,1,1629.82,1640.87,1667.15,1661.69,1634.90,1645.31,1669.96,1662.71,37298015,21665597,17363326,18054925,59322762592,34817957164,28526480964,29781095464,953645,544785,443365,393241,1478.85,,,
1022,2024-01-18,1494.00,1515.00,1480.05,1490.00,1486.15,1495.03,1757.5,1460.25,80535465,1.204027e+11,1582497,0,0,0,18,1,2024,3,18,1,1,1567.60,1617.39,1652.54,1656.97,1577.71,1621.62,1655.54,1658.30,59423111,31361600,21263141,20095890,91590480296,49013774376,34039123030,32747198969,1342891,732341,520660,434624,,,,
1023,2024-01-19,1505.95,1510.25,1468.40,1474.90,1470.65,1483.88,1757.5,1460.25,54800269,8.131686e+10,1275220,0,0,0,19,1,2024,4,19,1,1,1498.10,1590.92,1636.63,1651.64,1514.85,1597.43,1640.64,1653.48,73469450,38039080,24082779,21581725,111637719963,58727408569,38036670502,34902710598,1652163,889730,588066,469801,,,,


### Standardized data

In [6]:
standardized_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
)

standardized_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,0.998,1.001,0.994,1.000,0.998,1.958,0.848,1,0,0,1,1,2020,2,1,1,1,1.000,1.000,1.000,1.000,0.998,0.998,0.998,0.998,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,0.971,1.003,0.970,0.970
1,0.994,1.001,0.994,0.999,0.998,1.945,0.842,1,0,0,2,1,2020,3,2,1,1,0.997,0.997,0.997,0.997,0.995,0.995,0.995,0.995,0.799,0.799,0.799,0.799,0.797,0.797,0.797,0.797,0.723,0.723,0.723,0.723,0.980,0.999,0.967,0.971
2,1.011,1.013,0.996,1.000,1.002,1.974,0.855,0,0,0,3,1,2020,4,3,1,1,1.008,1.008,1.008,1.008,1.007,1.007,1.007,1.007,0.635,0.635,0.635,0.635,0.637,0.637,0.637,0.637,0.654,0.654,0.654,0.654,0.991,1.017,0.981,0.979
3,1.015,1.017,0.996,0.999,1.005,2.017,0.874,0,0,0,6,1,2020,0,6,1,3,1.020,1.022,1.022,1.022,1.021,1.023,1.023,1.023,0.853,0.724,0.724,0.724,0.865,0.735,0.735,0.735,0.896,0.747,0.747,0.747,1.025,1.035,0.978,0.983
4,0.999,1.009,0.993,1.000,1.001,1.986,0.860,1,0,0,7,1,2020,1,7,1,1,0.997,1.005,1.005,1.005,0.999,1.006,1.006,1.006,0.826,0.629,0.629,0.629,0.825,0.630,0.630,0.630,0.884,0.690,0.690,0.690,1.018,1.021,0.970,0.966
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,0.996,1.003,0.987,0.999,0.996,1.047,0.870,1,0,0,16,1,2024,1,16,1,1,0.991,0.988,0.999,0.991,0.989,0.989,1.000,0.991,0.966,0.853,0.995,1.271,0.961,0.847,1.000,1.264,0.999,0.857,0.935,0.969,0.876,,,
1021,1.021,1.039,0.994,1.003,1.018,1.143,0.950,0,0,0,17,1,2024,2,17,1,1,1.060,1.067,1.084,1.081,1.063,1.070,1.086,1.081,0.438,0.255,0.204,0.212,0.445,0.261,0.214,0.224,0.454,0.260,0.211,0.187,0.962,,,
1022,1.005,1.019,0.996,1.003,1.006,1.183,0.983,0,0,0,18,1,2024,3,18,1,1,1.055,1.088,1.112,1.115,1.062,1.091,1.114,1.116,0.738,0.389,0.264,0.250,0.761,0.407,0.283,0.272,0.849,0.463,0.329,0.275,,,,
1023,1.024,1.027,0.998,1.003,1.009,1.195,0.993,0,0,0,19,1,2024,4,19,1,1,1.019,1.082,1.113,1.123,1.030,1.086,1.116,1.124,1.341,0.694,0.439,0.394,1.373,0.722,0.468,0.429,1.296,0.698,0.461,0.368,,,,


## Modelling

### Target columns

In [7]:
target_cols = standardized_df.filter(regex = "Target.*").columns.to_list()
target_cols

['Target 3D', 'Target 7D', 'Target 15D', 'Target 30D']

In [8]:
standardized_df[target_cols].describe()

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
count,1022.0,1018.0,1010.0,995.0
mean,1.000904,1.002511,1.005721,1.012089
std,0.031153,0.046105,0.064164,0.086329
min,0.82,0.718,0.668,0.629
25%,0.985,0.978,0.968,0.963
50%,1.0,1.002,1.003,1.009
75%,1.01675,1.026,1.043,1.06
max,1.178,1.219,1.231,1.322


### Data processing

In [9]:
def get_training_data(target_col: str):
    print(f"Target: {target_col}")
    X_df = standardized_df[standardized_df[target_col].notnull()].drop(columns = target_cols).copy()
    y = standardized_df[standardized_df[target_col].notnull()][target_col].copy()
    print(f"X.shape: {X_df.shape}")
    print(f"y.shape: {y.shape}")

    return X_df, y

### Grid search parameters

In [10]:
param_dict = {
    "n_estimators": [75, 100, 125],
    "max_depth": [4, 5, 6],
    "max_features": ["log2", "sqrt", 0.25],
    "max_samples": [0.75, 1.0]
}

feature_importances = pd.DataFrame(
    index = standardized_df.drop(columns = target_cols).columns.to_list()
)

### Model building

In [11]:
def get_model():
    rf_model = RandomForestRegressor(
        criterion = "squared_error",
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        rf_model,
        param_dict,
        n_iter = 10,
        cv = 5, 
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def print_results(model, X, y):
    preds = model.predict(X)
    print(f"Target std: {y.std():.3f}")
    print(f"R2: {r2_score(y, preds):.3f}")
    print(f"MSE: {mean_squared_error(y, preds, squared = False):.3f}")
    print(f"MAE: {mean_absolute_error(y, preds):.3f}")

## Training

### `Target 3D`

In [12]:
target_col = target_cols[0]
X, y = get_training_data(target_col)

Target: Target 3D
X.shape: (1022, 37)
y.shape: (1022,)


In [13]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 1.0,
 'max_features': 'sqrt',
 'max_depth': 4}

In [14]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.25622,0.010191,0.048205,0.007833,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.05012,-0.018,-0.042307,0.00165,0.003465,-0.021062,0.022011,1
9,0.262103,0.006494,0.038111,0.004792,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.049237,-0.0221,-0.042504,0.009265,-0.00204,-0.021323,0.022521,2
3,0.19657,0.007393,0.033852,0.007945,75,1.0,0.25,4,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",-0.059623,-0.01315,-0.047051,-0.001813,0.004731,-0.023381,0.025432,3
0,0.344619,0.062861,0.044164,0.003954,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.059853,-0.016675,-0.052321,0.007496,0.004037,-0.023463,0.027992,4
4,0.198915,0.004395,0.030555,0.001456,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.065547,-0.023169,-0.060264,0.003571,0.004484,-0.028185,0.030086,5


In [15]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(model, X, y)

Target std: 0.031
R2: 0.211
MSE: 0.028
MAE: 0.020


In [16]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 3D,Pred Target 3D
1012,2024-01-04,1690.85,1650.5,1686.14
1013,2024-01-05,1682.2,1655.95,1677.52
1014,2024-01-08,1663.45,1649.0,1658.49
1015,2024-01-09,1650.5,1641.2,1646.62
1016,2024-01-10,1655.95,1672.8,1651.56
1017,2024-01-11,1649.0,1679.15,1645.43
1018,2024-01-12,1641.2,1537.5,1632.89
1019,2024-01-15,1672.8,1486.15,1659.23
1020,2024-01-16,1679.15,1470.65,1666.71
1021,2024-01-17,1537.5,1478.85,1518.87


### `Target 7D`

In [17]:
target_col = target_cols[1]
X, y = get_training_data(target_col)

Target: Target 7D
X.shape: (1018, 37)
y.shape: (1018,)


In [18]:
model = get_model()
model.fit(X, y)
model.best_params_

In [None]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.288481,0.00499,0.042047,0.00109,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.077273,-0.049769,-0.11693,0.034086,-0.029044,-0.047786,0.050416,1
9,0.291293,0.006469,0.041886,0.000525,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.065703,-0.059577,-0.135089,0.026353,-0.021251,-0.051054,0.053347,2
4,0.261719,0.01835,0.03605,0.004762,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.094345,-0.069594,-0.117838,0.018466,-0.029128,-0.058488,0.048405,3
3,0.231258,0.0213,0.031467,0.001287,75,1.0,0.25,4,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",-0.125756,-0.05931,-0.108515,0.036431,-0.037832,-0.058996,0.057382,4
6,0.418033,0.019289,0.049393,0.006242,125,0.75,0.25,5,"{'n_estimators': 125, 'max_samples': 0.75, 'ma...",-0.09123,-0.078813,-0.182782,0.04082,-0.013041,-0.065009,0.075695,5


In [None]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(model, X, y)

Target std: 0.046
R2: 0.268
MSE: 0.039
MAE: 0.029


In [None]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 7D,Pred Target 7D
1008,2023-12-29,1709.25,1650.5,1717.0
1009,2024-01-01,1698.1,1655.95,1678.9
1010,2024-01-02,1699.1,1649.0,1684.87
1011,2024-01-03,1672.9,1641.2,1657.83
1012,2024-01-04,1690.85,1672.8,1677.78
1013,2024-01-05,1682.2,1679.15,1669.49
1014,2024-01-08,1663.45,1537.5,1640.09
1015,2024-01-09,1650.5,1486.15,1630.19
1016,2024-01-10,1655.95,1470.65,1628.88
1017,2024-01-11,1649.0,1478.85,1626.63


### `Target 15D`

In [None]:
target_col = target_cols[2]
X, y = get_training_data(target_col)

Target: Target 15D
X.shape: (1010, 37)
y.shape: (1010,)


In [None]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 0.75,
 'max_features': 'log2',
 'max_depth': 4}

In [None]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
9,0.394142,0.02079,0.051346,0.004224,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.144242,-0.08226,-0.177313,0.058198,-0.248818,-0.118887,0.103575,1
0,0.385838,0.049346,0.043355,0.002433,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.204038,-0.101458,-0.221351,0.102964,-0.243059,-0.133388,0.12778,2
4,0.237175,0.005821,0.031357,0.002073,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.251312,-0.079728,-0.209183,0.111611,-0.243851,-0.134493,0.13766,3
8,0.308641,0.017508,0.044625,0.00694,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.216418,-0.069708,-0.211173,0.066251,-0.262384,-0.138686,0.121124,4
3,0.225014,0.003692,0.03286,0.004263,75,1.0,0.25,4,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",-0.357147,-0.081209,-0.160509,0.121428,-0.273947,-0.150277,0.165401,5


In [None]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(model, X, y)

Target std: 0.064
R2: 0.352
MSE: 0.052
MAE: 0.040


In [None]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 15D,Pred Target 15D
1000,2023-12-18,1655.7,1650.5,1668.93
1001,2023-12-19,1652.9,1655.95,1663.68
1002,2023-12-20,1657.0,1649.0,1669.59
1003,2023-12-21,1686.7,1641.2,1699.44
1004,2023-12-22,1670.85,1672.8,1684.63
1005,2023-12-26,1682.45,1679.15,1694.66
1006,2023-12-27,1703.3,1537.5,1714.35
1007,2023-12-28,1705.25,1486.15,1717.83
1008,2023-12-29,1709.25,1470.65,1719.39
1009,2024-01-01,1698.1,1478.85,1673.97


### `Target 30D`

In [None]:
target_col = target_cols[3]
X, y = get_training_data(target_col)

Target: Target 30D
X.shape: (995, 37)
y.shape: (995,)


In [None]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 0.75,
 'max_features': 'log2',
 'max_depth': 4}

In [None]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
9,0.299857,0.014563,0.043453,0.002165,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.464071,-0.028794,-0.436321,0.098886,-0.384824,-0.243025,0.232004,1
0,0.480171,0.06839,0.05763,0.012795,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.595103,-0.009222,-0.580303,0.135407,-0.295183,-0.268881,0.294947,2
8,0.299128,0.008916,0.04292,0.001708,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.790413,-0.014551,-0.437596,0.145117,-0.304796,-0.280448,0.327768,3
4,0.254781,0.008177,0.034523,0.004786,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.674931,-0.005484,-0.55165,0.100873,-0.305285,-0.287295,0.300187,4
5,0.441536,0.022503,0.051975,0.003184,125,1.0,sqrt,5,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",-0.777962,0.023647,-0.70115,0.147535,-0.23972,-0.30953,0.373519,5


In [None]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(model, X, y)

Target std: 0.086
R2: 0.472
MSE: 0.063
MAE: 0.049


In [None]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 30D,Pred Target 30D
985,2023-11-24,1532.1,1650.5,1564.23
986,2023-11-28,1528.65,1655.95,1565.61
987,2023-11-29,1559.15,1649.0,1591.69
988,2023-11-30,1558.8,1641.2,1584.54
989,2023-12-01,1555.4,1672.8,1585.05
990,2023-12-04,1609.4,1679.15,1634.39
991,2023-12-05,1623.7,1537.5,1647.08
992,2023-12-06,1627.8,1486.15,1651.08
993,2023-12-07,1630.45,1470.65,1656.77
994,2023-12-08,1653.2,1478.85,1678.45


## Feature importances

In [None]:
feature_importances['Mean'] = feature_importances.mean(axis = 1)
(feature_importances.sort_values('Mean', ascending = False) * 100).round(1)

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D,Mean
52W H,17.3,20.2,12.6,12.5,15.6
DayOfYear,2.7,8.0,9.6,13.3,8.4
VWAP 15MA,9.1,4.3,5.0,4.4,5.7
DaysSinceLastTradingSession,0.0,0.7,10.2,8.6,4.9
Month,0.6,3.4,4.4,9.5,4.5
Close 15MA,8.0,4.3,2.5,2.0,4.2
VWAP 30MA,6.7,4.0,2.5,1.5,3.7
VWAP 7MA,6.2,2.8,2.7,3.0,3.7
52W L,1.0,3.0,4.2,6.2,3.6
Quarter,0.5,1.0,3.5,9.0,3.5


In [None]:
stock_df.filter(regex = "(Date)|(Close$)|(Pred.*)").iloc[-10:, :]

Unnamed: 0,Date,Close,Pred Target 3D,Pred Target 7D,Pred Target 15D,Pred Target 30D
995,2023-12-11,1651.0,1652.83,1655.66,1662.33,1677.53
996,2023-12-12,1634.6,1637.25,1640.9,1647.54,1661.5
997,2023-12-13,1630.9,1633.55,1638.93,1643.77,1660.39
998,2023-12-14,1650.15,1652.83,1656.46,1663.46,1677.09
999,2023-12-15,1656.55,1647.26,1649.35,1661.01,1665.82
1000,2023-12-18,1655.7,1657.27,1661.19,1668.93,1686.1
1001,2023-12-19,1652.9,1654.47,1658.5,1663.68,1680.53
1002,2023-12-20,1657.0,1659.39,1662.82,1669.59,1683.5
1003,2023-12-21,1686.7,1689.59,1694.77,1699.44,1718.34
1004,2023-12-22,1670.85,1673.27,1676.67,1684.63,1690.46
