# 04 - Forecasting using Random Forests

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['HDFCBANK', 'ITBEES']

In [4]:
STOCK_SYMBOL = stock_symbols[1]
STOCK_SYMBOL

'ITBEES'

## Data loading

### Stock data

In [5]:
stock_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
)

stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2020-07-01,17.71,17.71,14.65,14.65,14.65,14.97,17.71,14.65,26187,3.919319e+05,55,0,1,1,1,7,2020,2,183,3,1,14.65,14.65,14.65,14.65,14.97,14.97,14.97,14.97,26187,26187,26187,26187,391931,391931,391931,391931,55,55,55,55,15.55,15.59,17.16,18.07
1,2020-07-02,14.65,15.74,14.65,15.21,15.26,15.07,17.71,14.65,5602,8.443024e+04,31,1,1,0,2,7,2020,3,184,3,1,14.96,14.96,14.96,14.96,15.02,15.02,15.02,15.02,15894,15894,15894,15894,238181,238181,238181,238181,43,43,43,43,15.74,15.91,17.17,18.15
2,2020-07-03,15.41,15.41,15.28,15.39,15.38,15.31,17.71,14.65,13559,2.076389e+05,18,0,0,0,3,7,2020,4,185,3,1,15.10,15.10,15.10,15.10,15.12,15.12,15.12,15.12,15116,15116,15116,15116,228000,228000,228000,228000,34,34,34,34,15.50,15.73,17.25,18.14
3,2020-07-06,15.50,18.16,15.41,15.54,15.55,16.05,18.16,14.65,33643,5.400182e+05,168,1,0,1,6,7,2020,0,188,3,3,15.40,15.21,15.21,15.21,15.48,15.35,15.35,15.35,17601,19747,19747,19747,277362,306004,306004,306004,72,68,68,68,15.55,16.55,17.62,18.25
4,2020-07-07,15.55,16.29,15.55,15.75,15.74,15.73,18.16,14.65,15727,2.473728e+05,53,1,0,0,7,7,2020,1,189,3,1,15.56,15.32,15.32,15.32,15.70,15.43,15.43,15.43,20976,18943,18943,18943,331676,294278,294278,294278,79,65,65,65,15.59,17.01,18.05,18.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,2024-01-16,39.52,39.55,38.65,38.82,38.82,38.85,40.00,27.56,5450878,2.117865e+08,22817,0,0,0,16,1,2024,1,16,1,1,38.86,37.60,37.38,36.80,38.88,37.65,37.41,36.79,15391384,8630370,7433435,8159666,597276951,330737649,280538679,302315985,29262,20814,20739,19773,39.21,,,
883,2024-01-17,38.80,39.16,38.07,38.82,38.98,38.94,40.00,27.56,7129906,2.776618e+08,18290,1,0,0,17,1,2024,2,17,1,1,39.01,37.96,37.47,36.95,39.10,37.99,37.50,36.94,9938041,9156060,7581290,8203049,390192564,352367793,286710859,304917101,24415,20489,21146,19874,38.89,,,
884,2024-01-18,38.95,39.16,38.12,38.83,38.76,38.52,40.00,27.56,5298842,2.041244e+08,20714,0,0,0,18,1,2024,3,18,1,1,38.85,38.26,37.53,37.08,38.77,38.22,37.55,37.07,5959875,9242842,7696713,8070116,231190909,356796592,291344742,300943699,20607,21654,21675,20022,,,,
885,2024-01-19,38.74,39.43,38.74,39.21,39.21,39.21,40.00,27.56,3740394,1.466584e+08,12440,1,0,0,19,1,2024,4,19,1,1,38.98,38.61,37.66,37.22,38.89,38.58,37.66,37.21,5389714,9367806,7437123,8046963,209481538,362734009,282027474,300672473,17148,21631,21299,20010,,,,


### Standardized data

In [6]:
standardized_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
)

standardized_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 3MA,Close 7MA,Close 15MA,Close 30MA,VWAP 3MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,Volume 3MA,Volume 7MA,Volume 15MA,Volume 30MA,Value 3MA,Value 7MA,Value 15MA,Value 30MA,#Trades 3MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,Target 3D,Target 7D,Target 15D,Target 30D
0,1.209,1.209,1.000,1.000,1.022,1.209,1.000,0,1,1,1,7,2020,2,183,3,1,1.000,1.000,1.000,1.000,1.022,1.022,1.022,1.022,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.061,1.064,1.171,1.233
1,0.960,1.031,0.960,0.997,0.988,1.161,0.960,1,1,0,2,7,2020,3,184,3,1,0.980,0.980,0.980,0.980,0.984,0.984,0.984,0.984,2.837,2.837,2.837,2.837,2.821,2.821,2.821,2.821,1.387,1.387,1.387,1.387,1.031,1.043,1.125,1.189
2,1.002,1.002,0.993,1.001,0.995,1.151,0.953,0,0,0,3,7,2020,4,185,3,1,0.982,0.982,0.982,0.982,0.983,0.983,0.983,0.983,1.115,1.115,1.115,1.115,1.098,1.098,1.098,1.098,1.889,1.889,1.889,1.889,1.008,1.023,1.122,1.179
3,0.997,1.168,0.991,0.999,1.032,1.168,0.942,1,0,1,6,7,2020,0,188,3,3,0.990,0.978,0.978,0.978,0.995,0.987,0.987,0.987,0.523,0.587,0.587,0.587,0.514,0.567,0.567,0.567,0.429,0.405,0.405,0.405,1.000,1.064,1.133,1.174
4,0.988,1.035,0.988,1.001,0.999,1.154,0.931,1,0,0,7,7,2020,1,189,3,1,0.989,0.973,0.973,0.973,0.997,0.980,0.980,0.980,1.334,1.204,1.204,1.204,1.341,1.190,1.190,1.190,1.491,1.226,1.226,1.226,0.990,1.081,1.147,1.167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,1.018,1.019,0.996,1.000,1.001,1.030,0.710,0,0,0,16,1,2024,1,16,1,1,1.001,0.969,0.963,0.948,1.002,0.970,0.964,0.948,2.824,1.583,1.364,1.497,2.820,1.562,1.325,1.427,1.282,0.912,0.909,0.867,1.010,,,
883,0.995,1.005,0.977,0.996,0.999,1.026,0.707,1,0,0,17,1,2024,2,17,1,1,1.001,0.974,0.961,0.948,1.003,0.975,0.962,0.948,1.394,1.284,1.063,1.151,1.405,1.269,1.033,1.098,1.335,1.120,1.156,1.087,0.998,,,
884,1.005,1.010,0.983,1.002,0.994,1.032,0.711,0,0,0,18,1,2024,3,18,1,1,1.002,0.987,0.968,0.957,1.000,0.986,0.969,0.956,1.125,1.744,1.453,1.523,1.133,1.748,1.427,1.474,0.995,1.045,1.046,0.967,,,,
885,0.988,1.006,0.988,1.000,1.000,1.020,0.703,1,0,0,19,1,2024,4,19,1,1,0.994,0.985,0.960,0.949,0.992,0.984,0.960,0.949,1.441,2.504,1.988,2.151,1.428,2.473,1.923,2.050,1.378,1.739,1.712,1.609,,,,


## Modelling

### Target columns

In [7]:
target_cols = standardized_df.filter(regex = "Target.*").columns.to_list()
target_cols

['Target 3D', 'Target 7D', 'Target 15D', 'Target 30D']

In [8]:
standardized_df[target_cols].describe()

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
count,884.0,880.0,872.0,857.0
mean,1.003498,1.008088,1.01636,1.031392
std,0.023867,0.036782,0.054312,0.081275
min,0.913,0.869,0.868,0.78
25%,0.99,0.987,0.98,0.982
50%,1.004,1.01,1.019,1.036
75%,1.017,1.032,1.054,1.082
max,1.09,1.174,1.196,1.318


### Data processing

In [9]:
def get_training_data(target_col: str):
    print(f"Target: {target_col}")
    X_df = standardized_df[standardized_df[target_col].notnull()].drop(columns = target_cols).copy()
    y = standardized_df[standardized_df[target_col].notnull()][target_col].copy()
    print(f"X.shape: {X_df.shape}")
    print(f"y.shape: {y.shape}")

    return X_df, y

### Grid search parameters

In [10]:
param_dict = {
    "n_estimators": [75, 100, 125],
    "max_depth": [4, 5, 6],
    "max_features": ["log2", "sqrt", 0.25],
    "max_samples": [0.75, 1.0]
}

feature_importances = pd.DataFrame(
    index = standardized_df.drop(columns = target_cols).columns.to_list()
)

### Model building

In [11]:
def get_model():
    rf_model = RandomForestRegressor(
        criterion = "squared_error",
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        rf_model,
        param_dict,
        n_iter = 10,
        cv = 5, 
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def print_results(y, preds):
    print(f"Target std: {y.std():.3f}")
    print(f"R2: {r2_score(y, preds):.3f}")
    print(f"MSE: {mean_squared_error(y, preds, squared = False):.3f}")
    print(f"MAE: {mean_absolute_error(y, preds):.3f}")

## Training

### `Target 3D`

In [12]:
target_col = target_cols[0]
X, y = get_training_data(target_col)

Target: Target 3D
X.shape: (884, 37)
y.shape: (884,)


In [13]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 75,
 'max_samples': 0.75,
 'max_features': 'log2',
 'max_depth': 5}

In [14]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
4,0.222222,0.009569,0.031336,0.002001,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.092705,-0.039749,-0.045669,-0.061553,-0.068902,-0.061716,0.018719,1
9,0.284578,0.029974,0.039289,0.003463,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.088258,-0.034948,-0.06933,-0.072273,-0.051126,-0.063187,0.018395,2
0,0.354959,0.04828,0.04427,0.004612,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.093567,-0.026022,-0.067243,-0.073971,-0.066365,-0.065434,0.022008,3
8,0.271807,0.011283,0.038477,0.004295,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.102854,-0.026655,-0.064876,-0.087271,-0.052237,-0.066778,0.026632,4
5,0.344172,0.009837,0.040962,0.001467,125,1.0,sqrt,5,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",-0.090924,-0.028363,-0.066593,-0.078829,-0.0694,-0.066822,0.021028,5


In [15]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(y, model.predict(X))

Target std: 0.024
R2: 0.244
MSE: 0.021
MAE: 0.016


In [16]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)

stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 3D,Pred Target 3D
874,2024-01-04,36.41,36.64,36.32
875,2024-01-05,36.85,36.76,36.88
876,2024-01-08,36.51,36.72,36.53
877,2024-01-09,36.64,38.54,36.76
878,2024-01-10,36.76,39.23,36.95
879,2024-01-11,36.72,38.82,36.84
880,2024-01-12,38.54,38.98,38.6
881,2024-01-15,39.23,38.76,39.22
882,2024-01-16,38.82,39.21,38.82
883,2024-01-17,38.98,38.89,38.99


### `Target 7D`

In [17]:
target_col = target_cols[1]
X, y = get_training_data(target_col)

Target: Target 7D
X.shape: (880, 37)
y.shape: (880,)


In [18]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 0.75,
 'max_features': 'log2',
 'max_depth': 4}

In [19]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
9,0.275614,0.009804,0.042815,0.001427,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.113596,-0.147883,-0.087016,-0.133542,-0.178596,-0.132127,0.030945,1
8,0.285098,0.013286,0.040614,0.002984,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.065679,-0.164628,-0.096928,-0.160439,-0.252476,-0.14803,0.064392,2
4,0.245131,0.012083,0.032015,0.003467,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.148386,-0.136458,-0.083729,-0.180556,-0.233251,-0.156476,0.049462,3
0,0.361929,0.058907,0.045752,0.006671,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.108711,-0.220154,-0.09447,-0.168463,-0.22293,-0.162946,0.053911,4
5,0.41128,0.019561,0.049022,0.004297,125,1.0,sqrt,5,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",-0.113361,-0.221463,-0.07019,-0.199077,-0.252108,-0.17124,0.068408,5


In [20]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(y, model.predict(X))

Target std: 0.037
R2: 0.269
MSE: 0.031
MAE: 0.025


In [21]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 7D,Pred Target 7D
870,2023-12-29,37.36,36.64,37.82
871,2024-01-01,37.68,36.76,37.76
872,2024-01-02,37.33,36.72,37.43
873,2024-01-03,36.43,38.54,36.77
874,2024-01-04,36.41,39.23,36.73
875,2024-01-05,36.85,38.82,37.07
876,2024-01-08,36.51,38.98,36.77
877,2024-01-09,36.64,38.76,36.96
878,2024-01-10,36.76,39.21,37.02
879,2024-01-11,36.72,38.89,36.96


### `Target 15D`

In [22]:
target_col = target_cols[2]
X, y = get_training_data(target_col)

Target: Target 15D
X.shape: (872, 37)
y.shape: (872,)


In [23]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 0.75,
 'max_features': 'log2',
 'max_depth': 4}

In [24]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
9,0.283502,0.002246,0.042917,0.00167,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.13973,-0.489239,-0.078678,-0.30781,-0.18029,-0.239149,0.145848,1
8,0.306292,0.016394,0.042057,0.001135,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.116728,-0.549997,-0.100465,-0.354744,-0.32338,-0.289063,0.166641,2
0,0.341396,0.051424,0.045945,0.005161,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.132313,-0.606811,-0.076995,-0.381746,-0.255289,-0.290631,0.189855,3
4,0.236442,0.005063,0.031333,0.001212,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.129794,-0.624281,-0.080893,-0.376586,-0.2439,-0.291091,0.195321,4
3,0.221875,0.004918,0.030742,0.00103,75,1.0,0.25,4,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",-0.119386,-0.771418,-0.046576,-0.397164,-0.483731,-0.363655,0.261541,5


In [25]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(y, model.predict(X))

Target std: 0.054
R2: 0.401
MSE: 0.042
MAE: 0.035


In [26]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 15D,Pred Target 15D
862,2023-12-18,37.86,36.64,38.44
863,2023-12-19,37.39,36.76,38.08
864,2023-12-20,36.71,36.72,37.86
865,2023-12-21,36.89,38.54,37.77
866,2023-12-22,37.7,39.23,38.49
867,2023-12-26,37.55,38.82,38.38
868,2023-12-27,37.73,38.98,38.59
869,2023-12-28,37.72,38.76,38.62
870,2023-12-29,37.36,39.21,38.27
871,2024-01-01,37.68,38.89,36.97


### `Target 30D`

In [27]:
target_col = target_cols[3]
X, y = get_training_data(target_col)

Target: Target 30D
X.shape: (857, 37)
y.shape: (857,)


In [28]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 1.0,
 'max_features': 'sqrt',
 'max_depth': 4}

In [29]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
8,0.294793,0.008953,0.041701,0.001071,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.452949,-1.142145,-0.467859,-0.106999,-0.487323,-0.531455,0.336232,1
9,0.323594,0.019689,0.04153,0.003725,100,0.75,log2,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",-0.552779,-1.14583,-0.498811,-0.15982,-0.419047,-0.555257,0.32462,2
4,0.249509,0.015709,0.035403,0.004831,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.497341,-1.316847,-0.441243,-0.217236,-0.514284,-0.59739,0.375082,3
0,0.380242,0.055289,0.045144,0.004527,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.529739,-1.288029,-0.488583,-0.160235,-0.558464,-0.60501,0.370331,4
5,0.382158,0.002698,0.049804,0.003783,125,1.0,sqrt,5,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",-0.560027,-1.274998,-0.458364,-0.161878,-0.727108,-0.636475,0.368439,5


In [30]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

print_results(y, model.predict(X))

Target std: 0.081
R2: 0.558
MSE: 0.054
MAE: 0.043


In [31]:
stock_df[f'Pred {target_col}'] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, f'Pred {target_col}']].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 30D,Pred Target 30D
847,2023-11-24,33.91,36.64,35.43
848,2023-11-28,33.96,36.76,35.44
849,2023-11-29,34.41,36.72,35.56
850,2023-11-30,34.4,38.54,35.71
851,2023-12-01,34.42,39.23,35.82
852,2023-12-04,34.49,38.82,35.79
853,2023-12-05,34.27,38.98,35.63
854,2023-12-06,34.95,38.76,35.73
855,2023-12-07,34.84,39.21,35.91
856,2023-12-08,35.27,38.89,36.05


## Feature importances

In [32]:
feature_importances['Mean'] = feature_importances.mean(axis = 1)
(feature_importances.sort_values('Mean', ascending = False) * 100).round(1)

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D,Mean
DayOfYear,6.9,11.4,13.6,14.9,11.7
Year,4.0,10.3,13.4,15.4,10.8
Month,3.0,5.6,10.3,12.4,7.8
52W H,4.9,8.3,7.8,8.6,7.4
52W L,4.1,5.6,6.3,7.9,6.0
Quarter,1.3,4.3,8.7,6.6,5.2
VWAP 30MA,4.4,2.9,3.5,6.5,4.3
Close 30MA,3.1,2.9,3.9,4.7,3.7
Close 15MA,4.0,2.6,2.4,2.1,2.8
Day,3.4,5.3,1.7,0.7,2.8


In [33]:
stock_df.filter(regex = "(Date)|(Close$)|(Pred.*)").iloc[-10:, :]

Unnamed: 0,Date,Close,Pred Target 3D,Pred Target 7D,Pred Target 15D,Pred Target 30D
877,2024-01-09,36.64,36.76,36.96,36.54,35.6
878,2024-01-10,36.76,36.95,37.02,36.59,35.93
879,2024-01-11,36.72,36.84,36.96,36.52,35.83
880,2024-01-12,38.54,38.6,38.58,37.64,36.55
881,2024-01-15,39.23,39.22,39.23,38.29,37.09
882,2024-01-16,38.82,38.82,38.78,37.94,36.79
883,2024-01-17,38.98,38.99,39.07,38.37,36.89
884,2024-01-18,38.76,38.78,38.76,38.01,36.69
885,2024-01-19,39.21,39.21,39.22,38.39,37.02
886,2024-01-20,38.89,38.78,38.86,38.11,37.14
