# 04 - Forecasting using Random Forests

In [1]:
import sys
sys.executable

'/usr/local/bin/python'

## Imports

In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import constants as cnst
import stock_utils as su

pd.set_option('display.max_columns', None)

NSE_DATA_DIR = PosixPath('../data/NSE') | Valid: True
PROCESSED_DATA_DIR = PosixPath('../data/processed') | Valid: True


## Constants

In [3]:
stock_symbols = su.get_all_stock_symbols(
    cnst.NSE_DATA_DIR
)

stock_symbols

['DEEPAKFERT', 'HDFCBANK', 'ITBEES', 'JUBLFOOD']

In [4]:
STOCK_SYMBOL = stock_symbols[0]
STOCK_SYMBOL

'DEEPAKFERT'

## Data loading

### Stock data

In [5]:
stock_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-processed.parquet')
)

stock_df

Unnamed: 0,Date,Open,High,Low,LTP,Close,VWAP,52W H,52W L,Volume,Value,#Trades,Range,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,2020-01-01,97.20,97.85,96.30,96.60,96.75,96.95,169.0,76.20,153261,1.485918e+07,3161,1.55,0,0,0,1,1,2020,2,1,1,1,96.75,96.75,96.75,96.75,1.55,1.55,1.55,1.55,96.95,96.95,96.95,96.95,153261,153261,153261,153261,14859179,14859179,14859179,14859179,3161,3161,3161,3161,96.10,106.20,109.70,107.00
1,2020-01-02,96.85,102.40,96.60,100.00,100.35,100.17,169.0,76.20,637039,6.381519e+07,7526,5.80,1,0,0,2,1,2020,3,2,1,1,98.55,98.55,98.55,98.55,3.68,3.68,3.68,3.68,98.56,98.56,98.56,98.56,395150,395150,395150,395150,39337186,39337186,39337186,39337186,5343,5343,5343,5343,100.40,109.65,111.55,103.30
2,2020-01-03,99.70,104.80,99.00,101.20,100.75,101.90,169.0,76.20,562843,5.735485e+07,7402,5.80,1,0,0,3,1,2020,4,3,1,1,99.28,99.28,99.28,99.28,4.38,4.38,4.38,4.38,99.67,99.67,99.67,99.67,451047,451047,451047,451047,45343075,45343075,45343075,45343075,6029,6029,6029,6029,98.95,115.30,114.85,99.45
3,2020-01-06,100.50,100.85,95.25,95.70,96.10,97.91,169.0,76.20,326011,3.192018e+07,5214,5.60,0,0,0,6,1,2020,0,6,1,3,98.49,98.49,98.49,98.49,4.69,4.69,4.69,4.69,99.23,99.23,99.23,99.23,419788,419788,419788,419788,41987352,41987352,41987352,41987352,5825,5825,5825,5825,106.30,119.85,121.25,99.95
4,2020-01-07,97.35,101.00,97.10,99.95,100.40,99.09,169.0,76.20,307107,3.043237e+07,4983,3.90,1,0,0,7,1,2020,1,7,1,1,98.87,98.87,98.87,98.87,4.53,4.53,4.53,4.53,99.20,99.20,99.20,99.20,397252,397252,397252,397252,39676356,39676356,39676356,39676356,5657,5657,5657,5657,106.20,118.20,116.60,96.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
976,2024-02-12,515.50,517.10,497.85,502.80,500.05,506.51,714.7,497.85,654659,3.315936e+08,26328,19.25,0,1,0,12,2,2024,0,43,1,3,532.51,581.46,623.67,634.67,22.49,23.74,21.57,21.98,538.22,586.38,627.50,637.57,928983,781001,679848,729576,506187770,450855191,420705230,467952521,35476,29835,24604,24325,507.90,,,
977,2024-02-13,500.10,503.80,483.35,498.95,495.05,493.89,714.7,483.35,827799,4.088438e+08,39023,20.45,0,1,0,13,2,2024,1,44,1,1,521.22,571.23,617.25,632.67,21.92,23.85,21.44,22.16,525.74,575.63,620.93,635.50,858821,808121,657226,739972,455081470,459721553,399636888,472665722,34396,31390,24655,24855,494.50,,,
978,2024-02-14,495.00,499.70,488.50,490.25,490.80,493.16,714.7,483.35,633883,3.126049e+08,31700,11.20,0,0,0,14,2,2024,2,45,1,1,514.09,562.03,610.53,630.67,17.86,22.40,21.00,22.15,517.33,566.06,614.19,633.49,735754,818019,640038,747111,381813171,459958157,383406355,475772685,30659,31969,24742,25240,,,,
979,2024-02-15,493.50,515.30,491.95,508.10,507.90,508.02,714.7,483.35,995334,5.056480e+08,49286,23.35,1,0,0,15,2,2024,3,46,1,1,509.79,553.37,604.57,629.15,18.63,22.59,21.34,22.26,512.59,557.76,608.17,631.92,711399,860316,656385,757426,363912813,478450515,388672600,480421170,32106,34057,25877,25807,,,,


### Standardized data

In [6]:
standardized_df = pd.read_parquet(
    cnst.PROCESSED_DATA_DIR.joinpath(f'{STOCK_SYMBOL}-standardized.parquet')
)

standardized_df

Unnamed: 0,Open,High,Low,LTP,VWAP,52W H,52W L,IsGreen,Is52WLow,Is52WHigh,Day,Month,Year,Weekday,DayOfYear,Quarter,DaysSinceLastTradingSession,Close 7MA,Close 15MA,Close 30MA,Close 60MA,Range 7MA,Range 15MA,Range 30MA,Range 60MA,VWAP 7MA,VWAP 15MA,VWAP 30MA,VWAP 60MA,Volume 7MA,Volume 15MA,Volume 30MA,Volume 60MA,Value 7MA,Value 15MA,Value 30MA,Value 60MA,#Trades 7MA,#Trades 15MA,#Trades 30MA,#Trades 60MA,Target 3D,Target 7D,Target 15D,Target 30D
0,1.005,1.011,0.995,0.998,1.002,1.747,0.788,0,0,0,1,1,2020,2,1,1,1,1.000,1.000,1.000,1.000,0.016,0.016,0.016,0.016,1.002,1.002,1.002,1.002,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,1.000,0.993,1.098,1.134,1.106
1,0.965,1.020,0.963,0.997,0.998,1.684,0.759,1,0,0,2,1,2020,3,2,1,1,0.982,0.982,0.982,0.982,0.037,0.037,0.037,0.037,0.982,0.982,0.982,0.982,0.620,0.620,0.620,0.620,0.616,0.616,0.616,0.616,0.710,0.710,0.710,0.710,1.000,1.093,1.112,1.029
2,0.990,1.040,0.983,1.004,1.011,1.677,0.756,1,0,0,3,1,2020,4,3,1,1,0.985,0.985,0.985,0.985,0.043,0.043,0.043,0.043,0.989,0.989,0.989,0.989,0.801,0.801,0.801,0.801,0.791,0.791,0.791,0.791,0.815,0.815,0.815,0.815,0.982,1.144,1.140,0.987
3,1.046,1.049,0.991,0.996,1.019,1.759,0.793,0,0,0,6,1,2020,0,6,1,3,1.025,1.025,1.025,1.025,0.049,0.049,0.049,0.049,1.033,1.033,1.033,1.033,1.288,1.288,1.288,1.288,1.315,1.315,1.315,1.315,1.117,1.117,1.117,1.117,1.106,1.247,1.262,1.040
4,0.970,1.006,0.967,0.996,0.987,1.683,0.759,1,0,0,7,1,2020,1,7,1,1,0.985,0.985,0.985,0.985,0.045,0.045,0.045,0.045,0.988,0.988,0.988,0.988,1.294,1.294,1.294,1.294,1.304,1.304,1.304,1.304,1.135,1.135,1.135,1.135,1.058,1.177,1.161,0.962
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
976,1.031,1.034,0.996,1.005,1.013,1.429,0.996,0,1,0,12,2,2024,0,43,1,3,1.065,1.163,1.247,1.269,0.045,0.047,0.043,0.044,1.076,1.173,1.255,1.275,1.419,1.193,1.038,1.114,1.527,1.360,1.269,1.411,1.347,1.133,0.935,0.924,1.016,,,
977,1.010,1.018,0.976,1.008,0.998,1.444,0.976,0,1,0,13,2,2024,1,44,1,1,1.053,1.154,1.247,1.278,0.044,0.048,0.043,0.045,1.062,1.163,1.254,1.284,1.037,0.976,0.794,0.894,1.113,1.124,0.977,1.156,0.881,0.804,0.632,0.637,0.999,,,
978,1.009,1.018,0.995,0.999,1.005,1.456,0.985,0,0,0,14,2,2024,2,45,1,1,1.047,1.145,1.244,1.285,0.036,0.046,0.043,0.045,1.054,1.153,1.251,1.291,1.161,1.290,1.010,1.179,1.221,1.471,1.226,1.522,0.967,1.008,0.781,0.796,,,,
979,0.972,1.015,0.969,1.000,1.000,1.407,0.952,1,0,0,15,2,2024,3,46,1,1,1.004,1.090,1.190,1.239,0.037,0.044,0.042,0.044,1.009,1.098,1.197,1.244,0.715,0.864,0.659,0.761,0.720,0.946,0.769,0.950,0.651,0.691,0.525,0.524,,,,


## Modelling

### Target columns

In [7]:
target_cols = standardized_df.filter(regex = "Target.*").columns.to_list()
target_cols

['Target 3D', 'Target 7D', 'Target 15D', 'Target 30D']

In [8]:
standardized_df[target_cols].describe()

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D
count,978.0,974.0,966.0,951.0
mean,1.006635,1.015753,1.034381,1.073529
std,0.05873,0.092773,0.138773,0.20192
min,0.803,0.711,0.688,0.645
25%,0.974,0.965,0.94025,0.931
50%,1.001,1.0055,1.016,1.028
75%,1.034,1.057,1.10175,1.1695
max,1.414,1.526,1.562,1.77


### Data processing

In [9]:
def get_training_data(target_col: str):
    print(f"Target: {target_col}")
    X_df = standardized_df[standardized_df[target_col].notnull()].drop(columns = target_cols).copy()
    y = standardized_df[standardized_df[target_col].notnull()][target_col].copy()
    print(f"X.shape: {X_df.shape}")
    print(f"y.shape: {y.shape}")

    return X_df, y

### Grid search parameters

In [10]:
param_dict = {
    "n_estimators": [75, 100, 125],
    "max_depth": [4, 5, 6],
    "max_features": ["log2", "sqrt", 0.25],
    "max_samples": [0.75, 1.0]
}

feature_importances = pd.DataFrame(
    index = standardized_df.drop(columns = target_cols).columns.to_list()
)

expected_errors = {}

### Model building

In [11]:
def get_model():
    rf_model = RandomForestRegressor(
        criterion = "squared_error",
        n_jobs = -1,
        random_state = cnst.RANDOM_STATE
    )

    grid_cv = RandomizedSearchCV(
        rf_model,
        param_dict,
        n_iter = 12,
        cv = 5, 
        random_state = cnst.RANDOM_STATE
    )

    return grid_cv

def print_results(y, preds):
    print(f"Target std: {y.std():.3f}")
    print(f"R2: {r2_score(y, preds):.3f}")
    print(f"MSE: {mean_squared_error(y, preds, squared = False):.3f}")
    print(f"MAE: {mean_absolute_error(y, preds):.3f}")

def expected_error(y, preds):
    return mean_squared_error(y, preds, squared = False)

## Training

### `Target 3D`

In [12]:
target_col = target_cols[0]
X, y = get_training_data(target_col)

Target: Target 3D
X.shape: (978, 41)
y.shape: (978,)


In [13]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 125,
 'max_samples': 1.0,
 'max_features': 'log2',
 'max_depth': 6}

In [14]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
10,0.527505,0.038379,0.058367,0.006812,125,1.0,log2,6,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",-0.020781,-0.039276,0.023059,0.048787,-0.069882,-0.011619,0.042649,1
2,0.378233,0.026256,0.047096,0.00447,100,1.0,0.25,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.017215,-0.043434,0.02646,0.028416,-0.053626,-0.01188,0.034236,2
8,0.336369,0.01109,0.049765,0.014759,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",-0.001688,-0.03907,0.023611,0.011291,-0.075368,-0.016245,0.036264,3
7,0.274206,0.012245,0.035845,0.004916,75,0.75,0.25,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",-0.018586,-0.048755,0.010519,0.022883,-0.057231,-0.018234,0.031527,4
1,0.353251,0.041147,0.045139,0.007589,75,1.0,sqrt,5,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",-0.006251,-0.046146,0.015587,0.01942,-0.07549,-0.018576,0.036775,5


In [15]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.059
R2: 0.335
MSE: 0.048
MAE: 0.036


In [16]:
pred_col_name = f'Pred {target_col}'
expected_errors[pred_col_name] = expected_error(y, preds)

stock_df[pred_col_name] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, pred_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 3D,Pred Target 3D
968,2024-01-31,618.15,540.75,608.66
969,2024-02-01,579.75,537.95,563.66
970,2024-02-02,574.05,535.1,563.45
971,2024-02-05,540.75,524.35,531.78
972,2024-02-06,537.95,515.3,533.5
973,2024-02-07,535.1,500.05,527.78
974,2024-02-08,524.35,495.05,518.4
975,2024-02-09,515.3,490.8,508.18
976,2024-02-12,500.05,507.9,499.43
977,2024-02-13,495.05,494.5,494.25


### `Target 7D`

In [17]:
target_col = target_cols[1]
X, y = get_training_data(target_col)

Target: Target 7D
X.shape: (974, 41)
y.shape: (974,)


In [18]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 1.0,
 'max_features': 'log2',
 'max_depth': 5}

In [19]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.488227,0.093889,0.057007,0.011276,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",0.06136,-0.110181,-0.046097,0.048762,-0.164785,-0.042188,0.087935,1
4,0.29081,0.04249,0.041059,0.00734,75,0.75,log2,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",0.051066,-0.112585,-0.061142,0.010942,-0.1394,-0.050224,0.072057,2
5,0.485633,0.035842,0.054153,0.007337,125,1.0,sqrt,5,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",0.05413,-0.128678,-0.065718,0.026651,-0.171178,-0.056959,0.086715,3
1,0.332759,0.018132,0.041142,0.004335,75,1.0,sqrt,5,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",0.071712,-0.132311,-0.079916,0.023946,-0.187638,-0.060841,0.096238,4
7,0.314568,0.041354,0.042779,0.009044,75,0.75,0.25,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",0.048981,-0.167087,-0.117079,0.048692,-0.13329,-0.063957,0.093498,5


In [20]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.093
R2: 0.334
MSE: 0.076
MAE: 0.053


In [21]:
pred_col_name = f'Pred {target_col}'
expected_errors[pred_col_name] = expected_error(y, preds)

stock_df[pred_col_name] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, pred_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 7D,Pred Target 7D
964,2024-01-24,637.7,540.75,597.06
965,2024-01-25,637.4,537.95,591.8
966,2024-01-29,628.1,535.1,582.95
967,2024-01-30,615.9,524.35,574.94
968,2024-01-31,618.15,515.3,578.36
969,2024-02-01,579.75,500.05,541.08
970,2024-02-02,574.05,495.05,537.85
971,2024-02-05,540.75,490.8,513.29
972,2024-02-06,537.95,507.9,518.61
973,2024-02-07,535.1,494.5,519.4


### `Target 15D`

In [22]:
target_col = target_cols[2]
X, y = get_training_data(target_col)

Target: Target 15D
X.shape: (966, 41)
y.shape: (966,)


In [23]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 100,
 'max_samples': 1.0,
 'max_features': 'log2',
 'max_depth': 5}

In [24]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.440998,0.103281,0.05091,0.007206,100,1.0,log2,5,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",0.124055,-0.291138,-0.189313,0.021752,-0.281088,-0.123147,0.167122,1
3,0.285633,0.008198,0.03969,0.007246,75,1.0,0.25,4,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",0.110848,-0.399205,-0.179637,0.09316,-0.265684,-0.128104,0.200567,2
1,0.28937,0.027741,0.039578,0.005385,75,1.0,sqrt,5,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",0.163302,-0.349136,-0.191741,0.066355,-0.34082,-0.130408,0.21017,3
10,0.481232,0.065076,0.054673,0.003814,125,1.0,log2,6,"{'n_estimators': 125, 'max_samples': 1.0, 'max...",0.149121,-0.386828,-0.179652,0.053117,-0.292644,-0.131377,0.20313,4
11,0.385115,0.019376,0.049487,0.005158,100,0.75,0.25,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",0.129907,-0.3559,-0.169621,0.066975,-0.328757,-0.131479,0.199221,5


In [25]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.139
R2: 0.463
MSE: 0.102
MAE: 0.075


In [26]:
pred_col_name = f'Pred {target_col}'
expected_errors[pred_col_name] = expected_error(y, preds)

stock_df[pred_col_name] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, pred_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 15D,Pred Target 15D
956,2024-01-12,655.7,540.75,601.19
957,2024-01-15,647.65,537.95,587.61
958,2024-01-16,662.25,535.1,603.22
959,2024-01-17,653.6,524.35,594.02
960,2024-01-18,647.8,515.3,580.19
961,2024-01-19,644.75,500.05,575.19
962,2024-01-20,648.4,495.05,581.84
963,2024-01-23,628.9,490.8,585.58
964,2024-01-24,637.7,507.9,591.18
965,2024-01-25,637.4,494.5,593.02


### `Target 30D`

In [27]:
target_col = target_cols[3]
X, y = get_training_data(target_col)

Target: Target 30D
X.shape: (951, 41)
y.shape: (951,)


In [28]:
model = get_model()
model.fit(X, y)
model.best_params_

{'n_estimators': 75, 'max_samples': 0.75, 'max_features': 0.25, 'max_depth': 5}

In [29]:
pd.DataFrame(model.cv_results_).sort_values('rank_test_score').iloc[:5]

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_max_samples,param_max_features,param_max_depth,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
7,0.296585,0.019688,0.03787,0.005568,75,0.75,0.25,5,"{'n_estimators': 75, 'max_samples': 0.75, 'max...",0.199526,-0.500946,-0.625296,-0.090729,-0.159285,-0.235346,0.296114,1
8,0.390911,0.027473,0.0512,0.006485,100,1.0,sqrt,4,"{'n_estimators': 100, 'max_samples': 1.0, 'max...",0.20022,-0.430967,-0.461109,-0.10698,-0.380586,-0.235884,0.251605,2
11,0.377794,0.034224,0.048338,0.007109,100,0.75,0.25,4,"{'n_estimators': 100, 'max_samples': 0.75, 'ma...",0.164665,-0.544652,-0.603095,-0.066233,-0.215996,-0.253062,0.289248,3
3,0.307537,0.013938,0.044475,0.003186,75,1.0,0.25,4,"{'n_estimators': 75, 'max_samples': 1.0, 'max_...",0.23086,-0.613394,-0.555866,-0.044438,-0.344336,-0.265435,0.318371,4
6,0.542667,0.024674,0.060047,0.002889,125,0.75,0.25,5,"{'n_estimators': 125, 'max_samples': 0.75, 'ma...",0.180352,-0.533238,-0.680316,-0.139725,-0.183258,-0.271237,0.304946,5


In [30]:
feature_importances = feature_importances.join(
    pd.DataFrame(
        model.best_estimator_.feature_importances_,
        index = model.best_estimator_.feature_names_in_,
        columns = [target_col]
    )
)

preds = model.predict(X)
print_results(y, preds)

Target std: 0.202
R2: 0.762
MSE: 0.098
MAE: 0.073


In [31]:
pred_col_name = f'Pred {target_col}'
expected_errors[pred_col_name] = expected_error(y, preds)

stock_df[pred_col_name] = (
    model.predict(standardized_df.drop(columns = target_cols)) * stock_df['Close']
).round(2)
stock_df[['Date', 'Close', target_col, pred_col_name]].dropna().iloc[-10:, :]

Unnamed: 0,Date,Close,Target 30D,Pred Target 30D
941,2023-12-21,663.85,540.75,607.24
942,2023-12-22,676.75,537.95,604.15
943,2023-12-26,673.95,535.1,597.89
944,2023-12-27,677.75,524.35,592.2
945,2023-12-28,675.8,515.3,596.41
946,2023-12-29,679.4,500.05,599.29
947,2024-01-01,687.8,495.05,614.82
948,2024-01-02,692.25,490.8,609.79
949,2024-01-03,686.8,507.9,613.89
950,2024-01-04,695.8,494.5,622.63


## Feature importances

In [32]:
feature_importances['Mean'] = feature_importances.mean(axis = 1)
(feature_importances.sort_values('Mean', ascending = False) * 100).round(1)

Unnamed: 0,Target 3D,Target 7D,Target 15D,Target 30D,Mean
DayOfYear,4.2,9.6,9.6,13.3,9.2
52W L,3.3,5.6,8.1,12.2,7.3
52W H,3.9,6.7,7.0,11.0,7.2
Range 60MA,3.4,6.5,8.3,6.2,6.1
Month,1.3,3.9,7.0,11.3,5.9
Year,1.5,5.4,6.1,9.3,5.6
Range 30MA,4.8,6.0,6.6,3.7,5.3
Range 15MA,3.4,4.3,5.8,3.2,4.2
VWAP 60MA,3.5,3.5,3.0,5.5,3.9
Close 60MA,3.0,3.5,3.7,4.3,3.6


## Forecasts

In [33]:
stock_df.filter(regex = "(Date)|(Close$)|(Pred.*)").iloc[-10:, :]

Unnamed: 0,Date,Close,Pred Target 3D,Pred Target 7D,Pred Target 15D,Pred Target 30D
971,2024-02-05,540.75,531.78,513.29,549.37,544.21
972,2024-02-06,537.95,533.5,518.61,543.12,542.17
973,2024-02-07,535.1,527.78,519.4,537.19,543.65
974,2024-02-08,524.35,518.4,513.76,527.45,535.83
975,2024-02-09,515.3,508.18,501.94,518.31,521.33
976,2024-02-12,500.05,499.43,493.26,502.49,503.84
977,2024-02-13,495.05,494.25,484.23,497.11,494.44
978,2024-02-14,490.8,487.6,481.93,492.04,491.21
979,2024-02-15,507.9,503.15,491.31,500.06,499.2
980,2024-02-16,494.5,492.36,482.56,489.81,494.52


In [34]:
latest_preds = stock_df.iloc[-1]
print(f"Date: {latest_preds['Date'].date()}")
print(f"Close: {latest_preds['Close']}")

for pred in expected_errors.keys():
    print(f"{pred}: {latest_preds[pred]} ± {expected_errors[pred] * latest_preds['Close']:.2f}")

Date: 2024-02-16
Close: 494.5
Pred Target 3D: 492.36 ± 23.68
Pred Target 7D: 482.56 ± 37.41
Pred Target 15D: 489.81 ± 50.25
Pred Target 30D: 494.52 ± 48.69
