In [None]:
import numpy as np
import pandas as pd
import sklearn

In [None]:
# Load the full modelling dataset from the CSV file kept next to the notebook folder.
df_ml = pd.read_csv(filepath_or_buffer="../data/data_ml.csv")

In [None]:
# Chronological split: rows strictly before `separation_date` are used for
# training; rows from `separation_date` up to (but excluding) `test_end_date`
# form the test window.
separation_date = "2013-12-31"
test_end_date = "2018-12-31"

# .copy() detaches each subset from df_ml so later edits cannot touch the source frame.
df_train = df_ml.query("date < @separation_date").copy()
df_test = df_ml.query(
    "(date >= @separation_date) and (date < @test_end_date)"
).copy()

In [None]:
# Columns that are not model inputs, grouped by the reason they are excluded.
identifier_columns = ["stock_id", "date"]  # non-feature identifiers
numerical_label_columns = ["R1M_Usd", "R3M_Usd", "R6M_Usd", "R12M_Usd"]  # numerical labels
categorical_label_columns = ["R1M_Usd_C", "R12M_Usd_C"]  # categorical labels
columns_to_drop = identifier_columns + numerical_label_columns + categorical_label_columns

# Every remaining column of df_ml is treated as a feature (original column order kept).
features = df_ml.drop(columns=columns_to_drop).columns.tolist()

In [None]:
# Training design matrix (feature columns only) and the classification target.
X_train = df_train.loc[:, features]
y_train = df_train.loc[:, "R1M_Usd_C"]

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# Grid explored by the search: only the number of trees varies, the depth is fixed.
parameters = dict(
    n_estimators=[25, 50, 100, 150, 200],
    max_depth=[5],
)

# Metrics computed for every candidate during cross-validation.
scoring = ["accuracy", "precision", "recall", "roc_auc"]

In [None]:
# Base estimator handed to the grid search.
# n_jobs=-1 lets scikit-learn use all available cores when fitting the trees.
# random_state is fixed so that re-running the notebook grows the same forests
# and therefore reports the same scores; without it every run differs.
model = RandomForestClassifier(n_jobs=-1, random_state=42)

In [None]:
# Stratified, shuffled 5-fold cross-validation.
# random_state is fixed because shuffle=True otherwise assigns rows to folds
# differently on every run, which makes the reported CV scores irreproducible.
cv = sklearn.model_selection.StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Multi-metric grid search over `parameters`.
# refit=False: no final model is refitted on the whole training set; only the
# cross-validation results table is produced.
grid_search = GridSearchCV(
    model,
    parameters,
    scoring=scoring,
    refit=False,
    verbose=1,
    cv=cv,
)

In [None]:
# Run the cross-validated search on the training data (one fit per candidate per fold).
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


In [None]:
# Tabulate the search results: one row per parameter combination.
# With several scoring metrics the rank columns are per metric
# (e.g. rank_test_accuracy), so there is no single rank column to sort by here.
df_cv_results = pd.DataFrame(data=grid_search.cv_results_)

In [None]:
# List every column of the results table with its dtype and non-null count,
# to see which result columns are available for the summary view.
df_cv_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 39 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   mean_fit_time          5 non-null      float64
 1   std_fit_time           5 non-null      float64
 2   mean_score_time        5 non-null      float64
 3   std_score_time         5 non-null      float64
 4   param_max_depth        5 non-null      int64  
 5   param_n_estimators     5 non-null      int64  
 6   params                 5 non-null      object 
 7   split0_test_accuracy   5 non-null      float64
 8   split1_test_accuracy   5 non-null      float64
 9   split2_test_accuracy   5 non-null      float64
 10  split3_test_accuracy   5 non-null      float64
 11  split4_test_accuracy   5 non-null      float64
 12  mean_test_accuracy     5 non-null      float64
 13  std_test_accuracy      5 non-null      float64
 14  rank_test_accuracy     5 non-null      int32  
 15  split0_tes

In [None]:
# Columns kept for the summary view of the grid-search results:
# the fit time, the parameter columns, and for each metric its mean CV score
# and rank. Per-split scores and standard deviations are left out on purpose.
summary_metrics = ("accuracy", "precision", "recall", "roc_auc")
cols = [
    "mean_fit_time",
    "param_max_depth",
    "param_n_estimators",
    "params",
] + [
    f"{stat}_test_{metric}"
    for metric in summary_metrics
    for stat in ("mean", "rank")
]

In [None]:
# Show only the summary columns for each candidate.
df_cv_results.loc[:, cols]

Unnamed: 0,mean_fit_time,param_max_depth,param_n_estimators,params,mean_test_accuracy,rank_test_accuracy,mean_test_precision,rank_test_precision,mean_test_recall,rank_test_recall,mean_test_roc_auc,rank_test_roc_auc
0,1.980644,5,25,"{'max_depth': 5, 'n_estimators': 25}",0.518647,5,0.514612,5,0.561008,1,0.527572,5
1,3.189296,5,50,"{'max_depth': 5, 'n_estimators': 50}",0.518759,4,0.514745,4,0.560226,2,0.528218,4
2,5.893857,5,100,"{'max_depth': 5, 'n_estimators': 100}",0.519481,3,0.515427,3,0.559718,3,0.529398,2
3,8.74433,5,150,"{'max_depth': 5, 'n_estimators': 150}",0.520693,1,0.516604,1,0.559098,5,0.529471,1
4,11.604611,5,200,"{'max_depth': 5, 'n_estimators': 200}",0.520006,2,0.515953,2,0.559443,4,0.52938,3
