@@ -59,3 +59,51 @@ test_roc_auc = roc_auc_score(y_test, y_pred_proba)
5959# Print test_roc_auc
6060print('Test set ROC AUC score: {:.3f}'.format(test_roc_auc))
6161
62+
# Inspecting RF hyperparameters in sklearn
# Import RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor

# Fixed seed so results are reproducible from run to run
SEED = 1

# Instantiate a random forest regressor 'rf'; only the seed is overridden,
# every other hyperparameter keeps its library default
rf = RandomForestRegressor(random_state=SEED)

# Inspect rf's hyperparameters. get_params() only returns the mapping, so it
# is printed explicitly: a bare expression statement displays nothing when
# this file is run as a script.
print(rf.get_params())
75+
# Basic imports
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid 'params_rf': candidate values for each setting of 'rf'
params_rf = {
    'n_estimators': [300, 400, 500],
    'max_depth': [4, 6, 8],
    'min_samples_leaf': [0.1, 0.2],
    'max_features': ['log2', 'sqrt'],
}

# Instantiate 'grid_rf': a cross-validated search (cv=3) over 'params_rf'
# for the estimator 'rf', scored with negated mean squared error
grid_rf = GridSearchCV(
    estimator=rf,
    param_grid=params_rf,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=1,
    n_jobs=-1,
)
90+
# Searching for the best hyperparameters
# Fit 'grid_rf' to the training set
grid_rf.fit(X_train, y_train)

# Extract the best hyperparameters found by 'grid_rf' and report them
# (the printed label previously read 'hyerparameters')
best_hyperparams = grid_rf.best_params_
print('Best hyperparameters:\n', best_hyperparams)
98+
# Take the best estimator found by the search in 'grid_rf'
best_model = grid_rf.best_estimator_

# Use it to predict the targets of the test set
y_pred = best_model.predict(X_test)

# Test set RMSE: square root of the mean squared error
rmse_test = MSE(y_test, y_pred) ** 0.5

# Report the test set RMSE to two decimal places
print('Test set RMSE of rf: {:.2f}'.format(rmse_test))
0 commit comments