In [1]:
from sklearn.ensemble import RandomForestClassifier # used to define random forest model
# GridSearchCV used to perform hyperparameter search:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import load_iris
import numpy as np 
import pandas as pd 

In [3]:
# Hyperparameter grid used by the grid search.
#
# n_estimators: number of individual decision trees that make up the random forest,
# searched over 20-100 inclusive in steps of 20.
# np.arange excludes its stop value, so the stop is set one past the intended
# upper bound (101) to keep 100 in the grid.
n_estimators_range = np.arange(20, 101, 20)  # 20, 40, 60, 80, 100

# max_depth: maximum depth allowed for each decision tree (length of the longest
# root-to-leaf path), searched over 5-30 inclusive in steps of 5.
# Same exclusive-stop caveat as above, hence 31.
max_depth_range = np.arange(5, 31, 5)  # 5, 10, 15, 20, 25, 30

# Keys must match RandomForestClassifier constructor argument names.
param_grid = {
    'n_estimators': n_estimators_range,
    'max_depth': max_depth_range,
}

In [4]:
# Base random forest estimator handed to the grid search; the seed is pinned so
# the randomized parts of forest construction are repeatable between runs.
rf_classifier = RandomForestClassifier(
    random_state=64,
)

In [5]:
# Load the feature table and the target table from CSV files in the working directory.
# Both are read with pandas defaults, so X and y are DataFrames.
X, y = pd.read_csv("features.csv"), pd.read_csv("targets.csv")

In [6]:
# Hold out 20% of the rows for testing and keep the remaining 80% for training.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=128,
)

## Perform Hyperparameter Grid Search and Generate Predictions
Perform a random forest hyperparameter grid search, select the best-performing model (based on cross-validation within the training sample), and generate predictions on the test set.

In [8]:
# Build the grid-search object by instantiating GridSearchCV with:
#   - the random forest model object rf_classifier as the base estimator,
#   - param_grid as the set of hyperparameter combinations to evaluate,
#   - 5 folds for the cross-validation,
#   - "precision" as the scoring metric used to pick the best combination.
# NOTE(review): scikit-learn's plain 'precision' scorer is the binary-classification
# variant; if the targets have more than two classes an averaged scorer such as
# 'precision_macro' would be needed -- confirm against targets.csv.
grid_search = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid,
    scoring='precision',
    cv=5,
)

In [None]:
# Run the hyperparameter search. Only the training split is passed in, so the
# test split plays no part in model selection.
grid_search.fit(X=X_train, y=y_train)

In [15]:
# Best model found by the search: best_estimator_ is the estimator refit on the
# whole training set with the best-scoring hyperparameter combination (available
# because GridSearchCV's refit option is left at its default of True).
# The winning hyperparameter values themselves are in grid_search.best_params_.
best_rf_model = grid_search.best_estimator_

In [22]:
# Show the selected model as the cell output (bare last expression -> rich repr)
best_rf_model

In [17]:
# Predict class labels for the held-out test features using the selected model.
y_pred = best_rf_model.predict(X=X_test)