|
# Inspecting the hyperparameters of a CART (Classification And Regression Tree).
# Fix: the original title was a bare text line, which is a SyntaxError in
# Python — it must be a comment.

# Import DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier

# Fixed random seed so tree construction is reproducible across runs
SEED = 1

# Instantiate a DecisionTreeClassifier 'dt' with default hyperparameters
dt = DecisionTreeClassifier(random_state=SEED)

# Print out 'dt's hyperparameters and their current (default) values
print(dt.get_params())
| 13 | + |
# Import GridSearchCV
from sklearn.model_selection import GridSearchCV

# Grid of hyperparameter values to search exhaustively.
# NOTE: min_samples_leaf and max_features are given as floats, which
# scikit-learn interprets as fractions (of training samples and of
# features, respectively).
params_dt = {
    'max_depth': [3, 4, 5, 6],
    'min_samples_leaf': [0.04, 0.06, 0.08],
    'max_features': [0.2, 0.4, 0.6, 0.8],
}

# Instantiate a 10-fold CV grid search object 'grid_dt';
# n_jobs=-1 runs the cross-validation folds on all available CPU cores.
grid_dt = GridSearchCV(estimator=dt,
                       param_grid=params_dt,
                       scoring='accuracy',
                       cv=10,
                       n_jobs=-1)

# Fit 'grid_dt' to the training data.
# NOTE(review): X_train / y_train are assumed to be defined earlier in the
# session (e.g. from a train/test split) — confirm in the calling context.
grid_dt.fit(X_train, y_train)
| 29 | + |
# Extract best hyperparameters from 'grid_dt'
best_hyperparams = grid_dt.best_params_
# Fix: corrected the typo "hyerparameters" in the printed message.
print('Best hyperparameters:\n', best_hyperparams)

# Extract best CV score from 'grid_dt'
best_CV_score = grid_dt.best_score_
# Fix: the original format string had no '{}' placeholder, so the score
# was never actually printed.
print('Best CV accuracy: {:.3f}'.format(best_CV_score))

# Extract best model from 'grid_dt' (refit on the full training set,
# since GridSearchCV's default refit=True was used)
best_model = grid_dt.best_estimator_

# Evaluate test set accuracy
test_acc = best_model.score(X_test, y_test)

# Print test set accuracy
print("Test set accuracy of best model: {:.3f}".format(test_acc))
| 46 | + |
# Import roc_auc_score from sklearn.metrics
from sklearn.metrics import roc_auc_score

# Extract the best estimator found by the grid search
best_model = grid_dt.best_estimator_

# Predict the test set probabilities of the positive class.
# predict_proba returns one column per class; column 1 is the positive
# class for a binary problem.
y_pred_proba = best_model.predict_proba(X_test)[:, 1]

# Compute the test set ROC AUC from the predicted probabilities
test_roc_auc = roc_auc_score(y_test, y_pred_proba)

# Print test_roc_auc
print('Test set ROC AUC score: {:.3f}'.format(test_roc_auc))
| 61 | + |
0 commit comments