# Case Study

In this notebook, we present the case study described in Section 4.3 of the manuscript. Specifically, we compare the results of our method RUG with those of FSDT (McTavish et al., 2022) and CG (Lawless et al., 2021).

The results of this analysis are reported in Table 5 of the manuscript.

In [1]:
import pandas as pd
import numpy as np
import os

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, matthews_corrcoef
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
# from lightgbm import LGBMClassifier
import statsmodels.api as sm
import shap

# import Datasets as DS
import grid_search_helpers as gs_helpers
from case_study_helpers import *

import warnings
warnings.filterwarnings("ignore")

# for RUG
from ruxg import RUGClassifier

# for FSDT
import FSDT_helpers as FSDT_helpers
from dl85 import DL85Classifier

# for CG
from CG_helpers import *

## Loading the data
The raw data and details for data cleaning are provided here: https://towardsdatascience.com/how-to-develop-a-credit-risk-model-and-scorecard-91335fc01f03

In [2]:
# Name of the problem instance; it is passed to `run_CG` further below.
# It equals the upper-cased name of the dataset loader `DS.loan`
# (i.e. `DS.loan.__name__.upper()`), written out here as a literal.
pname = 'LOAN'

---

# Table 5a

We start with the objective of obtaining an interpretable model. We will then examine the performance of the interpretable models generated by the different approaches.

To ensure the models are interpretable, we set the maximum depth to 2.

In [3]:
# Settings for reproducibility: seed and test fraction of every train/test split
random_state = 42
test_size = 0.2

# Maximum depth used for the interpretable models
depth = 2

# Collects the predicted test labels, one column per method
y_results = pd.DataFrame()

## RUG

In [None]:
# prep the data: split the loan data from the `Datasets` module into train and test
# parts, using the seed and test fraction defined in the parameters cell
import Datasets as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,
                                                        testSize=test_size, target='good_bad')

# initialize classifier
# NOTE(review): the classifier seed (random_state=1) differs from the split seed
# `random_state` above -- presumably intentional; confirm.
RUG = RUGClassifier(max_depth=depth, rule_length_cost=True,
                    solver='gurobi', random_state=1, max_RMP_calls=3)

print('Fitting RUG')
RUG_fit = RUG.fit(X_train, y_train)

# get predicted values (the test features are converted to a NumPy array first)
y_results['RUG_pred'] = RUG.predict(np.array(X_test))

print('Done')

In [None]:
# Evaluate the RUG predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['RUG_pred'])
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['RUG_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['RUG_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['RUG_pred']))

# Interpretability statistics reported by the fitted RUG model
print('Number of rules = %.0f' % RUG.get_num_of_rules())
print('Average rule length = %.2f' % RUG.get_avg_rule_length())
print('Average number of rules used per sample = %.2f' % RUG.get_avg_num_rules_per_sample())
# This value is a rule *length* per sample, so it gets its own label
print('Average rule length per sample = %.2f' % RUG.get_avg_rule_length_per_sample())

## FSDT

McTavish, H., Zhong, C., Achermann, R., Karimalis, I., Chen, J., Rudin, C., & Seltzer, M. (2022). Fast Sparse Decision Tree Optimization via Reference Ensembles. Proceedings of the AAAI Conference on Artificial Intelligence, 36(9), 9604-9613. https://doi.org/10.1609/aaai.v36i9.21194

In [None]:
# Prep the data: FSDT is trained on the binarized loan data. The binarized
# files are read from the same './datasets/' folder that every other data
# path in this notebook uses.
import Datasets_binary as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,
                                                        testSize=test_size, target='y',
                                                        datasets_path='./datasets/FSDT_binarized/')

# initialize classifier (same maximum depth as the other Table 5a models)
FSDT = DL85Classifier(time_limit=1000, desc=True, max_depth=depth)

print('Fitting FSDT')
FSDT.fit(X_train, y_train)

# get predicted values
y_results['FSDT_pred'] = FSDT.predict(X_test)

print('Done')

In [None]:
# Evaluate the FSDT predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['FSDT_pred'])
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['FSDT_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['FSDT_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['FSDT_pred']))

# Interpretability statistics computed from the fitted tree
print('Number of rules = %.0f' % FSDT_helpers.get_num_leaves(FSDT.tree_))
print('Average rule length = %.2f' % FSDT_helpers.get_avg_rule_length(FSDT.tree_))
# The number of rules per sample is reported as the constant 1 for the single tree
print('Average number of rules used per sample = %.2f' % 1.00)
# This value is a rule *length* per sample, so it gets its own label
print('Average rule length per sample = %.2f' % FSDT_helpers.get_avg_rule_length_per_sample(FSDT.tree_, X_test))

## Column Generation (CG)

Lawless, C., Dash, S., Günlük, O., & Wei, D. (2021). Interpretable and Fair Boolean Rule Sets via Column Generation. arXiv preprint arXiv:2111.08466.

This method does not have a maximum depth parameter. Instead, we are specifying a parameter to control sparsity, which they refer to as complexity. This parameter does not directly translate to the depth of the rules. We refer to their paper for details.

In [None]:
# CG settings for Table 5a; 'complexity' is the sparsity control mentioned above
CG_pgrid = dict(epsilon=1, complexity=5)

# Prep the data: CG uses its own binarized version of the loan data
import Datasets_binary as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(
    DS.loan,
    binary=False,
    randomState=random_state,
    testSize=test_size,
    target='y',
    datasets_path='./datasets/CG_binarized/',
)

In [None]:
print('Fitting CG')
# `run_CG` is not defined in this notebook (presumably it comes from the CG_helpers star import).
# It returns two values: `res`, which is not used further here, and the classifier `classif`.
res, classif = run_CG(pname, X_train, X_test, y_train, y_test, CG_pgrid, fairness_metric='unfair', time_limit=1000)
# keep the rule set of the classifier for the interpretability statistics in the next cell
final_rule_set = classif.fitRuleSet

# get predicted values
y_results['CG_pred'] = classif.predict(X_test)

print('Done')

In [None]:
# Evaluate the CG predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['CG_pred'])
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['CG_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['CG_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['CG_pred']))

# Interpretability statistics computed from the fitted rule set:
# the row sums of `final_rule_set` are averaged to obtain the average rule length
print('Number of rules = %.0f' % len(final_rule_set))
print('Average rule length = %.2f' % np.mean(np.sum(final_rule_set, axis=1)))
nr_rules_sample, length_sample = CG_rules_per_sample(X_test, final_rule_set)
print('Average number of rules used per sample = %.2f' % nr_rules_sample)
# `length_sample` is a rule *length* per sample, so it gets its own label
print('Average rule length per sample = %.2f' % length_sample)

---

# Table 5b

For this part, we try to increase the performance of the models and then examine their level of interpretability.

In [None]:
# start from an empty frame so that the Table 5b predictions replace those stored so far
y_results = pd.DataFrame()

### RUG

In [None]:
# prep the data: same loan data and split settings as in Table 5a
import Datasets as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state, testSize=test_size, target='good_bad')

# initialize model
# Compared with the Table 5a cell, max_RMP_calls is not set here and the seed is 0.
# NOTE(review): max_depth is still 2 (written as a literal) -- presumably the gain in
# performance is meant to come from not limiting max_RMP_calls; confirm.
RUG = RUGClassifier(max_depth=2, rule_length_cost=True,
                    solver='gurobi', random_state=0)

print('Fitting RUG')
RUG_fit = RUG.fit(X_train, y_train)

# get predicted values
y_results['RUG_pred'] = RUG.predict(np.array(X_test))

print('Done')

In [None]:
# Evaluate the RUG predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['RUG_pred'])
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['RUG_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['RUG_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['RUG_pred']))

# Interpretability statistics reported by the fitted RUG model
print('Number of rules = %.0f' % RUG.get_num_of_rules())
print('Average rule length = %.2f' % RUG.get_avg_rule_length())
print('Average number of rules used per sample = %.2f' % RUG.get_avg_num_rules_per_sample())
# This value is a rule *length* per sample, so it gets its own label
print('Average rule length per sample = %.2f' % RUG.get_avg_rule_length_per_sample())

### FSDT

McTavish, H., Zhong, C., Achermann, R., Karimalis, I., Chen, J., Rudin, C., & Seltzer, M. (2022). Fast Sparse Decision Tree Optimization via Reference Ensembles. Proceedings of the AAAI Conference on Artificial Intelligence, 36(9), 9604-9613. https://doi.org/10.1609/aaai.v36i9.21194

In [None]:
# prep the data: FSDT is trained on the binarized loan data
import Datasets_binary as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,testSize=test_size, target='y', datasets_path='./datasets/FSDT_binarized/')

# initialize classifier
# the maximum depth is 3 here, whereas the Table 5a cell uses `depth`
FSDT = DL85Classifier(time_limit=1000, desc=True, max_depth = 3)

print('Fitting FSDT')
FSDT.fit(X_train, y_train)

# get predicted values
y_results['FSDT_pred'] = FSDT.predict(X_test)

print('Done')

In [None]:
# Evaluate the FSDT predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['FSDT_pred'])
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['FSDT_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['FSDT_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['FSDT_pred']))

# Interpretability statistics computed from the fitted tree
print('Number of rules = %.0f' % FSDT_helpers.get_num_leaves(FSDT.tree_))
print('Average rule length = %.2f' % FSDT_helpers.get_avg_rule_length(FSDT.tree_))
# The number of rules per sample is reported as the constant 1 for the single tree
print('Average number of rules used per sample = %.2f' % 1.00)
# This value is a rule *length* per sample, so it gets its own label
print('Average rule length per sample = %.2f' % FSDT_helpers.get_avg_rule_length_per_sample(FSDT.tree_, X_test))

### Column Generation (CG)

Lawless, C., Dash, S., Günlük, O., & Wei, D. (2021). Interpretable and Fair Boolean Rule Sets via Column Generation. arXiv preprint arXiv:2111.08466.

This method does not have a maximum depth parameter. Instead, we are specifying a parameter to control sparsity, which they refer to as complexity. This parameter does not directly translate to the depth of the rules. We refer to their paper for details.

In [None]:
# CG settings for Table 5b: a larger 'complexity' value than in Table 5a
CG_pgrid = dict(epsilon=1, complexity=25)

# Prep the data: CG uses its own binarized version of the loan data
import Datasets_binary as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(
    DS.loan,
    binary=False,
    randomState=random_state,
    testSize=test_size,
    target='y',
    datasets_path='./datasets/CG_binarized/',
)

In [None]:
print('Fitting CG')
# `run_CG` is not defined in this notebook (presumably it comes from the CG_helpers star import).
# It returns two values: `res`, which is not used further here, and the classifier `classif`.
res, classif = run_CG(pname, X_train, X_test, y_train, y_test, CG_pgrid, fairness_metric='unfair', time_limit=1000)
# keep the rule set of the classifier for the interpretability statistics in the next cell
final_rule_set = classif.fitRuleSet

# get predicted values
y_results['CG_pred'] = classif.predict(X_test)

print('Done')

In [None]:
# Evaluate the CG predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['CG_pred'])
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['CG_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['CG_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['CG_pred']))

# Interpretability statistics computed from the fitted rule set:
# the row sums of `final_rule_set` are averaged to obtain the average rule length
print('Number of rules = %.0f' % len(final_rule_set))
print('Average rule length = %.2f' % np.mean(np.sum(final_rule_set, axis=1)))
nr_rules_sample, length_sample = CG_rules_per_sample(X_test, final_rule_set)
print('Average number of rules used per sample = %.2f' % nr_rules_sample)
# `length_sample` is a rule *length* per sample, so it gets its own label
print('Average rule length per sample = %.2f' % length_sample)

---

## SHAP Figures

We further compare the rules generated by RUG with the feature importance values generated by SHAP (Lundberg & Lee, 2017) applied to LightGBM. Hence, we'll first train the LightGBM model. Then, we'll select two samples and inspect the rules and the SHAP values.

### RUG

For this analysis, we will use the RUG model from Table 5a, i.e. the RUG model with 7 rules.

In [None]:
import Datasets as DS
# prep the data
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,
                                                        testSize=test_size, target='good_bad')

# initialize classifier (same settings as the RUG model of Table 5a)
RUG = RUGClassifier(max_depth=depth, rule_length_cost=True,
                    solver='gurobi', random_state=1, max_RMP_calls=3)

print('Fitting RUG')
RUG_fit = RUG.fit(X_train, y_train)

# get predicted values
# (this writes into the current `y_results` frame and replaces any existing 'RUG_pred' column)
y_results['RUG_pred'] = RUG.predict(np.array(X_test))

print('Done')

### LightGBM

In [None]:
# Column names of the loan data without the target column 'good_bad'
colnames = list(DS.loan('./datasets/').columns)
colnames.remove('good_bad')

In [None]:
# prep the data with the same split settings as for RUG
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,
                                                        testSize=test_size, target='good_bad')

# wrap the features in DataFrames labelled with the column names
# (presumably so that the SHAP plots show feature names -- confirm)
X_train = pd.DataFrame(X_train, columns = colnames)
X_test = pd.DataFrame(X_test, columns = colnames)

In [None]:
# LightGBM is imported in this cell; the import at the top of the notebook is commented out
from lightgbm import LGBMClassifier
depth = 2

# initialize classifier
# num_leaves = 2**depth is the number of leaves of a full binary tree of depth `depth`
lgbm = LGBMClassifier(objective='binary', random_state=0, max_depth=depth, num_leaves=(2**depth), n_estimators=20)

print('Fitting LightGBM')
lgbm.fit(X_train, y_train)

# get predicted values
y_results['LightGBM_pred'] = lgbm.predict(X_test)

print('Done')

### SHAP values

In [None]:
import shap

# load the JavaScript needed for SHAP's interactive notebook plots
shap.initjs()
# explain the LightGBM model; X_test is passed as the second argument of TreeExplainer
# (the background data -- see the SHAP documentation)
explainer = shap.TreeExplainer(lgbm, X_test)
# SHAP values for every test instance; indexed per instance in the cells below
shap_values = explainer(X_test)

### Compare SHAP output with rules

We compare the output of SHAP applied to LightGBM with the rules generated by RUG. We do this for two arbitrarily chosen samples from the test set, where one of the samples is covered by two rules and the other one by just one rule.

In [None]:
# Look-up structures of the fitted RUG model for the test instances.
# NOTE(review): dict1 presumably maps each instance to the rules covering it (it is what
# print_rules_for_instances receives below); dict2 is not used in the cells below -- confirm.
dict1, dict2 = RUG.get_instance_to_rule_dicts(pd.DataFrame(X_test).index, pd.DataFrame(X_test))

In [None]:
# first test instance to inspect
i = 18
# rules covering that instance
print(f'Instance {i}')
RUG.print_rules_for_instances([i], dict1)

# shap: waterfall plot of the SHAP values of the same instance
shap.plots.waterfall(shap_values[i])

In [None]:
# second test instance to inspect
i = 62
# rules covering that instance
print(f'Instance {i}')
RUG.print_rules_for_instances([i], dict1)

# shap: waterfall plot of the SHAP values of the same instance
shap.plots.waterfall(shap_values[i])

---

---

# Appendix

In this part of the notebook, we repeat the case study described above (see Section 4.3 in the manuscript) with traditional machine learning models, including logistic regression (LR), decision tree (DT), random forest (RF), AdaBoost (ADA), and LightGBM.

The results of this analysis are reported in Table 8 and described in Appendix D in the manuscript.


# Table 8a

We start with the objective of obtaining an interpretable model. We will then examine the performance of the interpretable models generated by the different approaches.

To ensure the models are interpretable, we set the maximum depth to 2.

In [None]:
# start from an empty frame so that the Table 8a predictions replace those stored so far
y_results = pd.DataFrame()

In [None]:
# prep the data: loan data from the `Datasets` module, split with the shared seed and test fraction;
# all Table 8a models below use this split
import Datasets as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,
                                                        testSize=test_size, target='good_bad')

### Logistic Regression (LR)

In [None]:
# add an intercept column to the features
# NOTE(review): by default add_constant does not add a column when the data already contains a
# constant column -- confirm that train and test end up with the same columns.
X_train_aug = sm.add_constant(X_train)
X_test_aug =  sm.add_constant(X_test)

# fit the logistic regression model
lr = sm.Logit(y_train, X_train_aug).fit()

# turn the predicted probabilities into 0/1 labels: 1 if the probability exceeds the threshold
threshold = 0.5
y_results['LR_pred'] = np.array(lr.predict(X_test_aug) > threshold , dtype=int)

In [None]:
# Evaluate the logistic regression predictions on the test set
lr_pred = y_results['LR_pred']

# Confusion matrix
cm = pd.crosstab(y_test, lr_pred)
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, lr_pred))
print('F1 score  = %.4f' % f1_score(y_test, lr_pred))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, lr_pred))

### Decision Tree (DT)

In [None]:
# Fit a depth-2 decision tree; `fit` returns the fitted estimator itself
tc = DecisionTreeClassifier(
    criterion='gini',
    max_depth=2,
    random_state=1,
).fit(X_train, y_train)

# Predicted labels for the test set
y_results['DT_pred'] = tc.predict(X_test)

In [None]:
# Evaluate the decision tree predictions on the test set
dt_pred = y_results['DT_pred']

# Confusion matrix
cm = pd.crosstab(y_test, dt_pred)
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, dt_pred))
print('F1 score  = %.4f' % f1_score(y_test, dt_pred))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, dt_pred))

# Interpretability statistics: every leaf of the tree is counted as one rule
print('Number of rules = %.0f' % tc.get_n_leaves())
print('Average rule length = %.2f' % average_depth(tc))
print('Average number of rules per sample = %.2f' % 1)
print('Average rule length per sample = %.2f' % average_path_length(tc, X_test))

### Random Forest (RF)

In [None]:
# random forest with 20 trees
# NOTE(review): max_depth is 7 here, although the text of this section states a maximum
# depth of 2 for the interpretable models -- confirm this is intended.
rfc = RandomForestClassifier(n_estimators=20, criterion='gini', max_depth=7, random_state=1)
# the training data are converted to NumPy arrays and the target is flattened to 1-D before fitting
rfc.fit(np.array(X_train), np.array(y_train).flatten())

# predicted labels for the test set
y_results['RF_pred'] = rfc.predict(X_test)

In [None]:
# Evaluate the random forest predictions on the test set
rf_pred = y_results['RF_pred']

# Confusion matrix
cm = pd.crosstab(y_test, rf_pred)
print("Confusion matrix : \n", cm)

# Total number of leaves over all trees of the forest (one rule per leaf)
n_leaves = sum(dtc.get_n_leaves() for dtc in rfc.estimators_)

print('\nAccuracy  = %.4f' % accuracy_score(y_test, rf_pred))
print('F1 score  = %.4f' % f1_score(y_test, rf_pred))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, rf_pred))
print('Number of rules = %.0f' % n_leaves)
print('Average rule length = %.2f' % avg_depth_ensemble(rfc))
print('Average rule length per sample = %.2f' % avg_path_length_ensemble(rfc, X_test))

### AdaBoost (ADA)

In [None]:
# AdaBoost with at most 20 boosting rounds; no base estimator is passed, so the library default is used
ada = AdaBoostClassifier(n_estimators = 20, random_state=1)
ada.fit(X_train, y_train)
# predicted labels for the test set
y_results['ADA_pred'] = ada.predict(X_test)

In [None]:
# Evaluate the AdaBoost predictions on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['ADA_pred'])
print("Confusion matrix : \n", cm)

# Count the rules from the fitted ensemble instead of assuming that all
# `n_estimators` rounds were run and that every base tree has two leaves:
# `n_estimators` is only an upper bound, boosting may stop earlier.
n_leaves = sum(est.get_n_leaves() for est in ada.estimators_)

print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['ADA_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['ADA_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['ADA_pred']))
print('Number of rules = %.0f' % n_leaves)
# every fitted base tree contributes one rule to each sample
print('Number of rules per sample = %.0f' % len(ada.estimators_))
print('Average rule length = %.2f' % avg_depth_ensemble(ada))
print('Average rule length per sample = %.2f' % avg_path_length_ensemble(ada, X_test))

### LightGBM

In [None]:
# The top-of-notebook LightGBM import is commented out, so import the class
# here; otherwise this cell only works after the SHAP section has been run.
from lightgbm import LGBMClassifier

# Trees limited to `depth` levels; num_leaves = 2**depth is the number of
# leaves of a full binary tree of that depth, so both limits agree.
lgbm = LGBMClassifier(objective='binary', random_state=0, max_depth=depth,
                      num_leaves=(2**depth), n_estimators=20)
lgbm.fit(X_train, y_train)

# predicted labels for the test set
y_results['LightGBM_pred'] = lgbm.predict(X_test)

In [None]:
# Evaluate the LightGBM predictions on the test set
lgbm_pred = y_results['LightGBM_pred']

# Confusion matrix
cm = pd.crosstab(y_test, lgbm_pred)
print("Confusion matrix : \n", cm)

# Total number of leaves over all trees, read from the dumped booster model
tree_info = lgbm._Booster.dump_model()["tree_info"]
n_leaves = sum(tree['num_leaves'] for tree in tree_info)

print('\nAccuracy  = %.4f' % accuracy_score(y_test, lgbm_pred))
print('F1 score  = %.4f' % f1_score(y_test, lgbm_pred))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, lgbm_pred))
print('Number of rules = %.0f' % n_leaves)
# The remaining statistics are read from the model parameters, not measured on the trees
print('Average rule length = %.2f' % lgbm.get_params()['max_depth'])
print('Average number of rules per sample = %.2f' % lgbm.get_params()['n_estimators'])
print('Average rule length per sample = %.2f' % lgbm.get_params()['max_depth'])

---

# Table 8b

For this part, we try to increase the performance of the models and then examine their level of interpretability.

In [None]:
# start from an empty frame so that the Table 8b predictions replace those stored so far
y_results = pd.DataFrame()

In [None]:
# prep the data: loan data from the `Datasets` module, split with the shared seed and test fraction;
# all Table 8b models below use this split
import Datasets as DS
X_train, X_test, y_train, y_test = gs_helpers.prep_data(DS.loan, binary=False, randomState=random_state,
                                                        testSize=test_size, target='good_bad')

### Logistic Regression (LR)

See the logistic regression results in Table 8a above.

### Decision Tree (DT)

In [None]:
# Candidate tree depths: 3, 5, ..., 15
pgrid = {'max_depth': list(range(3, 16, 2))}

# 5-fold cross-validated grid search; refit=True retrains the best setting on the full training set
tc_estimator = DecisionTreeClassifier(criterion='gini', random_state=1)
gcv = GridSearchCV(
    estimator=tc_estimator,
    param_grid=pgrid,
    n_jobs=1,
    cv=5,
    verbose=0,
    refit=True,
)
gcv_fit = gcv.fit(X_train, y_train)
tc = gcv_fit.best_estimator_

# Predicted labels of the selected tree for the test set
y_results['DT_pred'] = tc.predict(X_test)

# Show the selected model as the cell output
tc

In [None]:
# Evaluate the tuned decision tree on the test set
dt_pred = y_results['DT_pred']

# Confusion matrix
cm = pd.crosstab(y_test, dt_pred)
print("Confusion matrix : \n", cm)

# Predictive performance
print('\nAccuracy  = %.4f' % accuracy_score(y_test, dt_pred))
print('F1 score  = %.4f' % f1_score(y_test, dt_pred))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, dt_pred))

# Interpretability statistics: every leaf of the tree is counted as one rule
print('Number of rules = %.0f' % tc.get_n_leaves())
print('Average rule length = %.2f' % average_depth(tc))
print('Average rule length per sample = %.2f' % average_path_length(tc, X_test))

### Random Forest (RF)

In [None]:
# Candidate settings: depths 3, 5, ..., 15 and 100 to 300 trees in steps of 50
pgrid = {
    'max_depth': list(range(3, 16, 2)),
    'n_estimators': list(range(100, 301, 50)),
}

# 5-fold cross-validated grid search; refit=True retrains the best setting on the full training set
rf_estimator = RandomForestClassifier(criterion='gini', random_state=1)
gcv = GridSearchCV(
    estimator=rf_estimator,
    param_grid=pgrid,
    n_jobs=1,
    cv=5,
    verbose=1,
    refit=True,
)
gcv_fit = gcv.fit(X_train, y_train)
rfc = gcv_fit.best_estimator_

# Predicted labels of the selected forest for the test set
y_results['RF_pred'] = rfc.predict(X_test)

# Show the selected model as the cell output
rfc

In [None]:
# Evaluate the tuned random forest on the test set
rf_pred = y_results['RF_pred']

# Confusion matrix
cm = pd.crosstab(y_test, rf_pred)
print("Confusion matrix : \n", cm)

# Total number of leaves over all trees of the forest (one rule per leaf)
n_leaves = sum(dtc.get_n_leaves() for dtc in rfc.estimators_)

print('\nAccuracy  = %.4f' % accuracy_score(y_test, rf_pred))
print('F1 score  = %.4f' % f1_score(y_test, rf_pred))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, rf_pred))
print('Number of rules = %.0f' % n_leaves)
print('Average rule length = %.2f' % avg_depth_ensemble(rfc))
print('Average rule length per sample = %.2f' % avg_path_length_ensemble(rfc, X_test))

### AdaBoost (ADA)

In [None]:
# Candidate numbers of boosting rounds: 100 to 300 in steps of 50
pgrid = {'n_estimators': list(range(100, 301, 50))}

# 5-fold cross-validated grid search; refit=True retrains the best setting on the full training set
ada_estimator = AdaBoostClassifier(random_state=1)
gcv = GridSearchCV(
    estimator=ada_estimator,
    param_grid=pgrid,
    n_jobs=1,
    cv=5,
    verbose=1,
    refit=True,
)
gcv_fit = gcv.fit(X_train, y_train)
ada = gcv_fit.best_estimator_

# Predicted labels of the selected ensemble for the test set
y_results['ADA_pred'] = ada.predict(X_test)

# Show the selected model as the cell output
ada

In [None]:
# Evaluate the tuned AdaBoost model on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['ADA_pred'])
print("Confusion matrix : \n", cm)

# Count the rules from the fitted ensemble instead of assuming that all
# `n_estimators` rounds were run and that every base tree has two leaves:
# `n_estimators` is only an upper bound, boosting may stop earlier.
n_leaves = sum(est.get_n_leaves() for est in ada.estimators_)

print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['ADA_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['ADA_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['ADA_pred']))
print('Number of rules = %.0f' % n_leaves)

### LightGBM

In [None]:
# The top-of-notebook LightGBM import is commented out, so import the class
# here; otherwise this cell only works after the SHAP section has been run.
from lightgbm import LGBMClassifier

# Candidate settings for the grid search.
# NOTE(review): unlike the decision tree and random forest grids, this depth
# grid has no 13 -- left unchanged so the results stay the same; confirm whether
# the omission is intended.
pgrid = {'max_depth': [3, 5, 7, 9, 11, 15],
         'n_estimators': [100, 150, 200, 250, 300]}

# 5-fold cross-validated grid search; refit=True retrains the best setting on
# the full training set
lgbm_estimator = LGBMClassifier(random_state=1)
gcv = GridSearchCV(estimator=lgbm_estimator, param_grid=pgrid, n_jobs=1, cv=5, verbose=1, refit=True)
gcv_fit = gcv.fit(X_train, y_train)
gbc = gcv_fit.best_estimator_

# predicted labels of the selected model for the test set
y_results['LightGBM_pred'] = gbc.predict(X_test)

# show the selected model as the cell output
gcv_fit.best_estimator_

In [None]:
# Evaluate the tuned LightGBM model on the test set.
# Confusion matrix: rows are the true labels, columns the predicted labels.
cm = pd.crosstab(y_test, y_results['LightGBM_pred'])
print("Confusion matrix : \n", cm)

# All statistics are read from `gbc`, the model that produced the predictions,
# rather than from `gcv_fit`, which every grid-search cell rebinds.
n_leaves = sum(tree['num_leaves'] for tree in gbc._Booster.dump_model()["tree_info"])
lgbm_params = gbc.get_params()

print('\nAccuracy  = %.4f' % accuracy_score(y_test, y_results['LightGBM_pred']))
print('F1 score  = %.4f' % f1_score(y_test, y_results['LightGBM_pred']))
print('MCC score  = %.4f' % matthews_corrcoef(y_test, y_results['LightGBM_pred']))
print('Number of rules = %.0f' % n_leaves)
# The remaining statistics are read from the model parameters, not measured on the trees
print('Average rule length = %.2f' % lgbm_params['max_depth'])
print('Average number of rules per sample = %.2f' % lgbm_params['n_estimators'])
print('Average rule length per sample = %.2f' % lgbm_params['max_depth'])