# prologue

### set up notebook and load package

In [1]:
# load what we need
import CHIRPS.datasets as ds
import CHIRPS.reproducible as rp
import multiprocessing as mp

  data = yaml.load(f.read()) or {}
  defaults = yaml.load(f)


# data management

### datasets

Any dataset can be turned into a container by invoking the constructor found in the module structures.py. The only mandatory parameters are an object that can be passed to pandas.DataFrame and a class column name. Your object needs either its own column names, or the names should be passed as a list to the var_names parameter.

Several datasets are available as pre-prepared containers that hold the data and some meta-data that is used in the algorithm. If these pre-packaged sets have more than 10,000 rows then we've included some downsampled versions. You can see what there is by doing:

In [2]:
# Lists every name exposed by the datasets module (imported as `ds`).
# In that listing, ignore the builtins, np, pd, urllib and cfg. The rest are
# dataset constructors that will load specific datasets.
# The `if False:` guard keeps this cell inert; change it to `if True:` to run it.
if False:
    dir(ds)

You can see more information about each dataset with something like this for the adult dataset:

In [3]:
# Builds the adult dataset container and prints the information held in its
# `spiel` attribute. The `if False:` guard keeps this cell inert; change it to
# `if True:` to run it.
if False:
    print(ds.adult().spiel)

An example of one dataset is given below. Note that the random_state propagates through other functions via the meta_data and is easily updated to allow alternative runs:

In [4]:
# Builds the cardio container with an explicit random_state, fetches its
# meta data and looks up the random_state stored there.
# The `if False:` guard keeps this cell inert.
# NOTE(review): project_dir is not assigned in any cell visible above this one
# (it is first set in the experimental-runs configuration further down), so
# define it before enabling this cell on a fresh kernel.
if False:
    cardio = ds.cardio(random_state=123, project_dir=project_dir)
    meta_data = cardio.get_meta()
    meta_data['random_state']

The list of datasets used in "CHIRPS: Explaining Random Forest Classification" is:

In [5]:
# Reference only: the `if False:` guard means this list is never assigned.
if False:
    # dataset constructors used in "CHIRPS: Explaining Random Forest Classification"
    datasets = [
        ds.adult,
        ds.bankmark,
        ds.car,
        ds.cardio,
        ds.credit,
        ds.german,
        ds.lending_tiny_samp,
        ds.nursery,
        ds.rcdv,
    ]

The list of datasets used in "Explaining Multi-class AdaBoost Classification in Medical Applications" is:

In [6]:
# Reference only: the `if False:` guard means this list is never assigned.
if False:
    # dataset constructors used in
    # "Explaining Multi-class AdaBoost Classification in Medical Applications"
    datasets = [
        ds.breast,
        ds.cardio,
        ds.diaretino,
        ds.heart,
        ds.mhtech14,
        ds.mh2tech16,
        ds.readmit,
        ds.thyroid,
        ds.usoc2,  # was `dsp.usoc2`; the datasets module is only imported as `ds`
    ]

### Standardising train-test splitting across environments
To compare with methods in different environments, as we did in "CHIRPS: Explaining Random Forest Classification," you can maintain the same dataset splits. The train-test data is split using the one-time random_state_splits, and the splits are saved to csv in the project folders. This way you can run the same model with different data splits, or the same data splits with a different model. This is what's happening behind the scenes:

In [7]:
##### do not run this here
# train_index, test_index = mydata.get_tt_split_idx(random_state=random_state_splits)
# tt = mydata.tt_split(train_index, test_index)

All you need here is a call to rp.export_data_splits (it is made in the configuration cell under Experimental Runs below), which writes the same train_index, test_index numbers to external .csv files.

# Experimental Runs
From here on, everything is automated behind the scenes to perform runs across several datasets and compare different algorithms with CHIRPS. If you want to run CHIRPS and control the parameters and view results and performance directly, look at the CHIRPS Examples Notebook.

In [8]:
# do the model tuning from scratch?
override_tuning = False

# Optional memory and computation cost management.
# CHIRPS is time economical but memory intensive to compute for lots of instances at once.
forest_walk_async = True
chirps_explanation_async = False

# Leave six cores free for the rest of the system, but never request fewer than
# one worker: cpu_count() - 6 is zero or negative on machines with six or fewer cores.
n_cores = max(1, mp.cpu_count() - 6)

# How many instances from the test set should be explained?
# Any number larger than the test set is interpreted as 'all'.
n_instances = 10
# Instance to start from; handy as a diagnostic for resuming after a crash.
start_instance = 0

# Model to explain - CHOOSE ONE by leaving exactly one line uncommented.
model = 'RandomForest'
# model = 'AdaBoost1' # SAMME
# model = 'AdaBoost2' # SAMME.R
# model = 'GBM'

# Explanation methods to benchmark - CHOOSE ONE OR MORE.
do_CHIRPS = True
do_Anchors = True
do_dfrgTrs = False
do_lore = False

# Dataset constructors to include; uncomment the ones you want.
datasets = [
    # ds.adult,
    # ds.bankmark,
    ds.car,
    # ds.cardio,
    # ds.credit,
    # ds.german,
    # ds.lending_tiny_samp,
    # ds.nursery,
    # ds.rcdv,
]
# A single dataset is fine but it must still be a list, e.g. datasets = [datasets[0]]
# datasets = [ds.cervicalh]

# Location to save results. Alternatives used on other machines are kept for reference.
# NOTE(review): the original note here read 'defaults to a directory "whiteboxing"
# in the working directory'; the value below is an explicit absolute path, so that
# presumably describes the library default when no project_dir is given - confirm.
# project_dir = '/datadisk/whiteboxing/benchmarks'
project_dir = 'V:\\whiteboxing\\tests'
# project_dir = 'C:\\Users\\Crutt\\Documents\\whiteboxing\\tests'

# Random states for the various tasks. This is not the same as random.seed(), not system wide.
# change this if you want to try different splits of the data into test / train
random_state_splits = 123
# change this if you want to try with different forest construction
random_state_rf = 123
# change this if you want to try with different runs of the explainer algorithm (affects bootstrap eval)
random_state_exp = 123

# To standardise train-test splitting across environments - writes external files
rp.export_data_splits(
    datasets=datasets,
    project_dir=project_dir,
    random_state_splits=random_state_splits,
)

# How much messaging to print to the screen?
verbose = True

# CHIRPS default parameters - see papers for details
# how many training bootstraps to test improvement in growing rule?
merging_bootstraps = 20
# how many training bootstraps to test deterioration in pruning rule?
pruning_bootstraps = 20
# pruning deterioration tolerance parameter
delta = 0.1

# this is here if you want to pass parameters to the methods
def benchmark_wrapper(do_CHIRPS=do_CHIRPS, do_Anchors=do_Anchors, do_dfrgTrs=do_dfrgTrs, do_lore=do_lore, **control):
    """Call rp.do_benchmarking once for every enabled explanation method.

    Parameters
    ----------
    do_CHIRPS, do_Anchors, do_dfrgTrs, do_lore : bool
        Switch the corresponding run on or off. The defaults are the values the
        notebook-level flags of the same name held when this function was defined.
    **control
        Keyword arguments forwarded to rp.do_benchmarking. The first run
        (enabled by do_CHIRPS) receives them unchanged, so the caller's own
        'method' entry decides what that run does.

    Notes
    -----
    `benchmark_items` and `verbose` are read from the notebook's global namespace.

    Every run receives its own merged copy of `control`. Previously the dict was
    updated in place, so the defragTrees-only settings ('Kmax', 'restart',
    'maxitr') were also passed to a following lore run.

    NOTE(review): all runs share the same `benchmark_items` object, whereas the
    sensitivity-analysis cell deep-copies it "to avoid running down the internal
    counters" - confirm that sharing is intended here.
    """
    # (enabled?, settings layered over the caller's control dict), in run order
    runs = [
        (do_CHIRPS, {}),
        (do_Anchors, {'method' : 'Anchors'}),
        (do_dfrgTrs, {'method' : 'defragTrees',
                      'Kmax' : 10, 'restart' : 100, 'maxitr' : 20}),
        (do_lore, {'method' : 'lore'}),
    ]

    for enabled, overrides in runs:
        if enabled:
            # dict(mapping, **overrides) builds a fresh dict, leaving `control` untouched
            rp.do_benchmarking(benchmark_items, verbose, **dict(control, **overrides))

# this is here to pass parameters to the model training and further parameters to CHIRPS
#
# Steps:
#   1. validate the chosen model name (a typo used to fall through to the GBM branch)
#   2. build the model-specific tuning grid and CHIRPS hyper-parameters
#   3. prepare the benchmark items and, except for GBM, run the enabled benchmarks
if model not in ('RandomForest', 'AdaBoost1', 'AdaBoost2', 'GBM'):
    raise ValueError("Unknown model %r: choose one of 'RandomForest', 'AdaBoost1', 'AdaBoost2', 'GBM'" % (model,))

tuning = {'override' : override_tuning}

if model == 'RandomForest':
    tuning.update({'grid' : {'n_estimators': [(i + 1) * 200 for i in range(8)],
                            'max_depth' : [32]}})

    kwargs = {'support_paths' : 0.1, 'alpha_paths' : 0.9, 'disc_path_bins' : 4,
              'score_func' : 3, 'weighting' : 'kldiv',
              'merging_bootstraps' : merging_bootstraps,
              'pruning_bootstraps' : pruning_bootstraps, 'delta' : delta}

elif model in ('AdaBoost1', 'AdaBoost2'):
    # AdaBoost1 uses SAMME, AdaBoost2 uses SAMME.R; the two variants differ only in
    # the boosting algorithm and in the support_paths / disc_path_bins settings.
    algo = 'SAMME' if model == 'AdaBoost1' else 'SAMME.R'
    max_depth = [i for i in range(1, 5)]
    tuning.update({'grid' : {'base_estimator' : [rp.DecisionTreeClassifier(max_depth=d) for d in max_depth],
                            'n_estimators': [(i + 1) * 200 for i in range(8)], 'algorithm': [algo]}})

    kwargs = {'paths_lengths_threshold' : 5,
              'support_paths' : 0.1 if algo == 'SAMME' else 0.01,
              'alpha_paths' : 0.0,
              'disc_path_bins' : 4 if algo == 'SAMME' else 8,
              'disc_path_eqcounts' : True,
              'score_func' : 1, 'weighting' : 'kldiv',
              'merging_bootstraps' : merging_bootstraps,
              'pruning_bootstraps' : pruning_bootstraps, 'delta' : delta}

if model == 'GBM':
    # GBM - not fully implemented yet: only the data / model preparation is run.
    # Uses the configured random states rather than repeating the literal 123.
    benchmark_items = rp.benchmarking_prep(datasets, model, tuning, project_dir,
                                           random_state=random_state_rf,
                                           random_state_splits=random_state_splits,
                                           start_instance=start_instance, verbose=verbose)
else:
    benchmark_items = rp.benchmarking_prep(datasets, model, tuning, project_dir,
                                           random_state=random_state_rf,
                                           random_state_splits=random_state_splits,
                                           do_raw=True, do_discretise=do_Anchors,
                                           start_instance=start_instance,
                                           verbose=verbose, n_cores=n_cores)

    control = {'method' : 'CHIRPS', 'model' : model,
               'n_instances' : n_instances,
               'random_state' : random_state_exp,
               'kwargs' : kwargs,
               'forest_walk_async' : forest_walk_async,
               'chirps_explanation_async' : chirps_explanation_async,
               'n_cores' : n_cores}

    benchmark_wrapper(do_CHIRPS, do_Anchors, do_dfrgTrs, do_lore, **control)

Exported train-test data for 1 datasets.
Preprocessing cardio data and model for cardio with random state = 123
Split data into main train-test and build forest
Train main model
using previous tuning parameters
Best OOB Accuracy Estimate during tuning: 0.9409
Best parameters:{'max_depth': 32.0, 'n_estimators': 600, 'oob_score': True, 'random_state': 123}

Discretise data and train model, e.g. for Anchors
using previous tuning parameters
Best OOB Accuracy Estimate during tuning: 0.9409
Best parameters:{'max_depth': 32.0, 'n_estimators': 600, 'oob_score': True, 'random_state': 123}


Beginning benchmark for cardio data.
Hyper-parameter settings: {'support_paths': 0.1, 'alpha_paths': 0.9, 'disc_path_bins': 4, 'score_func': 3, 'weighting': 'kldiv', 'merging_bootstraps': 20, 'pruning_bootstraps': 20, 'delta': 0.1}

Prepare Unseen Data and Predictions for CHIRPS benchmark
Walking forest for 10 instances... (please wait)
Forest Walk with async = True
Forest Walk time elapsed: 1.3625 seconds



  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 43 patterns from 592 for batch_idx 0
start score sort for batch_idx 0 (43) patterns
start merge rule for batch_idx 0 (43) patterns
[('AC', False, 0.00174), ('MSTV', False, 0.58735)]
indicative:
0.9764216366158114 0.4607552335862187 0.10192131688733597 0.12312892322549493
merge complete for batch_idx 0 (43) patterns
start get explainer for batch_idx 0
as_chirps for batch_idx 1
start mining for batch_idx 1


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 39 patterns from 597 for batch_idx 1
start score sort for batch_idx 1 (39) patterns
start merge rule for batch_idx 1 (39) patterns
[('AC', False, 0.00176), ('MSTV', False, 0.57726)]
indicative:
0.9764216366158114 0.4607552335862187 0.1382463626659789 0.1327568667344863
merge complete for batch_idx 1 (39) patterns
start get explainer for batch_idx 1
as_chirps for batch_idx 2
start mining for batch_idx 2
found 12 patterns from 559 for batch_idx 2
start score sort for batch_idx 2 (12) patterns
start merge rule for batch_idx 2 (12) patterns


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


[('Mean', True, 102.82)]
indicative:
0.9074074074074074 0.034132368003102015 0.40863566083040026 0.22249388753056235
merge complete for batch_idx 2 (12) patterns
start get explainer for batch_idx 2
as_chirps for batch_idx 3
start mining for batch_idx 3


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 49 patterns from 594 for batch_idx 3
start score sort for batch_idx 3 (49) patterns
start merge rule for batch_idx 3 (49) patterns
[('AC', False, 0.00174)]
indicative:
0.976878612716763 0.44354897854413 0.11530677305348896 0.08281715989905943
merge complete for batch_idx 3 (49) patterns
start get explainer for batch_idx 3
as_chirps for batch_idx 4
start mining for batch_idx 4


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 47 patterns from 600 for batch_idx 4
start score sort for batch_idx 4 (47) patterns
start merge rule for batch_idx 4 (47) patterns
[('AC', False, 0.00175)]
indicative:
0.976878612716763 0.44354897854413 0.11959767499257304 0.09363042988971414
merge complete for batch_idx 4 (47) patterns
start get explainer for batch_idx 4
Working on CHIRPS for instance 5 of 10
as_chirps for batch_idx 5
start mining for batch_idx 5


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 39 patterns from 560 for batch_idx 5
start score sort for batch_idx 5 (39) patterns
start merge rule for batch_idx 5 (39) patterns
[('AC', False, 0.00178), ('MSTV', False, 0.57037)]
indicative:
0.9763560500695411 0.4594717928241401 0.1503684339158464 0.15716272600834494
merge complete for batch_idx 5 (39) patterns
start get explainer for batch_idx 5
as_chirps for batch_idx 6
start mining for batch_idx 6


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 45 patterns from 494 for batch_idx 6
start score sort for batch_idx 6 (45) patterns
start merge rule for batch_idx 6 (45) patterns
[('ASTV', True, 59.50549), ('Median', True, 150.63514)]
indicative:
0.9614921780986763 0.5060423576195637 0.08572942777888107 0.11485897063099737
merge complete for batch_idx 6 (45) patterns
start get explainer for batch_idx 6
as_chirps for batch_idx 7
start mining for batch_idx 7


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 42 patterns from 597 for batch_idx 7
start score sort for batch_idx 7 (42) patterns
start merge rule for batch_idx 7 (42) patterns
[('AC', False, 0.00175)]
indicative:
0.976878612716763 0.44354897854413 0.13740259469989544 0.05701976685250887
merge complete for batch_idx 7 (42) patterns
start get explainer for batch_idx 7
as_chirps for batch_idx 8
start mining for batch_idx 8


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 53 patterns from 593 for batch_idx 8
start score sort for batch_idx 8 (53) patterns
start merge rule for batch_idx 8 (53) patterns
[('AC', False, 0.00178), ('MSTV', False, 0.57959)]
indicative:
0.9763560500695411 0.4594717928241401 0.08032183721075875 0.10690067810131632
merge complete for batch_idx 8 (53) patterns
start get explainer for batch_idx 8
as_chirps for batch_idx 9
start mining for batch_idx 9


  np.histogram(lowers, lower_bins)[0]).round(5) # can result in nans
  np.histogram(uppers, upper_bins)[0]).round(5) # can result in nans if no value falls into bin


found 46 patterns from 566 for batch_idx 9
start score sort for batch_idx 9 (46) patterns
start merge rule for batch_idx 9 (46) patterns
[('ALTV', True, 68.9819), ('ALTV', False, 7.18905), ('ASTV', False, 59.38818), ('ASTV', True, 79.51869), ('MSTV', True, 0.5411)]
indicative:
0.9354838709677419 0.03951329364330157 0.4712724238829725 0.44491625383345135
merge complete for batch_idx 9 (46) patterns
start get explainer for batch_idx 9
CHIRPS time elapsed: 6.0048 seconds
CHIRPS with async = False

Evaluating found explanations
Results saved to V:\whiteboxing\tests\cardio\RandomForest_CHIRPS_rnst_123
CHIRPS batch results eval time elapsed: 2.5161 seconds

Prepare Unseen Data and Predictions for Anchors benchmark
Running Anchors on each instance and collecting results
0: Working on Anchors for instance 63


In [9]:
# NOTE(review): `stop` is not defined anywhere visible in this notebook, so this
# cell raises the NameError recorded in its output. Presumably a deliberate
# barrier so that "Run All" halts before the sensitivity analysis below -
# confirm, and consider an explicit raise with an explanatory message instead.
stop

NameError: name 'stop' is not defined

## Sensitivity Analysis

In [None]:
# load what we need
import numpy as np
import CHIRPS.datasets as ds
import CHIRPS.reproducible as rp

# Datasets for the sensitivity analysis (mostly the down-sampled variants).
datasets = [
    ds.adult_small_samp,
    ds.bankmark_samp,
    ds.car,
    ds.cardio,
    ds.credit,
    ds.german,
    ds.lending_tiny_samp,
    ds.nursery_samp,
    ds.rcdv_samp,
]
# for testing can just choose one, but it must stay a list
# datasets = [datasets[0]]

# CHIRPS default set up
merging_bootstraps = 20
pruning_bootstraps = 20
# prune rule terms if loss of precision no greater than delta
delta = 0.1

forest_walk_async = True
chirps_explanation_async = True

n_instances = 1000

# NOTE: every candidate below is commented out, so `model` is not assigned in this
# cell. The dispatch further down reads whatever value is already in the kernel
# (for example the one set in the experimental-runs cell). Uncomment one line to
# make this cell self-contained.
# model = 'RandomForest'
# model = 'AdaBoost1'
# model = 'AdaBoost2'
# model = 'GBM'

do_Anchors = False
do_dfrgTrs = False

# here can opt to start at a specific instance
start_instance = 0

# Location to save results. Alternatives used on other machines are kept for reference.
project_dir = 'V:\\whiteboxing\\tests'
# project_dir = 'C:\\Users\\Crutt\\Documents\\whiteboxing\\tests'
# project_dir = '/datadisk/whiteboxing'

# change this if you want to try different splits of the data into test / train
random_state_splits = 123
# change this if you want to try with different forest construction
random_state_rf = 123
# change this if you want to try with different runs of the explainer algorithm (affects bootstrap eval)
random_state_exp = 123

verbose = True

# no tuning grid yet (the model-specific code fills it in); never re-tune from scratch
tuning = dict(grid=None, override=False)

# for troubleshooting, can isolate an instance and do like
# bi_copy['cardio']['main']['ds_container'].current_row_test = 110

# Sensitivity analysis: run CHIRPS once for every hyper-parameter combination in a
# model-specific grid, on a fresh deep copy of the prepared benchmark items.
# Nothing is run for any model other than RandomForest, AdaBoost1 or AdaBoost2.
if model == 'RandomForest':
    # Extend this cell's own `tuning` dict, as the AdaBoost branch does, instead of
    # reading `override_tuning`, which is only defined in the experimental-runs cell.
    tuning.update({'grid' : {'n_estimators': [(i + 1) * 200 for i in range(8)], 'max_depth' : [32]}})
    benchmark_items = rp.benchmarking_prep(datasets, model, tuning, project_dir,
                                           random_state=random_state_rf,
                                           random_state_splits=random_state_splits,
                                           start_instance=start_instance, verbose=verbose)

    # Full factorial design, 3 x 2 x 3 x 2 x 2 = 72 combinations: each array has 72
    # entries and each factor cycles more slowly than the one before it.
    alpha_paths = np.tile([0.9, 0.5, 0.1], 24)
    disc_path_bins = np.tile(np.repeat([4, 8], 3), 12)
    score_func = np.tile(np.repeat([5, 3, 1], 6), 4)
    support_paths = np.tile(np.repeat([0.1, 0.05], 18), 2)
    weighting = np.repeat(['chisq', 'nothing'], 36)

    # one kwargs dict per combination, keyed by its position in the grid
    kwargs_grid = {k : {'alpha_paths' : ap, 'disc_path_bins' : dpb, 'disc_path_eqcounts' : False,
                        'score_func' : sf, 'weighting' : w, 'support_paths' : sp,
                        'merging_bootstraps' : merging_bootstraps,
                        'pruning_bootstraps' : pruning_bootstraps,
                        'which_trees' : 'majority',
                        'delta' : delta}
                   for k, ap, dpb, sf, w, sp
                   in zip(range(72), alpha_paths, disc_path_bins, score_func, weighting, support_paths)}

    for kwargs in kwargs_grid:
        bi_copy = rp.deepcopy(benchmark_items) # to avoid running down the internal counters
        control = {'method' : 'CHIRPS', 'model' : model,
                    'n_instances' : n_instances,
                    'random_state' : random_state_exp,
                    'kwargs' : kwargs_grid[kwargs],
                    'forest_walk_async' : forest_walk_async,
                    'chirps_explanation_async' : chirps_explanation_async,
                    'save_sensitivity_path' : 'rf_sensitivity'}

        rp.do_benchmarking(bi_copy, verbose, **control)

elif model in ('AdaBoost1', 'AdaBoost2'):
    if model == 'AdaBoost1':
        algo = 'SAMME'
        save_sensitivity_path = 'ada1_sensitivity'
        support_paths = np.tile(np.tile([0.05, 0.02, 0.01], 16), 2)
        # only the first 36 combinations are used, i.e. those with which_trees == 'majority'
        n_kwargs = 36
    else:
        algo = 'SAMME.R'
        save_sensitivity_path = 'ada2_sensitivity'
        support_paths = np.tile(np.tile([0.005, 0.002, 0.001], 16), 2)
        n_kwargs = 72

    max_depth = [i for i in range(1, 5)]
    tuning.update({'grid' : {'base_estimator' : [rp.DecisionTreeClassifier(max_depth=d) for d in max_depth],
                            'n_estimators': [(i + 1) * 200 for i in range(8)], 'algorithm': [algo]}})
    # use the configured random states rather than repeating the literal 123
    benchmark_items = rp.benchmarking_prep(datasets, model, tuning, project_dir,
                                           random_state=random_state_rf,
                                           random_state_splits=random_state_splits,
                                           start_instance=start_instance, verbose=verbose)

    # Full factorial design, 3 x 2 x 2 x 3 x 2 = 72 combinations. support_paths holds
    # 96 entries; zip() stops at the shortest input, which is range(n_kwargs).
    disc_path_bins = np.tile(np.tile(np.repeat([4, 8], 3), 6), 2)
    disc_path_eqcounts = np.tile(np.tile(np.repeat([True, False], 6), 6), 2)
    weighting = np.tile(np.repeat(['chisq', 'kldiv', 'nothing'], 12), 2)
    which_trees = np.repeat(['majority', 'conf_weighted'], 36)

    # one kwargs dict per combination, keyed by its position in the grid
    kwargs_grid = {k : {'paths_lengths_threshold' : 5, 'alpha_paths' : 0.0,
                        'disc_path_bins' : dpb, 'disc_path_eqcounts' : dpeq,
                        'score_func' : 1, 'weighting' : w, 'support_paths' : sp,
                        'merging_bootstraps' : merging_bootstraps,
                        'pruning_bootstraps' : pruning_bootstraps,
                        'which_trees' : wchtr,
                        'delta' : delta}
                   for k, dpb, dpeq, w, sp, wchtr
                   in zip(range(n_kwargs), disc_path_bins, disc_path_eqcounts, weighting, support_paths, which_trees)}

    for kwargs in kwargs_grid: # range(24, 36): #
        bi_copy = rp.deepcopy(benchmark_items) # to avoid running down the internal counters
        control = {'method' : 'CHIRPS', 'model' : model,
                    'n_instances' : n_instances,
                    'random_state' : random_state_exp,
                    'kwargs' : kwargs_grid[kwargs],
                    'forest_walk_async' : forest_walk_async,
                    'chirps_explanation_async' : chirps_explanation_async,
                    'save_sensitivity_path' : save_sensitivity_path}

        rp.do_benchmarking(bi_copy, verbose, **control)