# A decision support system for explainability techniques

In [1]:
from ciu import determine_ciu
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
import os
import pandas as pd
import warnings
import random
import time
import lime
import lime.lime_tabular
import tqdm
from anchor import anchor_tabular
import anchor_utils
import metrics_rules
import metrics
from pymfe.mfe import MFE

import six
import sys
sys.modules['sklearn.externals.six'] = six
from skrules import SkopeRules

warnings.filterwarnings('ignore')

## Reading in the datasets, preprocessing

In [2]:
# Define the path to the datasets folder
#datasets_folder = "../datasets"
datasets_folder = "datasets"

# Initialize empty lists to store dataframes for each file
folder_names = []
attribute_names_list = []
categorical_indicator_list = []
X_list = []
y_list = []

# Loop through each folder in the datasets folder
for folder_name in os.listdir(datasets_folder):
    folder_path = os.path.join(datasets_folder, folder_name)
    
    # Check if it's a directory
    if os.path.isdir(folder_path):
        # Construct file paths for each CSV file in the folder
        attribute_names_path = os.path.join(folder_path, "attribute_names.csv")
        categorical_indicator_path = os.path.join(folder_path, "categorical_indicator.csv")
        X_path = os.path.join(folder_path, "X.csv")
        y_path = os.path.join(folder_path, "y.csv")
        
        # Read each CSV file into a pandas dataframe
        attribute_names_df = pd.read_csv(attribute_names_path)
        categorical_indicator_df = pd.read_csv(categorical_indicator_path)
        X_df = pd.read_csv(X_path)
        y_df = pd.read_csv(y_path)
        
        # Append dataframes to the lists
        attribute_names_list.append(attribute_names_df)
        categorical_indicator_list.append(categorical_indicator_df)
        X_list.append(X_df)
        y_list.append(y_df)

        # Save folder name to list
        folder_names.append(folder_name)

In [3]:
nr_of_rows = []

for i in range(len(X_list)):
    nr_of_rows.append(len(X_list[i]))

print(f"Min number of rows: {min(nr_of_rows)}")
print(f"Max number of rows: {max(nr_of_rows)}")

Min number of rows: 500
Max number of rows: 20000


## Functions for explaining instances

In [4]:
# CIU
def exp_fn_ciu(xtest):
    exp1 = []
    for i in range(len(xtest)):
        exp = determine_ciu(X_test.iloc[i:i+1], model.predict_proba, X_train.to_dict('list'), samples = 1000, prediction_index = 1)
        exp_list = [[feat_list.index(i), exp.ci[i]] for i in exp.ci]
        exp1.append(exp_list)
    return np.array(exp1)

# LIME
def exp_fn_lime(xtest, exp_fn):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = exp_fn2(i)
        exp1.append(exp.as_map()[exp.available_labels()[0]])
    return np.array(exp1)

# ANCHOR
def exp_fn_anchor(xtest):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        exp = anchor_explainer.explain_instance(X_test.values[i], model.predict, threshold=0.95)
        exp_list = [0]*len(X_train.columns)
        for j in exp.features():
            exp_list[j] = 1
        exp1.append(exp_list)
    return np.array(exp1)

## Interpreting instances and calculating quality measures

In [19]:
%%time
nr_of_exp_instances = 14
interpretability_results = pd.DataFrame([])

for i in range(20):
    data_X = X_list[i]
    data_y = y_list[i]
    X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size = 0.2, random_state = 42)
    
    # Calculating meta-features
    mfe = MFE(groups=["general", "statistical"])
    mfe.fit(np.array(data_X), np.array(data_y))
    ft = mfe.extract()
    df = pd.DataFrame(ft, columns = ft[0]).drop(0)

    # Preprocessing data
    le = LabelEncoder()
    le_y = LabelEncoder()
    imputer_cat = SimpleImputer(strategy = 'most_frequent')
    imputer_num = SimpleImputer(strategy = 'mean')
    for col in X_train.columns:
        # Preprocessing categorical columns
        if X_train[col].dtype == 'object':
            le.fit(X_train[col])
            X_train[col] = le.transform(X_train[col])
            X_test[col] = le.transform(X_test[col])
            imputer_cat.fit(X_train[col])
            X_train[col] = imputer_cat.transform(X_train[col])
            X_test[col] = imputer_cat.transform(X_test[col])
        # Preprocessing numerical columns
        else:
            imputer_num.fit(X_train[col].values.reshape(-1, 1))
            X_train[col] = imputer_num.transform(X_train[col].values.reshape(-1, 1))
            X_test[col] = imputer_num.transform(X_test[col].values.reshape(-1, 1))
    for col in y_train.columns:
        # Preprocessing categorical columns
        if y_train[col].dtype == 'object':
            le_y.fit(y_train[col])
            y_train[col] = le_y.transform(y_train[col])
            y_test[col] = le_y.transform(y_test[col])

    random_state = 42
    exp_iter = 10
    random.seed(random_state)
    
    test_x = X_test.values
    n_classes = len(np.unique(y_train))
    class_names = np.unique(y_test)
    feat_list = X_train.columns.tolist()
    X = np.vstack((X_train.values, test_x))

    # Fit GB model
    model = GradientBoostingClassifier()
    model.fit(X_train, y_train)

    # Dictionarys for quality measures results
    df_identity = {}
    df_separability = {}
    df_similarity = {}
    df_time = {}

    
    # ---- CIU - explaining and measuring quality ----
    
    # Explain
    start_time = time.time()
    exp1_ciu = exp_fn_ciu(X_test[:nr_of_exp_instances])
    exp2_ciu = exp_fn_ciu(X_test[:nr_of_exp_instances])
    time_ciu = time.time() - start_time
    
    # Save explanations
    np.save('explanations/' + folder_names[i] + '_ciu1.npy', exp1_ciu)
    np.save('explanations/' + folder_names[i] + '_ciu2.npy', exp2_ciu)
    
    # Quality measures
    identity_ciu = metrics.calc_identity(exp1_ciu, exp2_ciu)[0]
    separability_ciu = metrics.calc_separability(test_x[:nr_of_exp_instances])[3]
    X_test_norm = metrics.normalize_test(X_train, X_test)
    similarity_ciu = metrics.calc_similarity(exp1_ciu, X_test_norm[:nr_of_exp_instances])

    # Save results to dict
    df_identity['ciu'] = identity_ciu
    df_separability['ciu'] = separability_ciu
    df_similarity['ciu'] = time_ciu
    df_time['ciu'] = similarity_ciu

    
    # ---- LIME - explaining and measuring quality ----

    lime_explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names = feat_list, class_names=class_names, discretize_continuous=True)
    exp_fn2 = lambda i: lime_explainer.explain_instance(X_test.iloc[i], model.predict_proba, num_features=len(X_test.columns))

    # Explain
    start_time = time.time()
    exp1_lime = exp_fn_lime(test_x[:nr_of_exp_instances], exp_fn2)
    exp2_lime = exp_fn_lime(test_x[:nr_of_exp_instances], exp_fn2)
    time_lime = time.time() - start_time

    # Save explanations
    np.save('explanations/' + folder_names[i] + '_lime1.npy', exp1_lime)
    np.save('explanations/' + folder_names[i] + '_lime2.npy', exp2_lime)

    # Quality measures
    identity_lime = metrics.calc_identity(exp1_lime, exp2_lime)[0]
    separability_lime = metrics.calc_separability(test_x[:nr_of_exp_instances])[3]
    similarity_lime = metrics.calc_similarity(exp1_lime, X_test_norm[:nr_of_exp_instances])

    
    # Save results to dict
    df_identity['lime'] = identity_lime
    df_separability['lime'] = separability_lime
    df_similarity['lime'] = time_lime
    df_time['lime'] = similarity_lime

    
    # ---- ANCHOR - explaining and measuring quality ----

    anchor_explainer = anchor_tabular.AnchorTabularExplainer(
                        np.unique(y_train).tolist(),
                        X_train.columns.tolist(),
                        X_train.values
                        )

    # Explain
    start_time = time.time()
    exp1_anchor = exp_fn_anchor(X_test[:nr_of_exp_instances])
    exp2_anchor = exp_fn_anchor(X_test[:nr_of_exp_instances])
    time_anchor = time.time() - start_time

    # Save explanations
    np.save('explanations/' + folder_names[i] + '_anchor1.npy', exp1_anchor)
    np.save('explanations/' + folder_names[i] + '_anchor2.npy', exp2_anchor)

    # Quality measures
    identity_anchor = metrics_rules.calc_identity_rules(exp1_anchor, exp2_anchor)[0]
    separability_anchor = metrics_rules.calc_separability_rules(exp1_anchor)[3]
    X_test_norm = metrics_rules.normalize_test(X_train, X_test)
    similarity_anchor = metrics_rules.calc_similarity(exp1_anchor, X_test_norm[:nr_of_exp_instances])

    # Save results to dict
    df_identity['anchor'] = identity_anchor
    df_separability['anchor'] = separability_anchor
    df_similarity['anchor'] = time_anchor
    df_time['anchor'] = similarity_anchor


    # ---- RULEFIT - explaining and measuring quality ----

    clf_rulefit = SkopeRules(max_depth_duplication = 2,
                             n_estimators = 100,
                             precision_min = 0.3,
                             recall_min = 0.1,
                             feature_names = X_train.columns.tolist())

    clf_rulefit.fit(X_train, y_train)

    # Explain
    start_time = time.time()
    top_rules1 = clf_rulefit.score_top_rules(X_test[:nr_of_exp_instances])
    top_rules2 = clf_rulefit.score_top_rules(X_test[:nr_of_exp_instances])
    time_rulefit = time.time() - start_time

    # Save explanations
    np.save('explanations/' + folder_names[i] + '_rulefit1.npy', top_rules1)
    np.save('explanations/' + folder_names[i] + '_rulefit2.npy', top_rules2)

    # Quality measures
    identity_rulefit = metrics_rules.calc_identity_rules(top_rules1, top_rules2)[0]
    separability_rulefit = metrics_rules.calc_separability_rules(top_rules1)[3]
    enc_rules = metrics_rules.exp_enc(clf_rulefit, top_rules1)
    similarity_rulefit = metrics_rules.calc_similarity(enc_rules, X_test_norm[:nr_of_exp_instances])

    # Save results to dict
    df_identity['rulefit'] = identity_rulefit
    df_separability['rulefit'] = separability_rulefit
    df_similarity['rulefit'] = time_rulefit
    df_time['rulefit'] = similarity_rulefit


    # ---- Selecting the best techniques based on quality measures ----

    best_identity = max(df_identity, key=df_identity.get)
    best_separability = max(df_separability, key=df_separability.get)
    best_similarity = max(df_similarity, key=df_similarity.get)
    best_time = max(df_time, key=df_time.get)

    # Save results to dataframe
    df['best_identity'] = best_identity
    df['best_separability'] = best_separability
    df['best_similarity'] = best_similarity
    df['best_time'] = best_time

    interpretability_results = pd.concat([interpretability_results, df])

100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 23.44it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 23.25it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00,  9.07it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00,  8.84it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00, 10.35it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00, 10.37it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:09<00:00,  1.51it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:09<00:00,  1.50it/s]
100%|███████████████████████████████████

ValueError: Expected 2D array, got 1D array instead:
array=[0.9982695  0.98823586 0.05264847 0.01438141].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [20]:
interpretability_results

Unnamed: 0,attr_to_inst,can_cor.mean,can_cor.sd,cat_to_num,cor.mean,cor.sd,cov.mean,cov.sd,eigenvalues.mean,eigenvalues.sd,...,sparsity.sd,t_mean.mean,t_mean.sd,var.mean,var.sd,w_lambda,best_identity,best_separability,best_similarity,best_time
1,0.000322,0.528956,,0.0,0.15054,0.223382,23036992346.414856,72798303992.9171,121134480488.22282,234111523206.04208,...,0.223514,169541.676209,253212.170859,121134480488.22284,168245420225.65167,0.720205,lime,anchor,anchor,rulefit
1,0.025377,0.568092,,0.0,0.350977,0.243944,469154.392907,8762157.949538,106280594.705627,646390558.822428,...,0.086522,142.832454,733.315812,106280594.705627,644186452.5723,0.677272,lime,anchor,anchor,ciu
1,0.023672,0.444716,,0.0,0.405831,0.300664,12768345.744952,278742719.083171,3478069177.695143,21155909590.126247,...,0.021614,154.927167,789.092552,3478069177.695143,21087816087.78045,0.802228,ciu,rulefit,anchor,anchor
1,0.001929,,,0.0,,,,,,,...,0.008187,212.247493,814.302136,,,,lime,rulefit,anchor,anchor
1,0.04023,0.598582,,0.0,0.775321,0.213983,5460267.115497,60240853.146294,613850878.368749,2812809834.503192,...,0.027386,128.417255,479.447053,613850878.368749,2799183414.518701,0.641699,lime,rulefit,anchor,anchor
1,0.009957,0.467723,,0.0,0.720347,0.221164,145365.769673,1304494.7814,14549820.294238,66667325.946777,...,0.020748,42.756092,144.029936,14549820.294238,66396708.690128,0.781235,lime,rulefit,anchor,anchor
1,0.018936,0.386223,,0.0,0.632733,0.227349,9113908.170039,112468708.576845,1391021745.093177,6374271207.053247,...,0.012391,266.343759,1033.984479,1391021745.093177,6353362935.781049,0.850831,ciu,rulefit,anchor,anchor
1,0.0064,0.423927,0.56381,0.0,0.0,0.0,0.0,0.0,2.003205,0.0,...,0.0,3.0,0.0,2.003205,0.0,0.323122,lime,rulefit,anchor,rulefit
1,0.000526,0.572295,,0.0,0.278781,0.272877,152.216247,305.82708,1493.801871,2166.19809,...,6.2e-05,29.270102,58.628622,1493.801871,1883.135357,0.672478,lime,rulefit,anchor,anchor
1,0.000685,0.303627,0.218076,0.0,0.256451,0.16084,24.699359,28.692798,104.816879,119.529302,...,0.043015,14.296274,13.098502,104.816879,67.292901,0.327596,lime,rulefit,anchor,anchor


In [18]:
interpretability_results.to_csv('interpretability_results.csv')

## Interpreting instances and calculating quality measures (without similarity)

In [41]:
%%time
nr_of_exp_instances = 25
interpretability_results2 = pd.DataFrame([])

for i in range(15):
    data_X = X_list[i]
    data_y = y_list[i]
    X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size = 0.2, random_state = 42)
    
    # Calculating meta-features
    mfe = MFE(groups=["general", "statistical"])
    mfe.fit(np.array(data_X), np.array(data_y))
    ft = mfe.extract()
    df = pd.DataFrame(ft, columns = ft[0]).drop(0)

    # Preprocessing data
    le = LabelEncoder()
    le_y = LabelEncoder()
    imputer_cat = SimpleImputer(strategy = 'most_frequent')
    imputer_num = SimpleImputer(strategy = 'mean')
    for col in X_train.columns:
        # Preprocessing categorical columns
        if X_train[col].dtype == 'object':
            le.fit(X_train[col])
            X_train[col] = le.transform(X_train[col])
            X_test[col] = le.transform(X_test[col])
            imputer_cat.fit(X_train[col])
            X_train[col] = imputer_cat.transform(X_train[col])
            X_test[col] = imputer_cat.transform(X_test[col])
        # Preprocessing numerical columns
        else:
            imputer_num.fit(X_train[col].values.reshape(-1, 1))
            X_train[col] = imputer_num.transform(X_train[col].values.reshape(-1, 1))
            X_test[col] = imputer_num.transform(X_test[col].values.reshape(-1, 1))
    for col in y_train.columns:
        # Preprocessing categorical columns
        if y_train[col].dtype == 'object':
            le_y.fit(y_train[col])
            y_train[col] = le_y.transform(y_train[col])
            y_test[col] = le_y.transform(y_test[col])

    random_state = 42
    exp_iter = 10
    random.seed(random_state)
    
    test_x = X_test.values
    n_classes = len(np.unique(y_train))
    class_names = np.unique(y_test)
    feat_list = X_train.columns.tolist()
    X = np.vstack((X_train.values, test_x))

    # Fit GB model
    model = GradientBoostingClassifier()
    model.fit(X_train, y_train)

    # Dictionarys for quality measures results
    df_identity = {}
    df_separability = {}
    #df_similarity = {}
    df_time = {}

    
    # ---- CIU - explaining and measuring quality ----
    
    # Explain
    start_time = time.time()
    exp1_ciu = exp_fn_ciu(X_test[:nr_of_exp_instances])
    exp2_ciu = exp_fn_ciu(X_test[:nr_of_exp_instances])
    time_ciu = time.time() - start_time
    
    # Save explanations
    np.save('explanations2/' + folder_names[i] + '_ciu1.npy', exp1_ciu)
    np.save('explanations2/' + folder_names[i] + '_ciu2.npy', exp2_ciu)
    
    # Quality measures
    identity_ciu = metrics.calc_identity(exp1_ciu, exp2_ciu)[0]
    separability_ciu = metrics.calc_separability(test_x[:nr_of_exp_instances])[3]
    #X_test_norm = metrics.normalize_test(X_train, X_test)
    #similarity_ciu = metrics.calc_similarity(exp1_ciu, X_test_norm[:nr_of_exp_instances])

    # Save results to dict
    df_identity['ciu'] = identity_ciu
    df_separability['ciu'] = separability_ciu
    #df_similarity['ciu'] = time_ciu
    df_time['ciu'] = similarity_ciu

    
    # ---- LIME - explaining and measuring quality ----

    lime_explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names = feat_list, class_names=class_names, discretize_continuous=True)
    exp_fn2 = lambda i: lime_explainer.explain_instance(X_test.iloc[i], model.predict_proba, num_features=len(X_test.columns))

    # Explain
    start_time = time.time()
    exp1_lime = exp_fn_lime(test_x[:nr_of_exp_instances], exp_fn2)
    exp2_lime = exp_fn_lime(test_x[:nr_of_exp_instances], exp_fn2)
    time_lime = time.time() - start_time

    # Save explanations
    np.save('explanations2/' + folder_names[i] + '_lime1.npy', exp1_lime)
    np.save('explanations2/' + folder_names[i] + '_lime2.npy', exp2_lime)

    # Quality measures
    identity_lime = metrics.calc_identity(exp1_lime, exp2_lime)[0]
    separability_lime = metrics.calc_separability(test_x[:nr_of_exp_instances])[3]
    #similarity_lime = metrics.calc_similarity(exp1_lime, X_test_norm[:nr_of_exp_instances])

    
    # Save results to dict
    df_identity['lime'] = identity_lime
    df_separability['lime'] = separability_lime
    #df_similarity['lime'] = time_lime
    df_time['lime'] = similarity_lime

    
    # ---- ANCHOR - explaining and measuring quality ----

    anchor_explainer = anchor_tabular.AnchorTabularExplainer(
                        np.unique(y_train).tolist(),
                        X_train.columns.tolist(),
                        X_train.values
                        )

    # Explain
    start_time = time.time()
    exp1_anchor = exp_fn_anchor(X_test[:nr_of_exp_instances])
    exp2_anchor = exp_fn_anchor(X_test[:nr_of_exp_instances])
    time_anchor = time.time() - start_time

    # Save explanations
    np.save('explanations2/' + folder_names[i] + '_anchor1.npy', exp1_anchor)
    np.save('explanations2/' + folder_names[i] + '_anchor2.npy', exp2_anchor)

    # Quality measures
    identity_anchor = metrics_rules.calc_identity_rules(exp1_anchor, exp2_anchor)[0]
    separability_anchor = metrics_rules.calc_separability_rules(exp1_anchor)[3]
    #X_test_norm = metrics_rules.normalize_test(X_train, X_test)
    #similarity_anchor = metrics_rules.calc_similarity(exp1_anchor, X_test_norm[:nr_of_exp_instances])

    # Save results to dict
    df_identity['anchor'] = identity_anchor
    df_separability['anchor'] = separability_anchor
    #df_similarity['anchor'] = time_anchor
    df_time['anchor'] = similarity_anchor


    # ---- RULEFIT - explaining and measuring quality ----

    clf_rulefit = SkopeRules(max_depth_duplication = 2,
                             n_estimators = 100,
                             precision_min = 0.3,
                             recall_min = 0.1,
                             feature_names = X_train.columns.tolist())

    clf_rulefit.fit(X_train, y_train)

    # Explain
    start_time = time.time()
    top_rules1 = clf_rulefit.score_top_rules(X_test[:nr_of_exp_instances])
    top_rules2 = clf_rulefit.score_top_rules(X_test[:nr_of_exp_instances])
    time_rulefit = time.time() - start_time

    # Save explanations
    np.save('explanations2/' + folder_names[i] + '_rulefit1.npy', top_rules1)
    np.save('explanations2/' + folder_names[i] + '_rulefit2.npy', top_rules2)

    # Quality measures
    identity_rulefit = metrics_rules.calc_identity_rules(top_rules1, top_rules2)[0]
    separability_rulefit = metrics_rules.calc_separability_rules(top_rules1)[3]
    #enc_rules = metrics_rules.exp_enc(clf_rulefit, top_rules1)
    #similarity_rulefit = metrics_rules.calc_similarity(enc_rules, X_test_norm[:nr_of_exp_instances])

    # Save results to dict
    df_identity['rulefit'] = identity_rulefit
    df_separability['rulefit'] = separability_rulefit
    #df_similarity['rulefit'] = time_rulefit
    df_time['rulefit'] = similarity_rulefit


    # ---- Selecting the best techniques based on quality measures ----

    best_identity = max(df_identity, key=df_identity.get)
    best_separability = max(df_separability, key=df_separability.get)
    #best_similarity = max(df_similarity, key=df_similarity.get)
    best_time = max(df_time, key=df_time.get)

    # Save results to dataframe
    df['best_identity'] = best_identity
    df['best_separability'] = best_separability
    #df['best_similarity'] = best_similarity
    df['best_time'] = best_time

    interpretability_results2 = pd.concat([interpretability_results2, df])

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:03<00:00,  6.93it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:03<00:00,  6.58it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:08<00:00,  2.92it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:10<00:00,  2.34it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:04<00:00,  5.98it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:04<00:00,  5.84it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:31<00:00,  1.27s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:29<00:00,  1.19s/it]
100%|███████████████████████████████████

CPU times: total: 28min 14s
Wall time: 29min 3s


In [42]:
interpretability_results2

Unnamed: 0,attr_to_inst,can_cor.mean,can_cor.sd,cat_to_num,cor.mean,cor.sd,cov.mean,cov.sd,eigenvalues.mean,eigenvalues.sd,...,sparsity.mean,sparsity.sd,t_mean.mean,t_mean.sd,var.mean,var.sd,w_lambda,best_identity,best_separability,best_time
1,0.000322,0.528956,,0.0,0.15054,0.223382,23036992346.414856,72798303992.9171,121134480488.22282,234111523206.04208,...,0.100133,0.223514,169541.676209,253212.170859,121134480488.22284,168245420225.65167,0.720205,ciu,anchor,anchor
1,0.025377,0.568092,,0.0,0.350977,0.243944,469154.392907,8762157.949538,106280594.705627,646390558.822428,...,0.036344,0.086522,142.832454,733.315812,106280594.705627,644186452.5723,0.677272,lime,rulefit,anchor
1,0.023672,0.444716,,0.0,0.405831,0.300664,12768345.744952,278742719.083171,3478069177.695143,21155909590.126247,...,0.016497,0.021614,154.927167,789.092552,3478069177.695143,21087816087.78045,0.802228,ciu,rulefit,anchor
1,0.001929,,,0.0,,,,,,,...,0.007105,0.008187,212.247493,814.302136,,,,lime,rulefit,anchor
1,0.04023,0.598582,,0.0,0.775321,0.213983,5460267.115497,60240853.146294,613850878.368749,2812809834.503192,...,0.020273,0.027386,128.417255,479.447053,613850878.368749,2799183414.518701,0.641699,lime,rulefit,anchor
1,0.009957,0.467723,,0.0,0.720347,0.221164,145365.769673,1304494.7814,14549820.294238,66667325.946777,...,0.018697,0.020748,42.756092,144.029936,14549820.294238,66396708.690128,0.781235,lime,rulefit,anchor
1,0.018936,0.386223,,0.0,0.632733,0.227349,9113908.170039,112468708.576845,1391021745.093177,6374271207.053247,...,0.012637,0.012391,266.343759,1033.984479,1391021745.093177,6353362935.781049,0.850831,ciu,rulefit,anchor
1,0.0064,0.423927,0.56381,0.0,0.0,0.0,0.0,0.0,2.003205,0.0,...,0.198718,0.0,3.0,0.0,2.003205,0.0,0.323122,lime,rulefit,anchor
1,0.000526,0.572295,,0.0,0.278781,0.272877,152.216247,305.82708,1493.801871,2166.19809,...,3.7e-05,6.2e-05,29.270102,58.628622,1493.801871,1883.135357,0.672478,lime,rulefit,anchor
1,0.000685,0.303627,0.218076,0.0,0.256451,0.16084,24.699359,28.692798,104.816879,119.529302,...,0.029054,0.043015,14.296274,13.098502,104.816879,67.292901,0.327596,lime,rulefit,anchor


In [43]:
interpretability_results2.to_csv('interpretability_results2.csv')