In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# Single seed shared by the train/test split so the partition is reproducible.
seed = 2022

# Load the breast-cancer dataset as pandas objects (features frame + target),
# then hold out 20% of the rows for testing.
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)

In [3]:
# Preview the first rows of the feature frame to check the column names and value scales.
X.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [4]:
# Sanity check: (number of training rows, number of features) after the 80/20 split.
X_train.shape

(455, 30)

# Fit Global Sufficient Rules

In [5]:
from acv_explainers import ACXplainer
from sklearn.metrics import roc_auc_score, accuracy_score

# ACXplainer exposes the same hyper-parameters as a Random Forest; these values
# should be tuned to maximize predictive performance before explaining the model.
explainer_params = {
    "classifier": True,
    "n_estimators": 18,
    "max_depth": 15,
}
acv_xplainer = ACXplainer(**explainer_params)
acv_xplainer.fit(X_train, y_train)

In [6]:
# Test-set accuracy of the fitted explainer model itself (before any rule extraction).
acv_test_predictions = acv_xplainer.predict(X_test)
accuracy_score(y_test, acv_test_predictions)

0.9649122807017544

### 1- Compute Sufficient Explanations

In [7]:
# The same training data is passed twice, with the labels cast to float64 each time
# (presumably once as the samples to explain and once as the background data
# -- TODO confirm against the acv_explainers signature).
sdp_inputs = (
    X_train, y_train.astype(np.double),
    X_train, y_train.astype(np.double),
)
# pi_level=0.9 is the level passed to the SDP search; stop=False is forwarded unchanged.
sdp_importance, sdp_index, size, sdp = acv_xplainer.importance_sdp_rf(
    *sdp_inputs, stop=False, pi_level=0.9
)

100%|██████████████████████████████████████████| 18/18 [00:00<00:00, 569.44it/s]
100%|███████████████████████████████████████████| 10/10 [00:19<00:00,  1.95s/it]


In [8]:
from acv_explainers.utils import get_active_null_coalition_list

# Convert the raw (sdp_index, size) output of the previous cell into two lists.
# NOTE(review): presumably S_star holds the selected (sufficient) feature coalition per
# sample and N_star the remaining features -- confirm against acv_explainers.utils.
S_star, N_star = get_active_null_coalition_list(sdp_index, size)

### 2- Compute Sufficient Rules

In [9]:
# Same (X, y, X, y) layout as the SDP-importance call: training data passed twice with
# float64 labels. Note that `sdp` from the earlier cell is overwritten here.
maxrules_inputs = (
    X_train, y_train.astype(np.double),
    X_train, y_train.astype(np.double),
)
sdp, rules, sdp_all, rules_data, w = acv_xplainer.compute_sdp_maxrules(
    *maxrules_inputs, S_star, verbose=True
)

100%|█████████████████████████████████████████| 455/455 [00:33<00:00, 13.69it/s]


### 3- Compute Global Sufficient Rules (G-SR)

In [10]:
# Fit the global rule set from the per-sample rules and coalitions computed above.
# The following cells read the results from acv_xplainer.rules, .rules_acc and .rules_coverage.
acv_xplainer.fit_global_rules(X_train, y_train, rules, S_star)

#### Rules stats

- Number of rules

In [11]:
# First axis = number of global rules; second axis matches the 30 input features.
# NOTE(review): the last axis of size 2 is presumably the (lower, upper) bound per feature -- confirm.
acv_xplainer.rules.shape

(93, 30, 2)

- Rules accuracy

In [12]:
# Show the accuracy values of the first ten global rules only, to keep the output short.
print(acv_xplainer.rules_acc[:10])

[1.0, 0.9949494949494949, 0.9772727272727273, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]


- Rules Coverage

In [13]:
# Show the coverage values of the first ten global rules only, to keep the output short.
print(acv_xplainer.rules_coverage[:10])

[0.12087912087912088, 0.4351648351648352, 0.0967032967032967, 0.006593406593406593, 0.002197802197802198, 0.004395604395604396, 0.2813186813186813, 0.14725274725274726, 0.07692307692307693, 0.3912087912087912]


### Global model prediction 

In [14]:
# Predict on the test set using the global rules only. The first element of the result
# is iterated in the next cell and may contain None entries for some samples.
# NOTE(review): min_acc=0.9 presumably restricts prediction to rules with accuracy >= 0.9 -- confirm.
y_test_pred = acv_xplainer.predict_proba_global_rules(X_test.values, min_acc=0.9)

### 4- Compute the coverage and the accuracy of G-SR

In [15]:
# Keep only the test samples for which the global rules produced a prediction.
# A None entry means no prediction was returned for that sample, so the sample is
# excluded from the accuracy computation and counts against coverage.
covered_pairs = [
    (rule_pred, true_label)
    for rule_pred, true_label in zip(y_test_pred[0], y_test.values)
    if rule_pred is not None  # identity check: `!= None` is unreliable on array-like values
]

# y_o: rule-based predictions (cast to int); y_r: matching ground-truth labels.
y_o = np.array([rule_pred for rule_pred, _ in covered_pairs], dtype=int)
y_r = [true_label for _, true_label in covered_pairs]

# Coverage = fraction of test samples that received a prediction.
test_coverage = len(y_r) / X_test.shape[0]
# Accuracy is undefined when nothing is covered; report NaN instead of failing in sklearn.
covered_accuracy = accuracy_score(y_r, y_o) if y_r else float('nan')

print('Accuracy = {} --- Test Coverage = {}'.format(covered_accuracy, test_coverage))


Accuracy = 0.9523809523809523 --- Test Coverage = 0.9210526315789473


In [19]:
# # Test webApp

# import acv_app
# import os

# # compile the ACXplainer
# acv_app.compile_ACXplainers(acv_xplainer, X_train, y_train, X_test, y_test, path=os.getcwd())

# # Launch the webApp
# acv_app.run_webapp(pickle_path=os.getcwd())

# Baseline models

In [19]:
from imodels import BoostedRulesClassifier, BayesianRuleListClassifier, GreedyRuleListClassifier, SkopeRulesClassifier # see more models below
from imodels import SLIMRegressor, RuleFitRegressor, RuleFitClassifier

# Rule Fit

In [20]:
# RuleFit baseline. The notebook-wide `seed` is passed so that the baseline score is
# reproducible on Restart & Run All (the model was previously unseeded).
rf = RuleFitClassifier(random_state=seed)  # initialize a model
rf.fit(X_train, y_train)

RuleFitClassifier()

In [21]:
# Test-set accuracy of the RuleFit model fitted in the previous cell (`rf`).
accuracy_score(y_test, rf.predict(X_test))

0.9473684210526315

# Skope Rules

In [26]:
# SkopeRules baseline: 100 estimators, keeping only rules with precision >= 0.9.
# The notebook-wide `seed` is passed so the reported accuracy is reproducible
# (the model was previously unseeded).
rf = SkopeRulesClassifier(n_estimators=100, precision_min=0.9, random_state=seed)  # initialize a model
rf.fit(X_train, y_train)

accuracy_score(y_test, rf.predict(X_test))

0.9210526315789473

# Decision Tree

In [27]:
from sklearn.tree import DecisionTreeClassifier

In [28]:
# Decision-tree baseline with default hyper-parameters. `random_state=seed` fixes the
# tie-breaking between equally good splits so the reported accuracy is reproducible.
rf = DecisionTreeClassifier(random_state=seed)  # initialize a model
rf.fit(X_train, y_train)

accuracy_score(y_test, rf.predict(X_test))

0.9473684210526315

# Random Forest

In [29]:
from sklearn.ensemble import RandomForestClassifier

In [30]:
# Random-forest baseline with default hyper-parameters. `random_state=seed` fixes the
# bootstrap sampling and feature sub-sampling so the reported accuracy is reproducible.
rf = RandomForestClassifier(random_state=seed)  # initialize a model
rf.fit(X_train, y_train)

accuracy_score(y_test, rf.predict(X_test))

0.9736842105263158