In [1]:
import rulematrix
from rulematrix.surrogate import rule_surrogate
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer, load_iris

## Load Dataset

First, we load a dataset.
To get the most out of the visualization, provide feature names and target names along with the data.

We then partition the dataset into training and test sets.

In [2]:
# Load dataset (swap in load_iris() to try a different dataset)
dataset = load_breast_cancer()

# Optional feature metadata; any key the dataset does not provide resolves to None.
is_continuous, is_categorical, is_integer, feature_names, target_names = (
    dataset.get(key)
    for key in ('is_continuous', 'is_categorical', 'is_integer',
                'feature_names', 'target_names')
)

# Hold out 25% of the samples for testing; the fixed seed keeps the split repeatable.
train_x, test_x, train_y, test_y = train_test_split(
    dataset['data'], dataset['target'], test_size=0.25, random_state=42
)


## Training a Neural Net

In [3]:
def train_nn(neurons=(20,), random_state=42, **kwargs):
    """Train an MLP classifier on the notebook's training split and print its accuracy.

    Reads the notebook-level ``dataset``, ``train_x``, ``train_y``, ``test_x``
    and ``test_y`` defined in the data-loading cell.

    Parameters
    ----------
    neurons : tuple of int
        Forwarded to ``MLPClassifier`` as ``hidden_layer_sizes``.
    random_state : int or None, default 42
        Seed forwarded to ``MLPClassifier`` so that re-running the notebook
        reproduces the same network and scores. Pass ``None`` for an
        unseeded (non-deterministic) fit.
    **kwargs
        Any further keyword arguments for ``MLPClassifier``.

    Returns
    -------
    The fitted estimator: a bare ``MLPClassifier``, or a ``Pipeline`` that
    one-hot encodes first when the dataset declares categorical features.
    """
    is_categorical = dataset.get('is_categorical', None)
    model = MLPClassifier(hidden_layer_sizes=neurons, random_state=random_state, **kwargs)
    if is_categorical is not None:
        # NOTE(review): `categorical_features` was deprecated in scikit-learn 0.20
        # and removed in 0.22; on newer versions this branch needs a
        # ColumnTransformer instead. Confirm the pinned scikit-learn version.
        model = Pipeline([
            ('one_hot', OneHotEncoder(categorical_features=is_categorical)),
            ('mlp', model)
        ])
    model.fit(train_x, train_y)
    train_score = model.score(train_x, train_y)
    test_score = model.score(test_x, test_y)
    print('Training score:', train_score)
    print('Test score:', test_score)
    return model


# Fit the network to be explained: two hidden layers of 20 units each.
nn = train_nn(neurons=(20, 20))

Training score: 0.8873239436619719
Test score: 0.9300699300699301


## Train Rule Surrogate

Next, we train a surrogate rule list that approximates the neural net (using a sampling rate of 4 and otherwise default parameters), and then render the RuleMatrix visualization.

In [4]:
def train_surrogate(model, sampling_rate=2.0, **kwargs):
    """Fit a rule surrogate of ``model`` and print its scores on both splits.

    Reads the notebook-level ``train_x``, ``test_x``, ``feature_names`` and
    the ``is_continuous`` / ``is_categorical`` / ``is_integer`` metadata.

    Parameters
    ----------
    model
        Fitted estimator; only its ``predict`` method is handed to
        ``rule_surrogate``.
    sampling_rate : float, default 2.0
        Forwarded unchanged to ``rule_surrogate``.
    **kwargs
        Any further keyword arguments for ``rule_surrogate``.

    Returns
    -------
    The object returned by ``rule_surrogate``.
    """
    rule_list_args = {'feature_names': feature_names, 'verbose': 2}
    fitted = rule_surrogate(
        model.predict,
        train_x,
        sampling_rate=sampling_rate,
        is_continuous=is_continuous,
        is_categorical=is_categorical,
        is_integer=is_integer,
        rlargs=rule_list_args,
        **kwargs
    )

    # Score both splits before printing anything, so the two report lines stay together.
    fidelity_on_train = fitted.score(train_x)
    fidelity_on_test = fitted.score(test_x)
    print('Training fidelity:', fidelity_on_train)
    print('Test fidelity:', fidelity_on_test)
    return fitted

# Build the surrogate for the trained network with a sampling rate of 4.
surrogate = train_surrogate(nn, sampling_rate=4)

# Keep a handle on the surrogate's `student` attribute and show its text form.
rl = surrogate.student
print(rl)

Training fidelity: 0.8849765258215962
Test fidelity: 0.8811188811188811
The rule list contains 12 of rules:

     IF (worst area in (-1.0, 218.0)) THEN prob: [0.0070, 0.9930]

ELSE IF (area error in (54.97, inf)) THEN prob: [0.9912, 0.0088]

ELSE IF (smoothness error in (-inf, inf)) AND (worst area in (-inf, -60.2)) THEN prob: [0.8487, 0.1513]

ELSE IF (mean perimeter in (-inf, 52.91)) THEN prob: [0.4783, 0.5217]

ELSE IF (worst area in (1295.7, inf)) THEN prob: [0.8701, 0.1299]

ELSE IF (area error in (47.06, 54.97)) THEN prob: [0.8315, 0.1685]

ELSE IF (worst area in (218.0, 481.8)) THEN prob: [0.0519, 0.9481]

ELSE IF (mean radius in (12.583, 15.739)) AND (worst area in (843.8, 1295.7)) THEN prob: [0.7229, 0.2771]

ELSE IF (mean perimeter in (86.79, 105.37)) AND (mean area in (441.3, 746.5)) THEN prob: [0.0614, 0.9386]

ELSE IF (area error in (29.58, 47.06)) THEN prob: [0.6082, 0.3918]

ELSE IF (worst compactness in (0.286, inf)) AND (mean perimeter in (52.91, 86.79)) THEN prob: [0.

In [5]:
# Render the surrogate on the training data, labelled with feature and target names.
rulematrix.render(
    train_x, train_y, surrogate,
    feature_names=feature_names,
    target_names=target_names,
    is_categorical=is_categorical
)