In [1]:
from rulematrix.visualization import render
from rulematrix.surrogate import rule_surrogate
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer, load_iris

In [2]:
# Dataset under study. Alternative: dataset = load_breast_cancer()
dataset = load_iris()

# Optional per-feature metadata: any key the dataset does not provide
# comes back as None.
is_continuous = dataset.get('is_continuous')
is_categorical = dataset.get('is_categorical')
is_integer = dataset.get('is_integer')
feature_names = dataset.get('feature_names')
target_names = dataset.get('target_names')

# Hold out a quarter of the samples for testing; the seed is fixed so the
# split is the same on every run.
train_x, test_x, train_y, test_y = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.25,
    random_state=42,
)


In [3]:
def train_nn(neurons=(20,), **kwargs):
    """Fit an MLP classifier on the training split and print its accuracy.

    Relies on the notebook-level ``dataset``, ``train_x``, ``train_y``,
    ``test_x`` and ``test_y``. If the dataset provides an ``is_categorical``
    entry, the columns it selects are one-hot encoded in front of the network.

    Args:
        neurons: Hidden layer sizes, passed as ``hidden_layer_sizes``.
        **kwargs: Additional keyword arguments forwarded to ``MLPClassifier``.

    Returns:
        The fitted estimator: the ``MLPClassifier`` itself, or a ``Pipeline``
        with steps ``'one_hot'`` and ``'mlp'`` when categorical columns are
        declared.
    """
    is_categorical = dataset.get('is_categorical', None)
    model = MLPClassifier(hidden_layer_sizes=neurons, **kwargs)
    if is_categorical is not None:
        try:
            # Available from scikit-learn 0.20 onwards.
            from sklearn.compose import ColumnTransformer
        except ImportError:
            # Older scikit-learn: the encoder still takes the column selector
            # directly.
            encoder = OneHotEncoder(categorical_features=is_categorical)
        else:
            # `categorical_features` was deprecated in 0.20 and removed in
            # 0.22. ColumnTransformer is the documented replacement; with
            # remainder='passthrough' the encoded columns come first and the
            # untouched columns follow, matching the legacy layout.
            encoder = ColumnTransformer(
                [('categorical', OneHotEncoder(categories='auto'), is_categorical)],
                remainder='passthrough',
            )
        model = Pipeline([
            ('one_hot', encoder),
            ('mlp', model)
        ])
    model.fit(train_x, train_y)
    train_score = model.score(train_x, train_y)
    test_score = model.score(test_x, test_y)
    print('Training score:', train_score)
    print('Test score:', test_score)
    return model


def train_surrogate(model, sampling_rate=2.0, **kwargs):
    """Build a rule surrogate for ``model`` and print its fidelity scores.

    The surrogate is created by ``rule_surrogate`` from ``model.predict`` and
    the notebook-level ``train_x``, together with the notebook-level feature
    metadata (``is_continuous``, ``is_categorical``, ``is_integer``,
    ``feature_names``).

    Args:
        model: Fitted estimator; only its ``predict`` method is used.
        sampling_rate: Forwarded unchanged to ``rule_surrogate``.
        **kwargs: Additional keyword arguments forwarded to ``rule_surrogate``.

    Returns:
        The object returned by ``rule_surrogate``.
    """
    rule_list_options = {'feature_names': feature_names, 'verbose': 2}
    fitted = rule_surrogate(
        model.predict,
        train_x,
        sampling_rate=sampling_rate,
        is_continuous=is_continuous,
        is_categorical=is_categorical,
        is_integer=is_integer,
        rlargs=rule_list_options,
        **kwargs
    )

    # Score both splits first, then report, so the two lines print together.
    scores = [fitted.score(split) for split in (train_x, test_x)]
    for label, value in zip(('Training fidelity:', 'Test fidelity:'), scores):
        print(label, value)
    return fitted


In [4]:
# Two hidden layers of 20 units each for the network, then a surrogate
# trained against it with a sampling rate of 4.
nn = train_nn(neurons=(20, 20))
surrogate = train_surrogate(nn, sampling_rate=4)



Training score: 0.9732142857142857
Test score: 1.0
Training fidelity: 0.9196428571428571
Test fidelity: 0.9210526315789473


In [5]:
# Take the surrogate's `student` attribute and print its text form
# (shown in the output below as a rule list).
rl = surrogate.student
print(rl)

The rule list contains 8 of rules:

     IF (petal length (cm) in (5.4417, inf)) THEN prob: [0.0104, 0.0104, 0.9792]

ELSE IF (petal length (cm) in (3.0164, 4.3907)) THEN prob: [0.0206, 0.9588, 0.0206]

ELSE IF (petal width (cm) in (-inf, 0.5616)) THEN prob: [0.9833, 0.0083, 0.0083]

ELSE IF (sepal length (cm) in (-inf, 5.0335)) AND (petal length (cm) in (2.3684, 3.0164)) THEN prob: [0.1429, 0.8095, 0.0476]

ELSE IF (sepal length (cm) in (6.7978, inf)) THEN prob: [0.0625, 0.8125, 0.1250]

ELSE IF (petal width (cm) in (0.5616, 0.9473)) THEN prob: [0.7931, 0.1724, 0.0345]

ELSE IF (sepal length (cm) in (5.831, 6.7978)) AND (petal length (cm) in (4.3907, 4.9999)) THEN prob: [0.0256, 0.7949, 0.1795]

ELSE DEFAULT prob: [0.0185, 0.1296, 0.8519]



In [6]:
# Render the surrogate against the training split, labelled 'train'.
render(
    train_x,
    train_y,
    surrogate,
    feature_names=feature_names,
    target_names=target_names,
    is_categorical=is_categorical,
    name='train',
)

/nbextensions/rulematrix/d3.min.js
/nbextensions/rulematrix/react.production.min.js


