# PSyKE's demo

Some imports.

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

from psyke import Extractor, Clustering, EvaluableModel
from psyke.extraction.hypercubic.strategy import AdaptiveStrategy
from psyke.extraction.hypercubic import Grid, FeatureRanker
from psyke.tuning.orchid import OrCHiD
from psyke.utils.logic import pretty_theory
from psyke.utils import Target

Load the iris dataset, separating the features from the class labels.

In [2]:
# Load iris as pandas objects (as_frame=True): `x` receives the feature table, `y` the target column.
x, y = load_iris(return_X_y=True, as_frame=True)

Rename the features.

In [3]:
# Overwrite the four feature column labels with short CamelCase names.
x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

Replace the integer class labels with the corresponding class names.

In [4]:
# Wrap the target in a one-column frame and swap each integer code (0, 1, 2)
# for the class name found at that position in the tuple below.
y = pd.DataFrame(y).replace({"target": dict(enumerate(('setosa', 'versicolor', 'virginica')))})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,virginica
146,virginica
147,virginica
148,virginica


The final dataset:

In [5]:
# Append the class column to the features and expose it under the name 'iris'
# (the joined column arrives as 'target', and it is the last one in the frame).
dataset = x.join(y).rename(columns={'target': 'iris'})
dataset

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,iris
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


Split between train and test set in a reproducible way.

In [6]:
# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
train, test = train_test_split(dataset, test_size=0.25, random_state=1)

We use a KNN classifier as the predictor and train it.

In [7]:
# Other scikit-learn classifiers (e.g. MLPClassifier, DecisionTreeClassifier) can be
# swapped in here, provided they are first added to the imports cell.
predictor = KNeighborsClassifier(n_neighbors=7)
# The last column of the dataset is the class label; every column before it is a feature.
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

# Predict once and reuse the result for both metrics.
test_labels = test.iloc[:, -1]
test_predictions = predictor.predict(test.iloc[:, :-1])
# scikit-learn metrics take (y_true, y_pred) in that order. The weighted F1 averages
# the per-class scores by the support found in y_true, so swapping the arguments
# can change the reported value.
print(f'Accuracy: {accuracy_score(test_labels, test_predictions):.2f}')
print(f'F1: {f1_score(test_labels, test_predictions, average="weighted"):.2f}')

Accuracy: 0.97
F1: 0.97


In [8]:
def print_scores(scores):
    """Print the accuracy and F1 entries of a score mapping.

    :param scores: mapping indexed by ``EvaluableModel.ClassificationScore``
        members; the ACCURACY and F1 entries must each expose at least two
        numeric items. Item 0 is printed with the "(data)" label and item 1
        with the "(BB)" label, both with two decimals.
    """
    metric = EvaluableModel.ClassificationScore
    accuracy = scores[metric.ACCURACY]
    f1 = scores[metric.F1]
    print(f'Classification accuracy = {accuracy[0]:.2f} (data), '
          f'{accuracy[1]:.2f} (BB)\n'
          f'F1 = {f1[0]:.2f} (data), '
          f'{f1[1]:.2f} (BB)')

def get_scores(extractor, test, predictor, brute=False, criterion='density', n=2):
    """Score an extractor on ``test`` for classification accuracy and F1.

    Forwards everything positionally to ``extractor.score``: the test set, the
    predictor, two ``True`` flags (presumably fidelity and completeness --
    confirm against the PSyKE documentation), then ``brute``, ``criterion``,
    ``n``, the classification task and the list of requested metrics.

    :return: whatever ``extractor.score`` returns; the cells in this notebook
        unpack it as ``(scores, completeness)``.
    """
    task = EvaluableModel.Task.CLASSIFICATION
    metrics = [EvaluableModel.ClassificationScore.ACCURACY, EvaluableModel.ClassificationScore.F1]
    return extractor.score(test, predictor, True, True, brute, criterion, n, task, metrics)

We create an extractor that uses the CART algorithm and extract Prolog rules from our trained KNN.

In [9]:
# Build a CART-based extractor around the trained predictor (simplify=True).
cart = Extractor.cart(predictor, simplify=True)
# Extract a theory from the training split.
theory_from_cart = cart.extract(train)
# Score the extracted rules on the held-out split; get_scores yields (scores, completeness).
scores, completeness = get_scores(cart, test, predictor)
# completeness is printed as a percentage, hence the * 100.
print(f'CART performance ({cart.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
# pretty_theory renders the extracted theory as text for display.
print('\nCART extracted rules:\n\n' + pretty_theory(theory_from_cart))

CART performance (3 rules with 100.00% coverage):
Classification accuracy = 0.97 (data), 1.00 (BB)
F1 = 0.97 (data), 1.00 (BB)

CART extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength =< 2.6.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength =< 4.75.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica).


In [10]:
# DiViNE extractor over the trained predictor, configured with close_to_center=True.
divine = Extractor.divine(predictor, k=5, patience=15, close_to_center=True)
theory_from_divine = divine.extract(train)
# First evaluation: get_scores defaults (brute=False).
scores, completeness = get_scores(divine, test, predictor)
print(f'DiViNE performance ({divine.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

# Second evaluation: brute=True with the 'density' criterion.
# NOTE(review): brute mode presumably also predicts for samples no rule covers -- confirm in the PSyKE docs.
scores, completeness = get_scores(divine, test, predictor, True, 'density')
print(f'\nDiViNE brute performance ({divine.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

print('\nDiViNE extracted rules:\n\n' + pretty_theory(theory_from_divine))

DiViNE performance (3 rules with 73.68% coverage):
Classification accuracy = 0.96 (data), 1.00 (BB)
F1 = 0.96 (data), 1.00 (BB)

DiViNE brute performance (3 rules with 100.00% coverage):
Classification accuracy = 0.97 (data), 1.00 (BB)
F1 = 0.97 (data), 1.00 (BB)

DiViNE extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.9, 7.0], SepalWidth in [2.0, 3.2], PetalLength in [3.3, 4.9], PetalWidth in [1.0, 1.5].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.3, 5.7], SepalWidth in [2.3, 4.4], PetalLength in [1.0, 1.9], PetalWidth in [0.1, 0.6].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [5.6, 7.7], SepalWidth in [2.5, 3.8], PetalLength in [4.8, 6.9], PetalWidth in [1.4, 2.5].


In [34]:
# Same DiViNE setup as the previous cell but with close_to_center=False.
# This rebinds `divine` and `theory_from_divine`.
divine = Extractor.divine(predictor, k=5, patience=15, close_to_center=False)
theory_from_divine = divine.extract(train)
# First evaluation: get_scores defaults (brute=False).
scores, completeness = get_scores(divine, test, predictor)
print(f'DiViNE performance ({divine.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

# Second evaluation: brute=True; criterion and n keep the get_scores defaults ('density', 2).
scores, completeness = get_scores(divine, test, predictor, True)
print(f'\nDiViNE brute performance ({divine.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

print('\nDiViNE extracted rules:\n\n' + pretty_theory(theory_from_divine))

DiViNE performance (3 rules with 71.05% coverage):
Classification accuracy = 0.96 (data), 1.00 (BB)
F1 = 0.96 (data), 1.00 (BB)

DiViNE brute performance (3 rules with 100.00% coverage):
Classification accuracy = 0.97 (data), 1.00 (BB)
F1 = 0.97 (data), 1.00 (BB)

DiViNE extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.9, 7.0], SepalWidth in [2.0, 3.2], PetalLength in [3.3, 4.7], PetalWidth in [1.0, 1.5].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.3, 5.7], SepalWidth in [2.3, 4.4], PetalLength in [1.0, 1.9], PetalWidth in [0.1, 0.6].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [5.6, 7.7], SepalWidth in [2.5, 3.8], PetalLength in [4.8, 6.9], PetalWidth in [1.4, 2.5].


In [42]:
# ITER extractor over the trained predictor.
# NOTE(review): min_examples=150 looks larger than the 75% training split of iris -- confirm this is intended.
it = Extractor.iter(predictor, min_update=0.2, min_examples=150, threshold=0.1,
                    max_iterations=100, n_points=1, fill_gaps=True)
theory_from_iter = it.extract(train)
# First evaluation: get_scores defaults (brute=False).
scores, completeness = get_scores(it, test, predictor)
print(f'ITER performance ({it.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

# Second evaluation: brute=True; criterion and n keep the get_scores defaults.
scores, completeness = get_scores(it, test, predictor, True)
print(f'\nITER brute performance ({it.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

print('\nITER extracted rules:\n\n' + pretty_theory(theory_from_iter))

ITER performance (3 rules with 97.37% coverage):
Classification accuracy = 0.97 (data), 1.00 (BB)
F1 = 0.97 (data), 1.00 (BB)

ITER brute performance (3 rules with 100.00% coverage):
Classification accuracy = 0.97 (data), 1.00 (BB)
F1 = 0.97 (data), 1.00 (BB)

ITER extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [0.99, 2.58], PetalWidth in [0.09, 2.50].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [2.58, 4.94], PetalWidth in [0.09, 2.50].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [4.94, 6.90], PetalWidth in [0.09, 2.50].


We create a GridEx extractor to extract Prolog rules from the same KNN.

In [40]:
# Rank the features for this predictor; the ranking feeds the adaptive strategy below.
# NOTE(review): the ranker is fitted on the full feature table `x` (train and test rows alike) -- confirm that is intended.
ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()
# NOTE(review): (0.7, 5) looks like an (importance threshold, number of splits) pair -- confirm against the PSyKE docs.
gridEx = Extractor.gridex(predictor, Grid(1, AdaptiveStrategy(ranked, [(0.7, 5)])), threshold=.1, min_examples=1)
theory_from_gridEx = gridEx.extract(train)
# First evaluation: get_scores defaults (brute=False).
scores, completeness = get_scores(gridEx, test, predictor)
print(f'GridEx performance ({gridEx.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

# Second evaluation: brute=True; criterion and n keep the get_scores defaults.
scores, completeness = get_scores(gridEx, test, predictor, True)
print(f'\nGridEx brute performance ({gridEx.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

print('\nGridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

GridEx performance (5 rules with 94.74% coverage):
Classification accuracy = 0.94 (data), 0.97 (BB)
F1 = 0.95 (data), 0.97 (BB)

GridEx brute performance (5 rules with 100.00% coverage):
Classification accuracy = 0.95 (data), 0.97 (BB)
F1 = 0.95 (data), 0.97 (BB)

GridEx extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [4.54, 5.72], PetalWidth in [1.06, 1.54].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength in [0.99, 2.17], PetalWidth in [0.09, 1.06].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [2.17, 4.54], PetalWidth in [0.57, 1.06].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [3.36, 4.54], PetalWidth in [1.06, 2.02].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [4.54, 6.90], PetalWidth in [1.54, 2.50].


In [41]:
# Recompute the feature ranking (same call as in the previous GridEx cell).
# NOTE(review): the ranker is fitted on the full feature table `x` (train and test rows alike) -- confirm that is intended.
ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()
# Same GridEx setup as before except for the adaptive-strategy pair, now (0.85, 8).
# This rebinds `gridEx` and `theory_from_gridEx`.
gridEx = Extractor.gridex(predictor, Grid(1, AdaptiveStrategy(ranked, [(0.85, 8)])), threshold=.1, min_examples=1)
theory_from_gridEx = gridEx.extract(train)
# First evaluation: get_scores defaults (brute=False).
scores, completeness = get_scores(gridEx, test, predictor)
print(f'GridEx performance ({gridEx.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

# Second evaluation: brute=True; criterion and n keep the get_scores defaults.
scores, completeness = get_scores(gridEx, test, predictor, True)
print(f'\nGridEx brute performance ({gridEx.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)

print('\nGridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

GridEx performance (3 rules with 94.74% coverage):
Classification accuracy = 0.92 (data), 0.94 (BB)
F1 = 0.92 (data), 0.95 (BB)

GridEx brute performance (3 rules with 100.00% coverage):
Classification accuracy = 0.92 (data), 0.95 (BB)
F1 = 0.92 (data), 0.95 (BB)

GridEx extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength in [0.99, 2.47].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [3.21, 4.68].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [4.68, 6.90].


We use the CReEPy clustering-based extractor to perform the extraction.

In [None]:
def print_clustering_scores(scores):
    """Print the ARI, AMI, V-measure and FMI entries of a score mapping.

    :param scores: mapping indexed by ``EvaluableModel.ClusteringScore``
        members; only item 0 of each entry is printed, with two decimals,
        one metric per line.
    """
    metric = EvaluableModel.ClusteringScore
    report = [
        f'ARI = {scores[metric.ARI][0]:.2f}',
        f'AMI = {scores[metric.AMI][0]:.2f}',
        f'V-measure = {scores[metric.V][0]:.2f}',
        f'FMI = {scores[metric.FMI][0]:.2f}',
    ]
    print('\n'.join(report))

def get_clustering_scores(clustering, test, brute=False, criterion='density', n=2):
    """Score a clustering model on ``test`` with ARI, AMI, V-measure and FMI.

    The call uses the same nine-positional layout as ``get_scores`` in this
    notebook (data, predictor, two flags, ``brute``, ``criterion``, ``n``,
    task, metrics). The earlier six-argument form put the task in the
    ``brute`` slot and the metric list in the ``criterion`` slot.

    :param clustering: object exposing ``score`` with that layout.
    :param test: data to score on.
    :param brute, criterion, n: forwarded unchanged; defaults mirror ``get_scores``.
    :return: whatever ``clustering.score`` returns; the cells in this notebook
        unpack it as ``(scores, completeness)``.
    """
    metric = EvaluableModel.ClusteringScore
    # No black-box predictor is involved, so predictor is None and the third flag is False.
    # NOTE(review): clustering metrics are requested with Task.CLASSIFICATION, as in the
    # original call -- confirm this is the task PSyKE expects for these metrics.
    return clustering.score(test, None, False, True, brute, criterion, n,
                            EvaluableModel.Task.CLASSIFICATION,
                            [metric.ARI, metric.AMI, metric.V, metric.FMI])

def print_scores_short(scores):
    """Print only item 0 of the ACCURACY entry of ``scores``, with two decimals."""
    accuracy = scores[EvaluableModel.ClassificationScore.ACCURACY][0]
    print(f'Classification accuracy = {accuracy:.2f}')

def get_scores_short(extractor, test, brute=False, criterion='density', n=2):
    """Score a model on ``test`` for accuracy and F1 without a black-box predictor.

    The call uses the same nine-positional layout as ``get_scores`` in this
    notebook (data, predictor, two flags, ``brute``, ``criterion``, ``n``,
    task, metrics). The earlier six-argument form put the task in the
    ``brute`` slot and the metric list in the ``criterion`` slot.

    :param extractor: object exposing ``score`` with that layout.
    :param test: data to score on.
    :param brute, criterion, n: forwarded unchanged; defaults mirror ``get_scores``.
    :return: whatever ``extractor.score`` returns; the cells in this notebook
        unpack it as ``(scores, completeness)``.
    """
    metric = EvaluableModel.ClassificationScore
    # predictor is None and the third flag is False: nothing is compared against a black box.
    return extractor.score(test, None, False, True, brute, criterion, n,
                           EvaluableModel.Task.CLASSIFICATION,
                           [metric.ACCURACY, metric.F1])

In [None]:
# Configure an OrCHiD search for the ExACT algorithm on the training split.
orchid = OrCHiD(dataframe=train, algorithm=OrCHiD.Algorithm.ExACT, output=Target.CLASSIFICATION,
                max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1, patience=5, max_depth=3)
orchid.search()
# Keep only the last two fields of the first entry returned by get_best();
# the next cell passes them to Clustering.exact as depth and error_threshold.
(_, _, depth, threshold) = orchid.get_best()[0]

In [None]:
# Build ExACT with the depth and error threshold picked in the OrCHiD cell above.
exact = Clustering.exact(depth=depth, error_threshold=threshold, output=Target.CLASSIFICATION)
exact.fit(train)
# Clustering metrics (ARI, AMI, V-measure, FMI) on the held-out split.
scores, completeness = get_clustering_scores(exact, test)
print(f'ExACT performance ({exact.n_rules} clusters with {completeness * 100:.2f}% coverage):')
print_clustering_scores(scores)
# Classification accuracy of the same model; the completeness value is discarded here.
scores, _ = get_scores_short(exact, test)
print_scores_short(scores)
print()
exact.explain()

In [None]:
# CReEPy extractor using ExACT as its clustering step.
# `ranked` is the feature ranking computed in the GridEx cells, so those must run first.
creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.1, output=Target.CLASSIFICATION,
                          ranks=ranked, ignore_threshold=.99, clustering=Clustering.exact)
theory_from_creepy = creepy.extract(train)
# Evaluate with the get_scores defaults (brute=False).
scores, completeness = get_scores(creepy, test, predictor)
print(f'CReEPy performance ({creepy.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nCReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

In [None]:
# Configure an OrCHiD search for the CREAM algorithm on the training split
# (same settings as the ExACT search; this rebinds `orchid`).
orchid = OrCHiD(dataframe=train, algorithm=OrCHiD.Algorithm.CREAM, output=Target.CLASSIFICATION,
                max_mae_increase=1.2, min_rule_decrease=0.9, readability_tradeoff=0.1, patience=5, max_depth=3)
orchid.search()
# Keep only the last two fields of the first entry returned by get_best();
# this overwrites the `depth` and `threshold` chosen for ExACT.
(_, _, depth, threshold) = orchid.get_best()[0]

In [None]:
# Build CREAM with the depth and error threshold picked in the OrCHiD cell above.
cream = Clustering.cream(depth=depth, error_threshold=threshold, output=Target.CLASSIFICATION)
cream.fit(train)
# Clustering metrics (ARI, AMI, V-measure, FMI) on the held-out split.
scores, completeness = get_clustering_scores(cream, test)
print(f'CREAM performance ({cream.n_rules} clusters with {completeness * 100:.2f}% coverage):')
print_clustering_scores(scores)
# Classification accuracy of the same model; the completeness value is discarded here.
scores, _ = get_scores_short(cream, test)
print_scores_short(scores)
print()
cream.explain()

In [None]:
# CReEPy extractor using CREAM as its clustering step (rebinds `creepy` and `theory_from_creepy`).
# `ranked` is the feature ranking computed in the GridEx cells, so those must run first.
creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.1, output=Target.CLASSIFICATION,
                          ranks=ranked, ignore_threshold=.99, clustering=Clustering.cream)
theory_from_creepy = creepy.extract(train)
# Evaluate with the get_scores defaults (brute=False).
scores, completeness = get_scores(creepy, test, predictor)
print(f'CReEPy performance ({creepy.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nCReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))