# PSyKE's demo

Some imports, plus a quick look at a two-dimensional `HyperCube`.

In [1]:
from psyke.extraction.hypercubic import HyperCube

# Each dimension maps a feature name to its (lower, upper) bounds.
dimensions = {'X': (0.2, 0.6), 'Y': (0.7, 0.9)}
mean = 0.5
# NOTE(review): the second argument is an empty set -- presumably the cube's limits;
# confirm against HyperCube's signature.
cube = HyperCube(dimensions, set(), mean)

# Only the last expression of a cell is echoed, so the center must be printed explicitly;
# otherwise its value is computed and silently discarded.
print(cube.center())
cube.corners()

[{'X': 0.2, 'Y': 0.7},
 {'X': 0.2, 'Y': 0.9},
 {'X': 0.6, 'Y': 0.7},
 {'X': 0.6, 'Y': 0.9}]

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

from psyke import Extractor, Clustering, EvaluableModel
from psyke.extraction.hypercubic.strategy import AdaptiveStrategy
from psyke.extraction.hypercubic import Grid, FeatureRanker
from psyke.tuning.orchid import OrCHiD
from psyke.utils.logic import pretty_theory
from psyke.utils import Target

Load the iris dataset, separating features and class.

In [5]:
# Load iris as pandas objects: `x` is the feature frame, `y` the target series.
iris = load_iris(as_frame=True)
x, y = iris.data, iris.target

Rename the features.

In [6]:
# CamelCase feature names; they show up as variable names in the extracted rules.
feature_names = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
x.columns = feature_names

Replace integer indices with the corresponding string class.

In [7]:
# Map the integer class codes in the "target" column to the iris species names.
class_names = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
y = pd.DataFrame(y).replace({"target": class_names})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,virginica
146,virginica
147,virginica
148,virginica


The final dataset:

In [8]:
# Attach the class column to the features and relabel that last column as 'iris'.
dataset = x.join(y)
dataset = dataset.rename(columns={dataset.columns[-1]: 'iris'})
dataset

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,iris
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


Split between train and test set in a reproducible way.

In [9]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
train, test = train_test_split(
    dataset,
    test_size=0.25,
    random_state=1,
)

We use a KNN classifier as the predictor and train it.

In [10]:
# Black-box model to be explained. Any other scikit-learn classifier (e.g. an MLP or a
# decision tree) could be used here instead, provided it is imported first.
predictor = KNeighborsClassifier(n_neighbors=5)
# The last column of the frame is the class label; everything before it is a feature.
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

# Predict once and reuse the result for both metrics.
test_labels = test.iloc[:, -1]
test_predictions = predictor.predict(test.iloc[:, :-1])

# scikit-learn metrics take (y_true, y_pred): the weighted F1 averages per-class scores
# using the support of the *true* labels, so the argument order matters.
print(f'Accuracy: {accuracy_score(test_labels, test_predictions):.2f}')
print(f'F1: {f1_score(test_labels, test_predictions, average="weighted"):.2f}')

Accuracy: 1.00
F1: 1.00


In [11]:
def print_scores(scores):
    """Print the accuracy and F1 entries of a score mapping as a two-line report.

    `scores` is indexed by `EvaluableModel.ClassificationScore.ACCURACY` and
    `EvaluableModel.ClassificationScore.F1`; each entry must expose at least two
    values. Element 0 is reported with the "(data)" label and element 1 with the
    "(BB)" label (presumably the black-box predictor -- confirm against PSyKE).
    """
    accuracy = scores[EvaluableModel.ClassificationScore.ACCURACY]
    f1 = scores[EvaluableModel.ClassificationScore.F1]
    report = [
        f'Classification accuracy = {accuracy[0]:.2f} (data), {accuracy[1]:.2f} (BB)',
        f'F1 = {f1[0]:.2f} (data), {f1[1]:.2f} (BB)',
    ]
    # A single print call, so nothing is emitted if formatting any value fails.
    print('\n'.join(report))

def get_scores(extractor, test, predictor):
    """Score `extractor` on `test` for accuracy and F1, also passing `predictor`.

    Forwards to `extractor.score` with the classification task and returns its
    result unchanged; the cells of this notebook unpack it as
    `(scores, completeness)`.
    """
    task = EvaluableModel.Task.CLASSIFICATION
    metrics = [EvaluableModel.ClassificationScore.ACCURACY, EvaluableModel.ClassificationScore.F1]
    # NOTE(review): the two positional True flags presumably enable the comparison
    # against the predictor and the coverage computation -- confirm against PSyKE's API.
    return extractor.score(test, predictor, True, True, task, metrics)

In [12]:
# Build an ITER extractor around the trained predictor.
it = Extractor.iter(
    predictor,
    min_update=0.15,
    min_examples=150,
    threshold=0.1,
    max_iterations=600,
    n_points=1,
)
theory_from_iter = it.extract(train)

# Evaluate the extracted rules on the held-out set, then print the report and the theory.
scores, completeness = get_scores(it, test, predictor)
print(f'ITER performance ({it.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nITER extracted rules:\n\n' + pretty_theory(theory_from_iter))

ITER performance (5 rules with 97.37% coverage):
Classification accuracy = 0.97 (data), 0.97 (BB)
F1 = 0.97 (data), 0.97 (BB)

ITER extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [0.99, 3.17], PetalWidth in [0.09, 2.10].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [3.17, 5.82], PetalWidth in [0.09, 1.74].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [0.99, 4.05], PetalWidth in [2.10, 2.50].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [4.29, 7.70], SepalWidth in [1.99, 4.40], PetalLength in [5.82, 6.90], PetalWidth in [0.09, 2.50].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [4.29, 7.70], S

We create a GridEx extractor to extract Prolog rules from the same KNN.

In [13]:
# Rank the features with respect to the predictor; the rankings drive the adaptive grid.
ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()
strategy = AdaptiveStrategy(ranked, [(0.85, 8)])
gridEx = Extractor.gridex(
    predictor,
    Grid(1, strategy),
    threshold=.1,
    min_examples=1,
)
theory_from_gridEx = gridEx.extract(train)

# Evaluate the extracted rules on the held-out set, then print the report and the theory.
scores, completeness = get_scores(gridEx, test, predictor)
print(f'GridEx performance ({gridEx.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nGridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

GridEx performance (3 rules with 94.74% coverage):
Classification accuracy = 0.92 (data), 0.92 (BB)
F1 = 0.92 (data), 0.92 (BB)

GridEx extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength in [0.99, 2.47].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [3.21, 4.68].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [4.68, 6.90].


We create an extractor that uses the CART algorithm and extract Prolog rules from our trained KNN.

In [14]:
# Build a CART-based extractor around the trained predictor (with simplify=True).
cart = Extractor.cart(predictor, simplify=True)
# Extract a logic theory from the training set.
theory_from_cart = cart.extract(train)
# Evaluate the extracted rules on the held-out set.
scores, completeness = get_scores(cart, test, predictor)
# `completeness` is a fraction, hence the conversion to a percentage.
print(f'CART performance ({cart.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nCART extracted rules:\n\n' + pretty_theory(theory_from_cart))

CART performance (3 rules with 100.00% coverage):
Classification accuracy = 0.97 (data), 0.97 (BB)
F1 = 0.97 (data), 0.97 (BB)

CART extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength =< 2.6.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength =< 4.75.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica).


We use the CReEPy clustering-based extractor to perform the extraction.

In [15]:
def print_clustering_scores(scores):
    """Print the ARI, AMI, V-measure and FMI entries of a clustering score mapping.

    `scores` is indexed by the `EvaluableModel.ClusteringScore` members; only
    element 0 of each entry is reported, one metric per line.
    """
    ari = scores[EvaluableModel.ClusteringScore.ARI][0]
    ami = scores[EvaluableModel.ClusteringScore.AMI][0]
    v_measure = scores[EvaluableModel.ClusteringScore.V][0]
    fmi = scores[EvaluableModel.ClusteringScore.FMI][0]
    report = [
        f'ARI = {ari:.2f}',
        f'AMI = {ami:.2f}',
        f'V-measure = {v_measure:.2f}',
        f'FMI = {fmi:.2f}',
    ]
    print('\n'.join(report))

def get_clustering_scores(clustering, test):
    """Score `clustering` on `test` with the ARI, AMI, V-measure and FMI metrics.

    No predictor is supplied (None is passed in its place). The result of
    `clustering.score` is returned unchanged; the cells of this notebook unpack
    it as `(scores, completeness)`.
    """
    task = EvaluableModel.Task.CLASSIFICATION
    metrics = [
        EvaluableModel.ClusteringScore.ARI,
        EvaluableModel.ClusteringScore.AMI,
        EvaluableModel.ClusteringScore.V,
        EvaluableModel.ClusteringScore.FMI,
    ]
    # NOTE(review): the positional False/True flags presumably disable the predictor
    # comparison and enable the coverage computation -- confirm against PSyKE's API.
    return clustering.score(test, None, False, True, task, metrics)

def print_scores_short(scores):
    """Print only element 0 of the accuracy entry of `scores`."""
    accuracy = scores[EvaluableModel.ClassificationScore.ACCURACY][0]
    print(f'Classification accuracy = {accuracy:.2f}')

def get_scores_short(extractor, test):
    """Score `extractor` on `test` for accuracy and F1 without supplying a predictor.

    None is passed in the predictor position. The result of `extractor.score` is
    returned unchanged; the cells of this notebook unpack it as a pair and keep
    only the first element.
    """
    task = EvaluableModel.Task.CLASSIFICATION
    metrics = [EvaluableModel.ClassificationScore.ACCURACY, EvaluableModel.ClassificationScore.F1]
    # NOTE(review): the meaning of the positional False/True flags is not visible here;
    # confirm against PSyKE's `score` signature.
    return extractor.score(test, None, False, True, task, metrics)

In [16]:
# Tune the ExACT hyper-parameters on the training set with OrCHiD.
orchid = OrCHiD(
    dataframe=train,
    algorithm=OrCHiD.Algorithm.ExACT,
    output=Target.CLASSIFICATION,
    max_mae_increase=1.2,
    min_rule_decrease=0.9,
    readability_tradeoff=0.1,
    patience=5,
    max_depth=3,
)
orchid.search()
# Keep only the depth and threshold (last two fields) of the first entry returned by get_best().
_, _, depth, threshold = orchid.get_best()[0]

Algorithm.ExACT. Depth: 1. Threshold = 1.00. Predictive loss = 0.30, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.02. Predictive loss = 0.30, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.06. Predictive loss = 0.30, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.11. Predictive loss = 0.30, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.15. Predictive loss = 0.30, 2 rules
Algorithm.ExACT. Depth: 1. Threshold = 0.20. Predictive loss = 0.30, 2 rules

Algorithm.ExACT. Depth: 2. Threshold = 1.00. Predictive loss = 0.30, 2 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.02. Predictive loss = 0.27, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.06. Predictive loss = 0.27, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.11. Predictive loss = 0.27, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.15. Predictive loss = 0.27, 3 rules
Algorithm.ExACT. Depth: 2. Threshold = 0.20. Predictive loss = 0.27, 3 rules

****************************************
* Best Algorithm.ExACT
**********

In [17]:
# Build an ExACT clustering with the depth/threshold selected by the OrCHiD search cell.
exact = Clustering.exact(depth=depth, error_threshold=threshold, output=Target.CLASSIFICATION)
exact.fit(train)
# First report: clustering metrics (ARI, AMI, V-measure, FMI) on the held-out set.
scores, completeness = get_clustering_scores(exact, test)
print(f'ExACT performance ({exact.n_rules} clusters with {completeness * 100:.2f}% coverage):')
print_clustering_scores(scores)
# Second report: classification accuracy; `scores` is rebound and the coverage is ignored.
scores, _ = get_scores_short(exact, test)
print_scores_short(scores)
print()
# Print a textual description of the fitted clusters.
exact.explain()

ExACT performance (3 clusters with 97.37% coverage):
ARI = 0.53
AMI = 0.62
V-measure = 0.64
FMI = 0.69
Classification accuracy = 0.78

Output is virginica if:
    SepalLength is in [4.90, 7.70]
    SepalWidth is in [2.20, 3.80]
    PetalLength is in [4.50, 6.90]
    PetalWidth is in [1.40, 2.50]
Output is versicolor if:
    SepalLength is in [4.90, 7.70]
    SepalWidth is in [2.00, 3.80]
    PetalLength is in [3.30, 6.90]
    PetalWidth is in [1.00, 2.50]
Output is setosa if:
    SepalLength is in [4.30, 7.70]
    SepalWidth is in [2.00, 4.40]
    PetalLength is in [1.00, 6.90]
    PetalWidth is in [0.10, 2.50]


In [18]:
# CReEPy extractor using ExACT as the underlying clustering and the feature rankings.
creepy = Extractor.creepy(
    predictor,
    depth=2,
    error_threshold=0.1,
    output=Target.CLASSIFICATION,
    ranks=ranked,
    ignore_threshold=.99,
    clustering=Clustering.exact,
)
theory_from_creepy = creepy.extract(train)

# Evaluate the extracted rules on the held-out set, then print the report and the theory.
scores, completeness = get_scores(creepy, test, predictor)
print(f'CReEPy performance ({creepy.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nCReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (3 rules with 100.00% coverage):
Classification accuracy = 0.95 (data), 0.95 (BB)
F1 = 0.95 (data), 0.95 (BB)

CReEPy extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [4.79, 6.90].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [3.29, 6.90].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa).


In [19]:
# Tune the CREAM hyper-parameters on the training set with OrCHiD.
orchid = OrCHiD(
    dataframe=train,
    algorithm=OrCHiD.Algorithm.CREAM,
    output=Target.CLASSIFICATION,
    max_mae_increase=1.2,
    min_rule_decrease=0.9,
    readability_tradeoff=0.1,
    patience=5,
    max_depth=3,
)
orchid.search()
# Keep only the depth and threshold (last two fields) of the first entry returned by get_best().
_, _, depth, threshold = orchid.get_best()[0]

Algorithm.CREAM. Depth: 1. Threshold = 1.00. Predictive loss = 0.30, 2 rules
Algorithm.CREAM. Depth: 1. Threshold = 0.02. Predictive loss = 0.30, 2 rules
Algorithm.CREAM. Depth: 1. Threshold = 0.06. Predictive loss = 0.30, 2 rules
Algorithm.CREAM. Depth: 1. Threshold = 0.11. Predictive loss = 0.30, 2 rules
Algorithm.CREAM. Depth: 1. Threshold = 0.15. Predictive loss = 0.30, 2 rules
Algorithm.CREAM. Depth: 1. Threshold = 0.20. Predictive loss = 0.30, 2 rules

Algorithm.CREAM. Depth: 2. Threshold = 1.00. Predictive loss = 0.30, 2 rules
Algorithm.CREAM. Depth: 2. Threshold = 0.02. Predictive loss = 0.07, 3 rules
Algorithm.CREAM. Depth: 2. Threshold = 0.06. Predictive loss = 0.07, 3 rules
Algorithm.CREAM. Depth: 2. Threshold = 0.11. Predictive loss = 0.07, 3 rules
Algorithm.CREAM. Depth: 2. Threshold = 0.15. Predictive loss = 0.07, 3 rules
Algorithm.CREAM. Depth: 2. Threshold = 0.20. Predictive loss = 0.07, 3 rules

****************************************
* Best Algorithm.CREAM
**********

In [20]:
# Build a CREAM clustering with the depth/threshold selected by the OrCHiD search cell.
cream = Clustering.cream(depth=depth, error_threshold=threshold, output=Target.CLASSIFICATION)
cream.fit(train)
# First report: clustering metrics (ARI, AMI, V-measure, FMI) on the held-out set.
scores, completeness = get_clustering_scores(cream, test)
print(f'CREAM performance ({cream.n_rules} clusters with {completeness * 100:.2f}% coverage):')
print_clustering_scores(scores)
# Second report: classification accuracy; `scores` is rebound and the coverage is ignored.
scores, _ = get_scores_short(cream, test)
print_scores_short(scores)
print()
# Print a textual description of the fitted clusters.
cream.explain()

CREAM performance (3 clusters with 97.37% coverage):
ARI = 0.85
AMI = 0.82
V-measure = 0.83
FMI = 0.90
Classification accuracy = 0.95

Output is setosa if:
    SepalLength is in [4.30, 5.70]
    SepalWidth is in [2.30, 4.40]
    PetalLength is in [1.00, 1.90]
    PetalWidth is in [0.10, 0.60]
Output is versicolor if:
    SepalLength is in [4.90, 7.00]
    SepalWidth is in [2.00, 3.40]
    PetalLength is in [3.30, 5.10]
    PetalWidth is in [1.00, 1.80]
Output is virginica if:
    SepalLength is in [4.30, 7.70]
    SepalWidth is in [2.00, 4.40]
    PetalLength is in [1.00, 6.90]
    PetalWidth is in [0.10, 2.50]


In [21]:
# CReEPy extractor using CREAM as the underlying clustering and the feature rankings.
creepy = Extractor.creepy(
    predictor,
    depth=2,
    error_threshold=0.1,
    output=Target.CLASSIFICATION,
    ranks=ranked,
    ignore_threshold=.99,
    clustering=Clustering.cream,
)
theory_from_creepy = creepy.extract(train)

# Evaluate the extracted rules on the held-out set, then print the report and the theory.
scores, completeness = get_scores(creepy, test, predictor)
print(f'CReEPy performance ({creepy.n_rules} rules with {completeness * 100:.2f}% coverage):')
print_scores(scores)
print('\nCReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (3 rules with 100.00% coverage):
Classification accuracy = 0.95 (data), 0.95 (BB)
F1 = 0.95 (data), 0.95 (BB)

CReEPy extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength in [0.99, 1.90].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [3.29, 5.00].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica).
