# PSyKE's demo

Some imports.

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score

from psyke import Extractor
from psyke.extraction.hypercubic.strategy import AdaptiveStrategy
from psyke.extraction.hypercubic import Grid, FeatureRanker
from psyke.utils.logic import pretty_theory
from psyke.utils import Target

Load the iris dataset, separating the features from the class labels.

In [2]:
# as_frame=True yields pandas objects; return_X_y=True returns them as a (features, target) pair.
x, y = load_iris(as_frame=True, return_X_y=True)

Rename the features.

In [3]:
# Give the four feature columns compact names (positional: the order must match the existing columns).
x.columns = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
]

Replace integer indices with the corresponding string class.

In [4]:
# Wrap the targets in a one-column frame and swap each integer class index in the
# "target" column for its species name.
# NOTE(review): assumes the column produced by pd.DataFrame(y) is called "target" — confirm.
y = pd.DataFrame(y).replace(
    to_replace={"target": {0: 'setosa', 1: 'versicolor', 2: 'virginica'}}
)
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,virginica
146,virginica
147,virginica
148,virginica


The final dataset:

In [5]:
# Attach the class column to the features, then relabel that last column as 'iris'.
dataset = x.join(y)
dataset.columns = list(dataset.columns[:-1]) + ['iris']
dataset

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,iris
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


Split between train and test set in a reproducible way.

In [6]:
# Half of the rows go to each split; the fixed random_state keeps the split reproducible.
train, test = train_test_split(dataset, random_state=0, test_size=0.5)

We use a KNN classifier as the predictor and train it.

In [7]:
# Black-box predictor whose behaviour the extractors below approximate.
# The commented-out lines are alternative predictors that can be swapped in.
#predictor = MLPClassifier(alpha=1, max_iter=1000)
predictor = KNeighborsClassifier(n_neighbors=5)
#predictor = DecisionTreeClassifier()

# The last column of the split frames holds the class; all the others are features.
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

# Predict once and reuse the result for both metrics.
test_labels = test.iloc[:, -1]
test_predictions = predictor.predict(test.iloc[:, :-1])

# scikit-learn metrics take (y_true, y_pred). Accuracy is symmetric, but the
# weighted F1 weights each class by its support in the first argument, so the
# true labels must come first.
print(f'Accuracy: {accuracy_score(test_labels, test_predictions):.2f}')
print(f'F1: {f1_score(test_labels, test_predictions, average="weighted"):.2f}')

Accuracy: 0.96
F1: 0.96


In [8]:
# Build an ITER extractor around the trained predictor and extract a theory from the training split.
it = Extractor.iter(
    predictor,
    min_update=0.15,
    min_examples=150,
    threshold=0.1,
    max_iterations=600,
    n_points=1,
)
theory_from_iter = it.extract(train)

# Scores computed with the predictor as second argument are reported as fidelity;
# the second F1 line is labelled accordingly so the two F1 values can be told apart.
print(
    'ITER performance ({} rules):\nAccuracy = {:.2f}\nFidelity = {:.2f}\nF1 = {:.2f}\nF1 fidelity = {:.2f}\n'.format(
        it.n_rules,
        it.accuracy(test),
        it.accuracy(test, predictor),
        it.f1(test),
        it.f1(test, predictor),
    )
)
print('ITER extracted rules:\n\n' + pretty_theory(theory_from_iter))

ITER performance (3 rules):
Accuracy = 0.91
Fidelity = 0.93
F1 = 0.91
F1 = 0.93

ITER extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.39, 7.90], SepalWidth in [2.19, 4.10], PetalLength in [1.19, 2.35], PetalWidth in [0.09, 2.50].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.39, 7.90], SepalWidth in [2.19, 4.10], PetalLength in [2.35, 4.92], PetalWidth in [0.09, 2.50].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [4.39, 7.90], SepalWidth in [2.19, 4.10], PetalLength in [4.92, 6.90], PetalWidth in [0.09, 2.50].


We create a GridEx extractor to extract Prolog rules from the same KNN.

In [9]:
# Rank the features with respect to the trained predictor; the ranking drives the adaptive grid strategy.
# NOTE(review): the ranker is fitted on the full feature frame `x`, not only on the
# training split — confirm this is intended.
ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()

# Build a GridEx extractor around the same predictor and extract a theory from the training split.
gridEx = Extractor.gridex(
    predictor,
    Grid(1, AdaptiveStrategy(ranked, [(0.85, 8)])),
    threshold=.1,
    min_examples=1,
)
theory_from_gridEx = gridEx.extract(train)

# Scores computed with the predictor as second argument are fidelity scores;
# the second F1 line is labelled to match the "Accuracy fidelity" line.
print(
    'GridEx performance ({} rules):\nAccuracy = {:.2f}\nAccuracy fidelity = {:.2f}\nF1 = {:.2f}\nF1 fidelity = {:.2f}\n'.format(
        gridEx.n_rules,
        gridEx.accuracy(test),
        gridEx.accuracy(test, predictor),
        gridEx.f1(test),
        gridEx.f1(test, predictor),
    )
)
print('GridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

GridEx performance (3 rules):
Accuracy = 0.94
Accuracy fidelity = 0.96
F1 = 0.94
F1 = 0.96

GridEx extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength in [1.19, 1.91].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [2.62, 4.76].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [4.76, 6.90].


We create an extractor that uses the CART algorithm and we extract Prolog rules from our trained KNN.

In [10]:
# Build a CART-based extractor around the trained predictor and extract a theory from the training split.
cart = Extractor.cart(predictor, simplify=True)
theory_from_cart = cart.extract(train)

# Scores computed with the predictor as second argument are reported as fidelity;
# the second F1 line is labelled accordingly so the two F1 values can be told apart.
print(
    'CART performance ({} rules):\nAccuracy = {:.2f}\nFidelity = {:.2f}\nF1 = {:.2f}\nF1 fidelity = {:.2f}\n'.format(
        cart.n_rules,
        cart.accuracy(test),
        cart.accuracy(test, predictor),
        cart.f1(test),
        cart.f1(test, predictor),
    )
)
print('\nCART extracted rules:\n\n' + pretty_theory(theory_from_cart))

CART performance (3 rules):
Accuracy = 0.92
Fidelity = 0.93
F1 = 0.92
F1 = 0.93


CART extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength =< 2.35.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalWidth =< 1.55.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica).


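We use the CReEPy clustering-based extractor to perform the extraction. As a first step, we run the clustering built by `Extractor.exact` on its own and print the regions it finds.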

In [11]:
# Run the clustering on the training split by itself (no predictor involved)
# and print a description of the regions it found.
exact = Extractor.exact(
    depth=2,
    error_threshold=0.1,
    output=Target.CLASSIFICATION,
)
_ = exact.extract(train)  # the returned value is not needed here
exact.print()

Output is virginica if:
    SepalLength is in [5.70, 7.90]
    SepalWidth is in [2.50, 3.80]
    PetalLength is in [4.80, 6.90]
    PetalWidth is in [1.60, 2.50]
Output is versicolor if:
    SepalLength is in [4.90, 7.90]
    SepalWidth is in [2.20, 3.80]
    PetalLength is in [3.00, 6.90]
    PetalWidth is in [1.00, 2.50]
Output is setosa if:
    SepalLength is in [4.40, 7.90]
    SepalWidth is in [2.20, 4.10]
    PetalLength is in [1.20, 6.90]
    PetalWidth is in [0.10, 2.50]


In [12]:
# CReEPy extractor around the trained predictor, using Extractor.exact as its clustering step.
creepy = Extractor.creepy(
    predictor,
    depth=2,
    error_threshold=0.1,
    output=Target.CLASSIFICATION,
    ranks=ranked,
    ignore_threshold=.99,
    clustering=Extractor.exact,
)
theory_from_creepy = creepy.extract(train)

# Accuracy is scored on the test split; the score computed with the predictor is reported as fidelity.
print(
    'CReEPy performance ({} rules):\nAccuracy = {:.2f}\nFidelity = {:.2f}\n'.format(
        creepy.n_rules,
        creepy.accuracy(test),
        creepy.accuracy(test, predictor),
    )
)
print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (3 rules):
Accuracy = 0.95
Fidelity = 0.96

CReEPy extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [4.79, 6.90].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [2.99, 6.90].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa).


In [13]:
# Run the clustering built by Extractor.cream on the training split by itself
# and print a description of the regions it found.
cream = Extractor.cream(
    depth=2,
    error_threshold=0.1,
    output=Target.CLASSIFICATION,
)
_ = cream.extract(train)  # the returned value is not needed here
cream.print()

Output is setosa if:
    SepalLength is in [4.40, 5.80]
    SepalWidth is in [2.30, 4.10]
    PetalLength is in [1.20, 1.70]
    PetalWidth is in [0.10, 0.50]
Output is versicolor if:
    SepalLength is in [4.90, 6.70]
    SepalWidth is in [2.20, 3.20]
    PetalLength is in [3.00, 5.00]
    PetalWidth is in [1.00, 1.80]
Output is virginica if:
    SepalLength is in [4.40, 7.90]
    SepalWidth is in [2.20, 4.10]
    PetalLength is in [1.20, 6.90]
    PetalWidth is in [0.10, 2.50]


In [14]:
# CReEPy extractor again, this time with Extractor.cream as its clustering step.
# This rebinds `creepy` and `theory_from_creepy` from the previous CReEPy cell.
creepy = Extractor.creepy(
    predictor,
    depth=2,
    error_threshold=0.1,
    output=Target.CLASSIFICATION,
    ranks=ranked,
    ignore_threshold=.99,
    clustering=Extractor.cream,
)
theory_from_creepy = creepy.extract(train)

# Accuracy is scored on the test split; the score computed with the predictor is reported as fidelity.
print(
    'CReEPy performance ({} rules):\nAccuracy = {:.2f}\nFidelity = {:.2f}\n'.format(
        creepy.n_rules,
        creepy.accuracy(test),
        creepy.accuracy(test, predictor),
    )
)
print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (3 rules):
Accuracy = 0.87
Fidelity = 0.88

CReEPy extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength in [1.19, 1.70].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength in [2.99, 4.90].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica).
