# PSyKE's demo

Some imports.

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from psyke.cart.predictor import CartPredictor

from psyke import Extractor
from psyke.regression.strategy import AdaptiveStrategy
from psyke.regression import Grid, FeatureRanker, HyperCubeExtractor
from psyke.utils.logic import pretty_theory

Import iris dataset separating features and class.

In [2]:
x, y = load_iris(return_X_y=True, as_frame=True)

Rename of the features.

In [3]:
x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

Replace integer indices with the corresponding string class.

In [4]:
y = pd.DataFrame(y).replace({"target": {0: 'setosa', 1: 'virginica', 2: 'versicolor'}})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,versicolor
146,versicolor
147,versicolor
148,versicolor


The final dataset:

In [5]:
dataset = x.join(y)
dataset.columns = [*dataset.columns[:-1], 'iris']
dataset

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,iris
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,versicolor
146,6.3,2.5,5.0,1.9,versicolor
147,6.5,3.0,5.2,2.0,versicolor
148,6.2,3.4,5.4,2.3,versicolor


Split between train and test set in a reproducible way.

In [6]:
train, test = train_test_split(dataset, test_size=0.5, random_state=0)

We use as predictor a KNN with K = 7 and we train it.

In [7]:
predictor = KNeighborsClassifier(n_neighbors=4)
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])
predictor.score(test.iloc[:, :-1], test.iloc[:, -1])

0.9733333333333334

We create an extractor that uses the CART algorithm and we extract prolog rules from our trained KNN.

In [8]:
DTClassifier = DecisionTreeClassifier().fit(train.iloc[:, :-1], predictor.predict(train.iloc[:, :-1]))
cart = Extractor.cart(CartPredictor(DTClassifier))
theory_from_cart = cart.extract(train)
print(f'CART performance ({cart.n_rules} rules):')
print(f'Accuracy = {cart.accuracy(test):.2f}')
print(f'Fidelity = {cart.accuracy(test, predictor):.2f}\n')
print('\nCART extracted rules:\n\n' + pretty_theory(theory_from_cart))

CART performance (3 rules):
Accuracy = 0.92
Fidelity = 0.92


CART extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalWidth =< 0.75.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalWidth =< 1.55.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor).


We create a GridEx extractor to extract prolog rules from the same KNN.

In [9]:
ranked = FeatureRanker(x.columns).fit(predictor, x).rankings()
gridEx = Extractor.gridex(predictor, Grid(1, AdaptiveStrategy(ranked, [(0.85, 8)])), threshold=.1, min_examples=1)
theory_from_gridEx = gridEx.extract(train)
print('GridEx performance ({} rules):\nAccuracy = {:.2f}\nAccuracy fidelity = {:.2f}\n'
      .format(gridEx.n_rules, gridEx.accuracy(test), gridEx.accuracy(test, predictor)))
print('GridEx extracted rules:\n\n' + pretty_theory(theory_from_gridEx))

TypeError: ufunc 'true_divide' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

We use CReEPy and CREAM cluster-based extractors to perform the extraction.

In [10]:
creepy = Extractor.creepy(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)
theory_from_creepy = creepy.extract(train)
print('CReEPy performance ({} rules):\nAccuracy = {:.2f}\nFidelity = {:.2f}\n'
      .format(creepy.n_rules, creepy.accuracy(test), creepy.accuracy(test, predictor)))
print('CReEPy extracted rules:\n\n' + pretty_theory(theory_from_creepy))

CReEPy performance (3 rules):
Accuracy = 0.79
Fidelity = 0.81

CReEPy extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [4.899999, 6.700001], SepalWidth in [2.199999, 3.200001], PetalLength in [2.999999, 5.000001], PetalWidth in [0.999999, 1.800001].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.899999, 7.700001], SepalWidth in [2.199999, 3.800001], PetalLength in [2.999999, 6.900001], PetalWidth in [0.999999, 2.500001].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.399999, 7.900001], SepalWidth in [2.199999, 4.100001], PetalLength in [1.199999, 6.900001], PetalWidth in [0.099999, 2.500001].


In [11]:
cream = Extractor.cream(predictor, depth=2, error_threshold=0.1, output=HyperCubeExtractor.Target.CLASSIFICATION)
theory_from_cream = cream.extract(train)
print('CREAM performance ({} rules):\nAccuracy = {:.2f}\nFidelity = {:.2f}\n'
      .format(cream.n_rules, cream.accuracy(test), cream.accuracy(test, predictor)))
print('CREAM extracted rules:\n\n' + pretty_theory(theory_from_cream))

CREAM performance (3 rules):
Accuracy = 0.79
Fidelity = 0.81

CREAM extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    SepalLength in [4.899999, 6.700001], SepalWidth in [2.199999, 3.200001], PetalLength in [2.999999, 5.000001], PetalWidth in [0.999999, 1.800001].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength in [4.899999, 7.700001], SepalWidth in [2.199999, 3.800001], PetalLength in [2.999999, 6.900001], PetalWidth in [0.999999, 2.500001].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    SepalLength in [4.399999, 7.900001], SepalWidth in [2.199999, 4.100001], PetalLength in [1.199999, 6.900001], PetalWidth in [0.099999, 2.500001].
