# PSyKE's demo

Some imports.

In [1]:
from psyke.utils.dataframe import get_discrete_features_equal_frequency
from psyke import Extractor
from psyke.utils.dataframe import get_discrete_dataset
from psyke.utils.logic import pretty_theory
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

Import iris dataset separating features and class.

In [12]:
x, y = load_iris(return_X_y=True, as_frame=True)

Rename of the features.

In [13]:
x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

The original features' dataset is discretized using the equal frequency method. Each feature is mapped in a 3 (can be an arbitrary integer) new one-hot encoded sub-features representing 3 real intervals. So from the original 4 features we have a new 12 features dataset. S, M and L stand for small, medium and large.

In [14]:
iris_features = get_discrete_features_equal_frequency(x, output=False, bin_names=['S', 'M', 'L'])

for descrete_feature in iris_features:
    print(str(descrete_feature), end='\n\n')

SepalWidth = {'SepalWidth_0' if SepalWidth ∈ ]-∞, 2.8[, 'SepalWidth_1' if SepalWidth ∈ [2.8, 3.0], 'SepalWidth_2' if SepalWidth ∈ [3.0, 3.4], 'SepalWidth_3' if SepalWidth ∈ ]3.4, ∞[}

PetalLength = {'PetalLength_0' if PetalLength ∈ ]-∞, 1.6[, 'PetalLength_1' if PetalLength ∈ [1.6, 4.4], 'PetalLength_2' if PetalLength ∈ [4.4, 5.1], 'PetalLength_3' if PetalLength ∈ ]5.1, ∞[}

PetalWidth = {'PetalWidth_0' if PetalWidth ∈ ]-∞, 0.3[, 'PetalWidth_1' if PetalWidth ∈ [0.3, 1.3], 'PetalWidth_2' if PetalWidth ∈ [1.3, 1.8], 'PetalWidth_3' if PetalWidth ∈ ]1.8, ∞[}

SepalLength = {'SepalLength_0' if SepalLength ∈ ]-∞, 5.1[, 'SepalLength_1' if SepalLength ∈ [5.1, 5.8], 'SepalLength_2' if SepalLength ∈ [5.8, 6.4], 'SepalLength_3' if SepalLength ∈ ]6.4, ∞[}



Reassign features' data to the discretized one.

In [5]:
x = get_discrete_dataset(x, iris_features)
x

Unnamed: 0,PetalLength_0,PetalLength_1,PetalLength_2,PetalWidth_0,PetalWidth_1,PetalWidth_2,SepalLength_0,SepalLength_1,SepalLength_2,SepalWidth_0,SepalWidth_1,SepalWidth_2
0,1,0,0,1,0,0,1,0,0,0,0,1
1,1,0,0,1,0,0,1,0,0,0,1,0
2,1,0,0,1,0,0,1,0,0,0,1,0
3,1,0,0,1,0,0,1,0,0,0,1,0
4,1,0,0,1,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
145,0,0,1,0,0,1,0,0,1,0,1,0
146,0,0,1,0,0,1,0,1,0,1,0,0
147,0,0,1,0,0,1,0,0,1,0,1,0
148,0,0,1,0,0,1,0,1,0,0,0,1


Replace integer indices with the corresponding string class.

In [6]:
y = pd.DataFrame(y).replace({"target": {0: 'setosa', 1: 'virginica', 2: 'versicolor'}})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,versicolor
146,versicolor
147,versicolor
148,versicolor


The final dataset:

In [7]:
dataset = x.join(y)
dataset.columns = [*dataset.columns[:-1], 'iris']
dataset

Unnamed: 0,PetalLength_0,PetalLength_1,PetalLength_2,PetalWidth_0,PetalWidth_1,PetalWidth_2,SepalLength_0,SepalLength_1,SepalLength_2,SepalWidth_0,SepalWidth_1,SepalWidth_2,iris
0,1,0,0,1,0,0,1,0,0,0,0,1,setosa
1,1,0,0,1,0,0,1,0,0,0,1,0,setosa
2,1,0,0,1,0,0,1,0,0,0,1,0,setosa
3,1,0,0,1,0,0,1,0,0,0,1,0,setosa
4,1,0,0,1,0,0,1,0,0,0,0,1,setosa
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,0,0,1,0,0,1,0,0,1,0,1,0,versicolor
146,0,0,1,0,0,1,0,1,0,1,0,0,versicolor
147,0,0,1,0,0,1,0,0,1,0,1,0,versicolor
148,0,0,1,0,0,1,0,1,0,0,0,1,versicolor


Split between train and test set in a reproducible way.

In [8]:
train, test = train_test_split(dataset, test_size=0.5, random_state=0)

We use as predictor a KNN with K = 7 and we train it.

In [9]:
predictor = KNeighborsClassifier(n_neighbors=7)
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

KNeighborsClassifier(n_neighbors=7)

We create an extractor that uses the REAL algorithm and we extract prolog rules from our trained KNN.

In [10]:
real = Extractor.real(predictor, iris_features)
theory_from_real = real.extract(train)
print('REAL extracted rules:\n\n' + pretty_theory(theory_from_real))

REAL extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalWidth =< 1.0, SepalLength =< 5.4.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalWidth =< 1.0, SepalWidth > 3.2.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa) :-
    PetalLength =< 3.0, SepalLength =< 5.4.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength > 6.3, SepalWidth in [2.9, 3.2].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalWidth > 1.6.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [3.0, 4.9], SepalWidth =< 2.9.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalWidth in [1.0, 1.6].


We create a different extractor that use Trepan algorithm and we extract prolog rules from the same KNN.

In [11]:
trepan = Extractor.trepan(predictor, iris_features)
theory_from_trepan = trepan.extract(train)
print('\nTrepan extracted rules:\n\n' + pretty_theory(theory_from_trepan))


Trepan extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength > 3.0, PetalLength in [3.0, 4.9], PetalWidth > 1.6.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength > 3.0, PetalLength in [3.0, 4.9].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength > 3.0.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa).
