In [1]:
from psyke.utils.dataframe_utils import get_discrete_features_equal_frequency
from psyke import Extractor
from psyke.utils.dataframe_utils import get_discrete_dataset
from psyke.utils.logic_utils import pretty_theory
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [2]:
x, y = load_iris(return_X_y=True, as_frame=True)

In [3]:
x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

In [4]:
iris_features = get_discrete_features_equal_frequency(x, b=3, output=False)
iris_features

{<psyke.schema.DiscreteFeature at 0x7f3ebf2f22e0>,
 <psyke.schema.DiscreteFeature at 0x7f3ebf2f2550>,
 <psyke.schema.DiscreteFeature at 0x7f3ebf2f27f0>,
 <psyke.schema.DiscreteFeature at 0x7f3ebfc18f40>}

In [5]:
x = get_discrete_dataset(x, iris_features)
x

Unnamed: 0,PetalLength_0,PetalLength_1,PetalLength_2,PetalWidth_0,PetalWidth_1,PetalWidth_2,SepalLength_0,SepalLength_1,SepalLength_2,SepalWidth_0,SepalWidth_1,SepalWidth_2
0,1,0,0,1,0,0,1,0,0,0,0,1
1,1,0,0,1,0,0,1,0,0,0,1,0
2,1,0,0,1,0,0,1,0,0,0,1,0
3,1,0,0,1,0,0,1,0,0,0,1,0
4,1,0,0,1,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
145,0,0,1,0,0,1,0,0,1,0,1,0
146,0,0,1,0,0,1,0,1,0,1,0,0
147,0,0,1,0,0,1,0,0,1,0,1,0
148,0,0,1,0,0,1,0,1,0,0,0,1


In [6]:
y = pd.DataFrame(y).replace({"target": {0: 'setosa', 1: 'virginica', 2: 'versicolor'}})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,versicolor
146,versicolor
147,versicolor
148,versicolor


In [7]:
dataset = x.join(y)
dataset.columns = [*dataset.columns[:-1], 'iris']
dataset

Unnamed: 0,PetalLength_0,PetalLength_1,PetalLength_2,PetalWidth_0,PetalWidth_1,PetalWidth_2,SepalLength_0,SepalLength_1,SepalLength_2,SepalWidth_0,SepalWidth_1,SepalWidth_2,iris
0,1,0,0,1,0,0,1,0,0,0,0,1,setosa
1,1,0,0,1,0,0,1,0,0,0,1,0,setosa
2,1,0,0,1,0,0,1,0,0,0,1,0,setosa
3,1,0,0,1,0,0,1,0,0,0,1,0,setosa
4,1,0,0,1,0,0,1,0,0,0,0,1,setosa
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,0,0,1,0,0,1,0,0,1,0,1,0,versicolor
146,0,0,1,0,0,1,0,1,0,1,0,0,versicolor
147,0,0,1,0,0,1,0,0,1,0,1,0,versicolor
148,0,0,1,0,0,1,0,1,0,0,0,1,versicolor


In [8]:
train, test = train_test_split(dataset, test_size=0.5, random_state=0)

In [9]:
predictor = KNeighborsClassifier(n_neighbors=7)
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

KNeighborsClassifier(n_neighbors=7)

In [12]:
real = Extractor.real(predictor, iris_features)
theory_from_real = real.extract(train)
print('REAL extracted rules:\n\n' + pretty_theory(theory_from_real))

REAL extracted rules:

iris(PetalLength9, PetalWidth9, SepalLength9, SepalWidth9, setosa) :- PetalWidth9 =< 1.0.
iris(PetalLength10, PetalWidth10, SepalLength10, SepalWidth10, versicolor) :- PetalLength10 > 4.9, SepalWidth10 in [2.9, 3.2].
iris(PetalLength11, PetalWidth11, SepalLength11, SepalWidth11, versicolor) :- PetalWidth11 > 1.6.
iris(PetalLength12, PetalWidth12, SepalLength12, SepalWidth12, virginica) :- SepalWidth12 =< 2.9.
iris(PetalLength13, PetalWidth13, SepalLength13, SepalWidth13, virginica) :- SepalLength13 in [5.4, 6.3].
iris(PetalLength14, PetalWidth14, SepalLength14, SepalWidth14, virginica) :- PetalWidth14 in [1.0, 1.6].


In [13]:
trepan = Extractor.trepan(predictor, iris_features)
theory_from_trepan = trepan.extract(train)
print('\nTrepan extracted rules:\n\n' + pretty_theory(theory_from_trepan))


Trepan extracted rules:

iris(PetalLength15, PetalWidth15, SepalLength15, SepalWidth15, virginica) :- PetalLength15 > 3.0, PetalLength15 in [3.0, 4.9].
iris(PetalLength16, PetalWidth16, SepalLength16, SepalWidth16, versicolor) :- PetalLength16 > 3.0.
iris(PetalLength17, PetalWidth17, SepalLength17, SepalWidth17, setosa) :- true.
