In [1]:
from psyke.utils.dataframe import get_discrete_features_equal_frequency
from psyke import Extractor
from psyke.utils.dataframe import get_discrete_dataset
from psyke.utils.logic import pretty_theory
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [2]:
x, y = load_iris(return_X_y=True, as_frame=True)

In [3]:
x.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

In [4]:
iris_features = get_discrete_features_equal_frequency(x, output=False, bin_names=['S', 'M', 'L'])

for descrete_feature in iris_features:
    print(str(descrete_feature), end='\n\n')

PetalWidth = {'PetalWidth_S' if PetalWidth ∈ ]-∞, 1.0[, 'PetalWidth_M' if PetalWidth ∈ [1.0, 1.6], 'PetalWidth_L' if PetalWidth ∈ ]1.6, ∞[}

PetalLength = {'PetalLength_S' if PetalLength ∈ ]-∞, 3.0[, 'PetalLength_M' if PetalLength ∈ [3.0, 4.9], 'PetalLength_L' if PetalLength ∈ ]4.9, ∞[}

SepalWidth = {'SepalWidth_S' if SepalWidth ∈ ]-∞, 2.9[, 'SepalWidth_M' if SepalWidth ∈ [2.9, 3.2], 'SepalWidth_L' if SepalWidth ∈ ]3.2, ∞[}

SepalLength = {'SepalLength_S' if SepalLength ∈ ]-∞, 5.4[, 'SepalLength_M' if SepalLength ∈ [5.4, 6.3], 'SepalLength_L' if SepalLength ∈ ]6.3, ∞[}



In [5]:
x = get_discrete_dataset(x, iris_features)
x

Unnamed: 0,PetalLength_L,PetalLength_M,PetalLength_S,PetalWidth_L,PetalWidth_M,PetalWidth_S,SepalLength_L,SepalLength_M,SepalLength_S,SepalWidth_L,SepalWidth_M,SepalWidth_S
0,0,0,1,0,0,1,0,0,1,1,0,0
1,0,0,1,0,0,1,0,0,1,0,1,0
2,0,0,1,0,0,1,0,0,1,0,1,0
3,0,0,1,0,0,1,0,0,1,0,1,0
4,0,0,1,0,0,1,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
145,1,0,0,1,0,0,1,0,0,0,1,0
146,1,0,0,1,0,0,0,1,0,0,0,1
147,1,0,0,1,0,0,1,0,0,0,1,0
148,1,0,0,1,0,0,0,1,0,1,0,0


In [6]:
y = pd.DataFrame(y).replace({"target": {0: 'setosa', 1: 'virginica', 2: 'versicolor'}})
y

Unnamed: 0,target
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa
...,...
145,versicolor
146,versicolor
147,versicolor
148,versicolor


In [7]:
dataset = x.join(y)
dataset.columns = [*dataset.columns[:-1], 'iris']
dataset

Unnamed: 0,PetalLength_L,PetalLength_M,PetalLength_S,PetalWidth_L,PetalWidth_M,PetalWidth_S,SepalLength_L,SepalLength_M,SepalLength_S,SepalWidth_L,SepalWidth_M,SepalWidth_S,iris
0,0,0,1,0,0,1,0,0,1,1,0,0,setosa
1,0,0,1,0,0,1,0,0,1,0,1,0,setosa
2,0,0,1,0,0,1,0,0,1,0,1,0,setosa
3,0,0,1,0,0,1,0,0,1,0,1,0,setosa
4,0,0,1,0,0,1,0,0,1,1,0,0,setosa
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,1,0,0,1,0,0,1,0,0,0,1,0,versicolor
146,1,0,0,1,0,0,0,1,0,0,0,1,versicolor
147,1,0,0,1,0,0,1,0,0,0,1,0,versicolor
148,1,0,0,1,0,0,0,1,0,1,0,0,versicolor


In [8]:
train, test = train_test_split(dataset, test_size=0.5, random_state=0)

In [9]:
predictor = KNeighborsClassifier(n_neighbors=7)
predictor.fit(train.iloc[:, :-1], train.iloc[:, -1])

KNeighborsClassifier(n_neighbors=7)

In [10]:
real = Extractor.real(predictor, iris_features)
theory_from_real = real.extract(train)
print('REAL extracted rules:\n\n' + pretty_theory(theory_from_real))

REAL extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa).
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    SepalLength > 6.3, SepalWidth > 3.2.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalWidth > 1.6.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength > 4.9, SepalWidth in [2.9, 3.2].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength in [3.0, 4.9].


In [11]:
trepan = Extractor.trepan(predictor, iris_features)
theory_from_trepan = trepan.extract(train)
print('\nTrepan extracted rules:\n\n' + pretty_theory(theory_from_trepan))


Trepan extracted rules:

iris(PetalLength, PetalWidth, SepalLength, SepalWidth, versicolor) :-
    PetalLength > 4.9.
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, virginica) :-
    PetalLength =< 4.9, PetalLength in [3.0, 4.9].
iris(PetalLength, PetalWidth, SepalLength, SepalWidth, setosa).
