In [13]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from config_algorithms import ACOConfig
from oqat import OQATClassifier, OQATModel
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import confusion_matrix


In [2]:
def pretty_print_confusion_matrix(confusion_matrix):
    for row in confusion_matrix:
        print(row)

In [3]:
# Read csv file and load it into a numpy array
# df = pd.read_csv('datasets/test2.csv')
df = pd.read_csv('datasets/hayes_roth.csv')

# Separate features and labels into two dataframes
X = df.drop('class', axis=1)
y = df['class']

# Define the feature type for each column (discrete or continuous)
column_names = X.columns.to_list()
column_types = ['cat', 'cat', 'cat']

# transform the dataframes into a numpy array
X = X.values
y = y.values

In [4]:
# Transform the string labels into integers
X = preprocessing.OrdinalEncoder().fit_transform(X)
y = preprocessing.LabelEncoder().fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [5]:
# Balance the training set

# print("Before SMOTE")
# print(pd.Series(y_train).value_counts())
# smote = SMOTE()
# X_train, y_train = smote.fit_resample(X_train, y_train)
# print("After SMOTE")
# print(pd.Series(y_train).value_counts())

In [6]:
# Run a classification tree algorithm from sklearn
tree_model = DecisionTreeClassifier()
tree_model.fit(X_train, y_train)
y_pred = tree_model.predict(X_test)
cf = confusion_matrix(y_test, y_pred)
print("Decision Tree")
pretty_print_confusion_matrix(cf)
print("Score: ", tree_model.score(X_test, y_test))
text_repr = tree.export_text(tree_model)
print(text_repr)

Decision Tree
[21  5  0]
[ 4 19  0]
[ 2  1 14]
Score:  0.8181818181818182
|--- feature_3 <= 2.50
|   |--- feature_2 <= 2.50
|   |   |--- feature_2 <= 0.50
|   |   |   |--- feature_1 <= 0.50
|   |   |   |   |--- class: 0
|   |   |   |--- feature_1 >  0.50
|   |   |   |   |--- feature_3 <= 0.50
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- feature_3 >  0.50
|   |   |   |   |   |--- feature_3 <= 1.50
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- feature_3 >  1.50
|   |   |   |   |   |   |--- feature_0 <= 0.50
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_0 >  0.50
|   |   |   |   |   |   |   |--- feature_0 <= 1.50
|   |   |   |   |   |   |   |   |--- feature_1 <= 1.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_1 >  1.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_0 >  1.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |--- fe

In [7]:
# # Run the OQAT algorithm
aco_config = ACOConfig(algorithm="vertex-ac", cycles=20, ants=10, alpha=1, rho=0.99, tau_max=6., tau_min=0.01)
classifier = OQATClassifier(collision_strategy="random", heuristic="aco", heuristic_config=aco_config)
classifier.fit(X_train, y_train, column_names, column_types)
print(classifier.model)
y_pred = classifier.predict(X_test, column_names)
print(y_pred)
cf = classifier.confusion_matrix(y_pred, y_test)
print("OQAT")
pretty_print_confusion_matrix(cf)
print("Score: ", classifier.score(y_pred, y_test))

Model for class 0 created
Model for class 1 created
Model for class 2 created
{0: (([age=0.0] ∨ [age=2.0] ∨ [education=0.0]) ∧ ([age=0.0] ∨ [education=2.0] ∨ [marital=0.0] ∨ [marital=2.0]) ∧ ([hobby=2.0] ∨ [hobby=0.0] ∨ [age=1.0] ∨ [education=0.0] ∨ [marital=0.0]) ∧ ([hobby=0.0] ∨ [age=0.0] ∨ [age=1.0] ∨ [education=2.0] ∨ [education=0.0] ∨ [marital=2.0] ∨ [marital=1.0]) ∧ ([hobby=0.0] ∨ [age=2.0] ∨ [age=1.0] ∨ [education=2.0] ∨ [education=1.0] ∨ [marital=2.0] ∨ [marital=1.0]) ∧ ([hobby=1.0] ∨ [hobby=2.0] ∨ [age=2.0] ∨ [age=0.0] ∨ [education=1.0] ∨ [education=2.0] ∨ [marital=1.0] ∨ [marital=0.0]) ∧ ([hobby=2.0] ∨ [hobby=1.0] ∨ [age=1.0] ∨ [age=2.0] ∨ [education=1.0] ∨ [education=0.0] ∨ [marital=2.0] ∨ [marital=0.0]) ∧ ([hobby=0.0] ∨ [hobby=2.0] ∨ [age=0.0] ∨ [age=2.0] ∨ [education=1.0] ∨ [education=2.0] ∨ [marital=0.0] ∨ [marital=1.0]) ∧ ([hobby=1.0] ∨ [hobby=2.0] ∨ [age=1.0] ∨ [age=0.0] ∨ [education=2.0] ∨ [education=0.0] ∨ [marital=1.0] ∨ [marital=2.0]) ∧ ([hobby=1.0] ∨ [hobby=0.0] ∨ 

In [27]:
# array tu numpy array
y_pred_np = np.array(y_pred)
y_pred_filtered = y_pred_np[y_test == 2]
X_test_filtered = X_test[y_test == 2]

y_pred_filtered_2 = y_pred_filtered[y_pred_filtered != 2]
X_test_filtered_2 = X_test_filtered[y_pred_filtered != 2]

print(X_test_filtered_2)
print(y_pred_filtered_2)

print(classifier.model[2])
y_pred_new = classifier.model[0].predict(X_test_filtered_2, column_names)
print(y_pred_new)
y_pred_new = classifier.model[1].predict(X_test_filtered_2, column_names)
print(y_pred_new)
y_pred_new = classifier.model[2].predict(X_test_filtered_2, column_names)
print(y_pred_new)


[[2. 3. 0. 2.]
 [2. 2. 3. 1.]
 [2. 1. 1. 3.]]
[0 1 1]
(([age=3.0] ∨ [education=3.0] ∨ [marital=3.0]))
[True, False, False]
[False, True, True]
[True, True, True]


In [28]:
classifier.collision_strategy = None
y_pred_2 = classifier.predict(X_test, column_names)
print(y_pred_2)

[{1}, {1}, set(), set(), {0}, {1}, {1}, {0}, {2}, {0}, {0}, {1}, {0}, {2}, {2}, {0}, {0}, {0}, {1}, {0, 2}, {1}, {0}, {0}, {2}, {2}, {1}, {0, 2}, {1}, {0}, {0}, {0}, {0, 2}, {2}, {1}, {2}, {0}, {1}, {0, 1}, {0}, {2}, {0}, {1}, {0}, {1}, {0, 2}, {0}, {0, 1}, {0}, {0}, {0}, {1}, {1}, set(), {0}, {1}, {0}, set(), {0}, {1, 2}, {2}, {0}, {0}, {1}, {2}, {2}, {1, 2}]
