In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from config_algorithms import ACOConfig
from oqat import OQATClassifier, OQATModel
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import confusion_matrix


In [2]:
SEED = 420

def pretty_print_confusion_matrix(confusion_matrix):
    for row in confusion_matrix:
        print(row)

In [3]:
# Read csv file and load it into a numpy array
# df = pd.read_csv('datasets/test2.csv')
# df = pd.read_csv('datasets/hayes_roth.csv')
df = pd.read_csv('datasets/car.csv')


# Separate features and labels into two dataframes
X = df.drop('class', axis=1)
y = df['class']

# Define the feature type for each column (discrete or continuous)
column_names = X.columns.to_list()
column_types = ['cat', 'cat', 'cat']

# transform the dataframes into a numpy array
X = X.values
y = y.values

In [4]:
# Transform the string labels into integers
X = preprocessing.OrdinalEncoder().fit_transform(X)
y = preprocessing.LabelEncoder().fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=SEED)

In [5]:
# Balance the training set

print("Before SMOTE")
print(pd.Series(y_train).value_counts())
# smote = SMOTE()
# X_train, y_train = smote.fit_resample(X_train, y_train)
# print("After SMOTE")
# print(pd.Series(y_train).value_counts())

Before SMOTE
0    249
1     70
2     14
3     12
dtype: int64


In [6]:
# Run a classification tree algorithm from sklearn
tree_model = DecisionTreeClassifier()
tree_model.fit(X_train, y_train)
y_pred = tree_model.predict(X_test)
cf = confusion_matrix(y_test, y_pred)
print("Decision Tree")
pretty_print_confusion_matrix(cf)
print("Score: ", tree_model.score(X_test, y_test))
text_repr = tree.export_text(tree_model)
print(text_repr)

Decision Tree
[930  29   2   0]
[ 37 255  21   1]
[ 0  6 47  2]
[ 0 10  0 43]
Score:  0.9219088937093276
|--- feature_3 <= 0.50
|   |--- class: 0
|--- feature_3 >  0.50
|   |--- feature_5 <= 0.50
|   |   |--- class: 0
|   |--- feature_5 >  0.50
|   |   |--- feature_0 <= 1.50
|   |   |   |--- feature_1 <= 1.50
|   |   |   |   |--- feature_1 <= 0.50
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- feature_1 >  0.50
|   |   |   |   |   |--- feature_0 <= 0.50
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- feature_0 >  0.50
|   |   |   |   |   |   |--- feature_5 <= 1.50
|   |   |   |   |   |   |   |--- feature_4 <= 0.50
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |--- feature_4 >  0.50
|   |   |   |   |   |   |   |   |--- feature_3 <= 1.50
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- feature_3 >  1.50
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- feature_5 >  1.50
|

In [7]:
# # Run the OQAT algorithm
aco_config = ACOConfig(algorithm="vertex-ac", cycles=20, ants=10, alpha=1, rho=0.99, tau_max=6., tau_min=0.01)
classifier = OQATClassifier(collision_strategy="random", heuristic="aco", heuristic_config=aco_config)
classifier.fit(X_train, y_train, column_names, column_types)
print(classifier.model)

Model for class 0 created
Score: 0.9134615384615384
Model for class 1 created
Score: 0.7692307692307693
Model for class 2 created
Score: 0.9807692307692307
Model for class 3 created
Score: 0.9615384615384616
{0: {'oqat_model': (([maint=1.0] ∨ [maint=0.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [safety=0.0]) ∧ ([buy=1.0] ∨ [buy=0.0] ∨ [maint=0.0] ∨ [doors=0.0] ∨ [persons=0.0] ∨ [safety=0.0]) ∧ ([buy=0.0] ∨ [maint=0.0] ∨ [doors=1.0] ∨ [doors=3.0] ∨ [persons=2.0] ∨ [persons=0.0] ∨ [safety=0.0]) ∧ ([buy=0.0] ∨ [buy=1.0] ∨ [maint=2.0] ∨ [maint=3.0] ∨ [maint=1.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [safety=0.0]) ∧ ([buy=2.0] ∨ [buy=1.0] ∨ [buy=3.0] ∨ [maint=1.0] ∨ [maint=0.0] ∨ [doors=0.0] ∨ [doors=1.0] ∨ [persons=0.0] ∨ [persons=2.0] ∨ [lug_boot=2.0] ∨ [lug_boot=1.0] ∨ [safety=0.0] ∨ [safety=1.0]) ∧ ([buy=0.0] ∨ [buy=3.0] ∨ [buy=2.0] ∨ [maint=2.0] ∨ [maint=3.0] ∨ [maint=0.0] ∨ [doors=3.0] ∨ [doors=2.0] ∨ [doors=0.0] ∨ [persons=1.0] ∨ [persons=0.0] ∨ [lug_boot=2.0] ∨ [lug_boot=1.0] ∨ [safety=0.0] 

In [8]:
y_pred = classifier.predict(X_test, column_names)
print(y_pred)
cf = classifier.confusion_matrix(y_pred, y_test)
print("OQAT")
pretty_print_confusion_matrix(cf)
print("Score: ", classifier.score(y_pred, y_test))

[1, 0, 2, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 1, -1, 0, 0, 1, 0, 0, 3, 0, 0, 0, -1, 0, 1, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 1, 0, 0, 0, -1, 1, 1, 1, 0, 1, 0, 2, 0, 0, -1, 1, 0, 0, 0, 1, -1, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0, -1, 0, 0, -1, 3, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, -1, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, -1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, 1, 0, 0, -1, 0, 0, 0, 0, 1, 2, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1, -1, 0, 1, 0, 3, 2, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0

In [9]:
classifier.collision_strategy = "other"
y_pred_2 = classifier.predict(X_test, column_names)
cf2 = classifier.confusion_matrix(y_pred_2, y_test)

In [10]:
classifier.collision_strategy = "best_score"
y_pred_3 = classifier.predict(X_test, column_names)
print(y_pred_3)
cf3 = classifier.confusion_matrix(y_pred_3, y_test)
pretty_print_confusion_matrix(cf3)
print("Score: ", classifier.score(y_pred_3, y_test))

[1, 0, 2, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 0, 0, 0, 0, -1, 2, 0, 0, 1, -1, 0, 0, 0, 0, 0, 3, 0, 0, 0, -1, 0, 1, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 1, 0, 0, 1, 2, 2, 0, 0, -1, 1, 0, 0, 0, 1, -1, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, -1, 0, 0, -1, 3, 0, 0, 0, 0, 2, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, -1, 2, 0, 0, 2, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, -1, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, -1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 3, 1, 1, 0, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, 1, 0, 0, -1, 0, 0, 0, 0, 1, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 2, -1, 0, 1, 0, 3, 2, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0

In [11]:
classifier.collision_strategy = "weights"
weights = classifier.predict(X_test, column_names)

classifier.collision_strategy = "weighted"
y_pred_4 = classifier.predict(X_test, column_names)
print(y_pred_4)
cf4 = classifier.confusion_matrix(y_pred_4, y_test)
pretty_print_confusion_matrix(cf4)
print("Score: ", classifier.score(y_pred_4, y_test))

[1, 0, 3, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 2, 1, 1, 0, 0, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 0, 1, 0, 2, 0, 0, 1, 0, 3, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 2, 2, 0, 0, 3, 1, 0, 1, 1, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 2, 1, 1, 0, 0, 3, 0, 0, 2, 3, 0, 0, 0, 0, 2, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 2, 3, 2, 0, 0, 2, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 3, 1, 0, 0, 2, 0, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 3, 1, 1, 0, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 2, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 3, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 1, 2, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 3, 0, 1, 0, 3, 2, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 

In [12]:
classifier.collision_strategy = "weights_norm"
weights_norm = classifier.predict(X_test, column_names)

classifier.collision_strategy = "weighted_norm"
y_pred_5 = classifier.predict(X_test, column_names)
cf5 = classifier.confusion_matrix(y_pred_5, y_test)
pretty_print_confusion_matrix(cf5)
print("Score: ", classifier.score(y_pred_5, y_test))

[929, 32, 0, 0]
[60, 218, 15, 21]
[0, 34, 19, 2]
[0, 23, 4, 26]
Score:  0.8618944323933478


In [13]:

for learning_class, model in classifier.model.items():
    print(f'Class: {learning_class}')
    print(f'Score: {model["score"]}')
    for i, clause in enumerate(model['oqat_model'].model.clauses):
        print(f'\t{model["cnf_weights"][i]: ^4} Clause: {clause}')

print(f'    X                 y_test   random     score    weighted   weighted_norm   sets')
for i in range(len(y_test)):
    print(X_test[i], f'{y_test[i]: ^9}', f'{y_pred[i]: ^9}', f'{y_pred_3[i]: ^9}', f'{y_pred_4[i]: ^9}', f'{y_pred_5[i]: ^9}', f'{weights[i]}', f'{weights_norm[i]}', f'{y_pred_2[i]}')

Class: 0
Score: 0.9134615384615384
	 32  Clause: ([maint=1.0] ∨ [maint=0.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [safety=0.0])
	 15  Clause: ([buy=1.0] ∨ [buy=0.0] ∨ [maint=0.0] ∨ [doors=0.0] ∨ [persons=0.0] ∨ [safety=0.0])
	 7   Clause: ([buy=0.0] ∨ [maint=0.0] ∨ [doors=1.0] ∨ [doors=3.0] ∨ [persons=2.0] ∨ [persons=0.0] ∨ [safety=0.0])
	 6   Clause: ([buy=0.0] ∨ [buy=1.0] ∨ [maint=2.0] ∨ [maint=3.0] ∨ [maint=1.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [safety=0.0])
	 3   Clause: ([buy=2.0] ∨ [buy=1.0] ∨ [buy=3.0] ∨ [maint=1.0] ∨ [maint=0.0] ∨ [doors=0.0] ∨ [doors=1.0] ∨ [persons=0.0] ∨ [persons=2.0] ∨ [lug_boot=2.0] ∨ [lug_boot=1.0] ∨ [safety=0.0] ∨ [safety=1.0])
	 1   Clause: ([buy=0.0] ∨ [buy=3.0] ∨ [buy=2.0] ∨ [maint=2.0] ∨ [maint=3.0] ∨ [maint=0.0] ∨ [doors=3.0] ∨ [doors=2.0] ∨ [doors=0.0] ∨ [persons=1.0] ∨ [persons=0.0] ∨ [lug_boot=2.0] ∨ [lug_boot=1.0] ∨ [safety=0.0] ∨ [safety=1.0])
Class: 1
Score: 0.7692307692307693
	 92  Clause: ([safety=1.0] ∨ [safety=2.0])
	 56  Clause: ([persons=