In [23]:
import json
import sys

import pandas as pd

#  plot confusion matrices
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Make the repository root importable so the local py_oqat package resolves.
sys.path.insert(1, '../')
from py_oqat.config_algorithms import ACOConfig
from py_oqat.classifier import OQATClassifier

In [24]:
# Build the OQAT classifier with an ACO heuristic configuration; the trained
# model itself is restored from JSON below rather than fitted here.
aco_config = ACOConfig(
    algorithm="vertex-ac",
    cycles=20,
    ants=10,
    alpha=1,
    rho=0.99,
    tau_max=6.,
    tau_min=0.01,
)
classifier = OQATClassifier(
    collision_strategy="random",
    heuristic="aco",
    heuristic_config=aco_config,
)

# Read the serialized model; the file handle is only needed for parsing.
with open("trained_models/cars_model.json", "r") as f:
    model_json = json.load(f)
classifier.model_from_json(model_json)

print("Model loaded")
# One entry per learned class: its score, then each clause with its weight.
for learning_class, model in classifier.model.items():
    print(f'Class: {learning_class}')
    print(f'Score: {model["score"]}')
    for i, clause in enumerate(model['oqat_model'].model.clauses):
        clause_weight = model["cnf_weights"][i]
        print(f'\t{clause_weight: ^4} Clause: {clause}')


Model loaded
Class: 0
Score: 0.9461538461538461
	 69  Clause: ([buy=0.0] ∨ [buy=1.0] ∨ [doors=0.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [safety=0.0])
	 40  Clause: ([maint=0.0] ∨ [maint=1.0] ∨ [doors=0.0] ∨ [persons=0.0] ∨ [safety=0.0] ∨ [safety=1.0])
	 17  Clause: ([buy=0.0] ∨ [maint=0.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [lug_boot=1.0] ∨ [safety=0.0])
	 13  Clause: ([buy=0.0] ∨ [buy=1.0] ∨ [maint=0.0] ∨ [maint=1.0] ∨ [persons=0.0] ∨ [lug_boot=2.0] ∨ [safety=2.0] ∨ [safety=0.0])
	 12  Clause: ([buy=0.0] ∨ [maint=2.0] ∨ [maint=0.0] ∨ [maint=3.0] ∨ [persons=0.0] ∨ [lug_boot=2.0] ∨ [safety=0.0] ∨ [safety=1.0])
	 8   Clause: ([buy=2.0] ∨ [buy=3.0] ∨ [maint=1.0] ∨ [maint=0.0] ∨ [doors=0.0] ∨ [doors=1.0] ∨ [persons=0.0] ∨ [lug_boot=0.0] ∨ [safety=2.0] ∨ [safety=0.0])
	 7   Clause: ([buy=1.0] ∨ [buy=0.0] ∨ [maint=1.0] ∨ [maint=3.0] ∨ [doors=2.0] ∨ [persons=0.0] ∨ [safety=0.0] ∨ [safety=1.0])
	 5   Clause: ([buy=2.0] ∨ [maint=0.0] ∨ [maint=1.0] ∨ [doors=3.0] ∨ [doors=1.0] ∨ [doors=2.0] ∨ [pers

In [25]:
# Seed shared by every stochastic step that accepts one (train/test split).
SEED = 420

# load data
df = pd.read_csv("datasets/car.csv")

# Separate features and labels into two dataframes
X = df.drop('class', axis=1)
y = df['class']

# Define the feature type for each column (discrete or continuous).
# Every feature column is ordinal-encoded below, so each one is declared
# categorical; deriving the list from column_names keeps the two in sync
# (a hard-coded list of three entries did not match the number of features).
column_names = X.columns.to_list()
column_types = ['cat'] * len(column_names)

# transform the dataframes into a numpy array
X = X.values
y = y.values

# Transform the string categories and labels into numeric codes
X = preprocessing.OrdinalEncoder().fit_transform(X)
y = preprocessing.LabelEncoder().fit_transform(y)

# Split the data into training and testing sets (50/50, reproducible via SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=SEED)

# Classes distribution
print("For training")
print(pd.Series(y_train).value_counts())
print("For testing")
print(pd.Series(y_test).value_counts())


For training
0    609
1    189
2     34
3     32
dtype: int64
For testing
0    601
1    195
2     35
3     33
dtype: int64


In [26]:
def pretty_print_confusion_matrix(confusion_matrix):
    """Print a confusion matrix to stdout, one row per line.

    Args:
        confusion_matrix: Iterable of rows; each row is printed using its
            ``str()`` representation. Note that inside this function the
            parameter name hides any outer ``confusion_matrix`` callable.

    Returns:
        None. Nothing is printed when the matrix has no rows.
    """
    rendered_rows = [str(matrix_row) for matrix_row in confusion_matrix]
    if rendered_rows:
        print("\n".join(rendered_rows))

In [27]:
# Baseline: run a classification tree algorithm from sklearn on the same split.
# random_state is fixed because the tree's split search is randomized, so
# without it the baseline numbers can change from one run to the next.
tree_model = DecisionTreeClassifier(random_state=SEED)
tree_model.fit(X_train, y_train)
y_pred = tree_model.predict(X_test)
# sklearn's confusion_matrix takes (y_true, y_pred): rows are true classes.
cf = confusion_matrix(y_test, y_pred)
print("Decision Tree")
pretty_print_confusion_matrix(cf)
print("Score: ", tree_model.score(X_test, y_test))

Decision Tree
[593   8   0   0]
[ 13 173   8   1]
[ 1  6 28  0]
[ 0  2  0 31]
Score:  0.9548611111111112


In [28]:
# Evaluate the loaded OQAT model with the collision strategy it was
# constructed with ("random"). From here on y_pred and cf hold the OQAT
# results, replacing the decision-tree values of the same names.
y_pred = classifier.predict(X_test, column_names)
cf = classifier.confusion_matrix(y_pred, y_test)

print("OQAT")
pretty_print_confusion_matrix(cf)
oqat_random_score = classifier.score(y_pred, y_test)
print("Score: ", oqat_random_score)

OQAT
[579, 10, 4, 0]
[10, 166, 7, 8]
[0, 8, 24, 3]
[0, 4, 2, 27]
Score:  0.9342723004694836


In [29]:
# Switch to the "other" collision strategy. Judging by how y_pred_2 is printed
# in the comparison table at the end of the notebook (values such as {1} and
# {1, 3}), predict() then yields a set of candidate classes per sample
# instead of a single label.
classifier.collision_strategy = "other"
y_pred_2 = classifier.predict(X_test, column_names)
# NOTE(review): cf2 is never displayed in the visible cells, and it is unclear
# whether confusion_matrix() is meaningful for set-valued predictions - confirm.
cf2 = classifier.confusion_matrix(y_pred_2, y_test)

In [30]:
# Re-run prediction with the "best_score" collision strategy
# (presumably ties are resolved using each class model's score - TODO confirm
# against OQATClassifier).
classifier.collision_strategy = "best_score"
y_pred_3 = classifier.predict(X_test, column_names)
cf3 = classifier.confusion_matrix(y_pred_3, y_test)

pretty_print_confusion_matrix(cf3)
best_score_accuracy = classifier.score(y_pred_3, y_test)
print("Score: ", best_score_accuracy)

[585, 3, 5, 0]
[26, 149, 7, 9]
[0, 0, 35, 0]
[0, 1, 2, 30]
Score:  0.937793427230047


In [31]:
# With the "weights" strategy predict() is used to collect the per-class
# weights for every test sample (printed later as {class: array([...])}),
# which are kept only for the comparison table.
classifier.collision_strategy = "weights"
weights = classifier.predict(X_test, column_names)

# The "weighted" strategy produces the actual class predictions
# (presumably by choosing the class from those weights - TODO confirm).
classifier.collision_strategy = "weighted"
y_pred_4 = classifier.predict(X_test, column_names)
cf4 = classifier.confusion_matrix(y_pred_4, y_test)

pretty_print_confusion_matrix(cf4)
weighted_accuracy = classifier.score(y_pred_4, y_test)
print("Score: ", weighted_accuracy)

[579, 17, 5, 0]
[3, 175, 8, 9]
[0, 0, 29, 6]
[0, 1, 2, 30]
Score:  0.9409722222222222


In [32]:
# Same two-step evaluation as the weighted strategy, using the normalised
# variants: first collect the per-class normalised weights for the table...
classifier.collision_strategy = "weights_norm"
weights_norm = classifier.predict(X_test, column_names)

# ...then obtain the class predictions under "weighted_norm".
classifier.collision_strategy = "weighted_norm"
y_pred_5 = classifier.predict(X_test, column_names)
cf5 = classifier.confusion_matrix(y_pred_5, y_test)

pretty_print_confusion_matrix(cf5)
weighted_norm_accuracy = classifier.score(y_pred_5, y_test)
print("Score: ", weighted_norm_accuracy)

[593, 4, 4, 0]
[26, 159, 4, 6]
[4, 10, 15, 6]
[0, 9, 2, 22]
Score:  0.9131944444444444


In [33]:
# Side-by-side comparison of every collision strategy on each test sample.
# The header names all nine printed columns, in print order: the two weight
# dictionaries come before the candidate-class sets.
print('    X                 y_test   random     score    weighted   weighted_norm   weights   weights_norm   sets')
comparison_rows = zip(
    X_test, y_test, y_pred, y_pred_3, y_pred_4, y_pred_5, weights, weights_norm, y_pred_2
)
for (sample, truth, pred_random, pred_score, pred_weighted,
     pred_weighted_norm, sample_weights, sample_weights_norm, candidate_set) in comparison_rows:
    # Single-label columns are centred in 9 characters; the dict/set columns
    # are printed as-is because their width varies per row.
    print(
        sample,
        f'{truth: ^9}',
        f'{pred_random: ^9}',
        f'{pred_score: ^9}',
        f'{pred_weighted: ^9}',
        f'{pred_weighted_norm: ^9}',
        f'{sample_weights}',
        f'{sample_weights_norm}',
        f'{candidate_set}',
    )

    X                 y_test   random     score    weighted   weighted_norm   sets
[3. 1. 1. 1. 2. 1.]     1         1         1         1         1     {0: array([92.]), 1: array([471.]), 2: array([552.]), 3: array([188.])} {0: array([0.51685393]), 1: array([1.]), 2: array([0.95172414]), 3: array([0.32246998])} {1}
[2. 1. 1. 0. 2. 2.]     0         0         0         0         0     {0: array([178.]), 1: array([261.]), 2: array([139.]), 3: array([534.])} {0: array([1.]), 1: array([0.55414013]), 2: array([0.23965517]), 3: array([0.91595197])} {0}
[3. 1. 3. 2. 0. 2.]     1         1         3         3         1     {0: array([166.]), 1: array([471.]), 2: array([552.]), 3: array([583.])} {0: array([0.93258427]), 1: array([1.]), 2: array([0.95172414]), 3: array([1.])} {1, 3}
[3. 1. 0. 0. 0. 1.]     0         0         0         0         0     {0: array([178.]), 1: array([261.]), 2: array([462.]), 3: array([156.])} {0: array([1.]), 1: array([0.55414013]), 2: array([0.79655172]), 3: arra