# Classifying digits dataset with PolyACO+

In [14]:
from sklearn import datasets, neighbors, linear_model
from sklearn.cross_validation import train_test_split
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

import sys
import os.path as osp
import time
sys.path.append(osp.dirname(osp.dirname(osp.abspath('__file__'))))

import acoc
import utils
from utils import data_manager
from config import CLASSIFIER_CONFIG

In [15]:
data_set = data_manager.load_data_set('iris')

In [16]:
X = data_set.data
y = data_set.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

class_indices = list(set(y))

## SVM classification

In [30]:
r = []
for _ in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    clf = SVC(gamma=0.001)
    clf.fit(X_train, y_train)
    r.append(clf.score(X_test, y_test) * 100)
print("Classification score: {}".format(sum(r) / len(r)))

Classification score: 96.0


### LinearSVM

In [None]:
r = []
for _ in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    clf = LinearSVC()
    clf.fit(X_train, y_train)
    r.append(clf.score(X_test, y_test) * 100)
print("Classification score: {}".format(sum(r) / len(r)))

## Naive Bayes classification

In [None]:
r = []
for _ in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    clf = neighbors.KNeighborsClassifier()
    clf.fit(X_train, y_train)
    r.append(clf.score(X_test, y_test)*100)
print("Classification score: {}".format(sum(r) / len(r)))

## Logistic regression

In [None]:
r = []
for _ in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    clf = linear_model.LogisticRegression()
    clf.fit(X_train, y_train)
    r.append(clf.score(X_test, y_test)*100)
print("Classification score: {}".format(sum(r) / len(r)))

## PolyACO+ classification

In [None]:
CLASSIFIER_CONFIG.max_level = 3
CLASSIFIER_CONFIG.level_convergence_rate = 200

In [None]:
clf = acoc.PolyACO(X.shape[1], class_indices, CLASSIFIER_CONFIG)
clf.train(X_train, y_train)
predictions = clf.evaluate(X_test)
print("\rClassification score: {}".format(acoc.compute_score(predictions, y_test)))

# Matrix

In [31]:
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(iris.target_names))
    plt.xticks(tick_marks, iris.target_names, rotation=45)
    plt.yticks(tick_marks, iris.target_names)
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


# Compute confusion matrix
cm = confusion_matrix(y_test, predictions)
np.set_printoptions(precision=2)
print('Confusion matrix, without normalization')
print(cm)
plt.figure()
plot_confusion_matrix(cm)

# Normalize the confusion matrix by row (i.e by the number of samples
# in each class)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print('Normalized confusion matrix')
print(cm_normalized)
plt.figure()
plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')

plt.show()

ValueError: Found arrays with inconsistent numbers of samples: [10 50]