# Evaluation of the Inaccuracy class

In [1]:
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_digits

In [2]:
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams

In [3]:
# Default size for every figure in this notebook: 10 wide by 5 high.
rcParams["figure.figsize"] = (10, 5)

In [4]:
from fastautoml.fastautoml import Inaccuracy

## Inaccuracy of a Model

In [7]:
# Load the scikit-learn digits dataset directly as a (features, labels) pair.
X, y = load_digits(return_X_y=True)

In [8]:
# Fit a decision tree on the full dataset, requiring at least 5 samples per leaf.
# No random_state is set here, so the fitted tree may differ between runs.
tree = DecisionTreeClassifier(min_samples_leaf=5)
tree.fit(X, y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=5, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [9]:
# Create the inaccuracy estimator and fit it on the same data used to train the tree.
inacc = Inaccuracy()
inacc.fit(X, y)

Inaccuracy()

In [11]:
# Inaccuracy of the fitted tree (presumably measured against the X, y passed to
# inacc.fit() -- confirm against the fastautoml documentation).
inacc.inaccuracy_model(tree)

0.17094976238811344

We can also work directly with the set of predictions, instead of using the model. This allows us to compute the inaccuracy of models not implemented by scikit-learn.

In [13]:
# Predictions of the fitted tree on the training features.
pred = tree.predict(X)

In [14]:
# Same metric computed from the raw predictions, without passing the model object.
inacc.inaccuracy_predictions(pred)

0.17094976238811344

## Compare score with inaccuracy

In [5]:
# Load the scikit-learn digits dataset directly as a (features, labels) pair.
X, y = load_digits(return_X_y=True)

In [6]:
# Create the inaccuracy estimator and fit it on the digits data; the loop in the
# next cell evaluates every tree against this fitted estimator.
inacc = Inaccuracy()
inacc.fit(X, y)

Inaccuracy()

Train a collection of decision tree classifiers with different maximum depths, compute a classical score and the new inaccuracy metric, and compare the results.

In [None]:
# For each maximum depth from 1 to 19, fit a tree on the full dataset and
# record its classical error (1 - accuracy) next to its inaccuracy.
scores, inaccuracies = [], []

for max_depth in range(1, 20):
    tree = DecisionTreeClassifier(random_state=42, max_depth=max_depth).fit(X, y)

    error_rate = 1 - tree.score(X, y)
    scores.append(error_rate)
    inaccuracies.append(inacc.inaccuracy_model(tree))

In [None]:
# Draw both error curves over the same depth axis (1 through 19).
depths = range(1, 20)
for label, values in (("Score", scores), ("Inaccuracy", inaccuracies)):
    plt.plot(depths, values, label=label)
plt.xlabel("Tree Depth")
plt.ylabel("Error")
plt.legend()
plt.show()

## Adding errors

Study the behavior of score and inaccuracy when we introduce more errors in the dataset.

In [None]:
# Reference tree for this section: at least 5 samples per leaf, fixed seed.
# It is trained once on the clean data and never retrained in the cells below.
tree = DecisionTreeClassifier(min_samples_leaf=5, random_state=42)
tree.fit(X, y)

In [None]:
# Baseline: fit the estimator on the clean data, then evaluate the tree.
inacc.fit(X, y)
inacc.inaccuracy_model(tree)

In [None]:
# Baseline classical error: one minus the tree's accuracy on the clean data.
1 - tree.score(X, y)

Let's see what happens if we make the same error one hundred times.

In [None]:
# Extend the dataset with n_errors copies of the first sample, each carrying
# the same wrong label (true digit + 1, modulo 10): one error repeated many times.
# The arrays are built in a single step because np.append returns a new copy
# on every call, so appending inside a loop re-copies the whole dataset each time.
n_errors = 100
X2 = np.vstack([X, np.repeat(X[:1], n_errors, axis=0)])
y2 = np.concatenate([y, np.full(n_errors, (y[0] + 1) % 10, dtype=y.dtype)])

In [None]:
# Re-fit the estimator on the dataset with the repeated error; the tree itself
# is not retrained, only evaluated.
inacc.fit(X2, y2)
inacc.inaccuracy_model(tree)

In [None]:
# Classical error of the same tree on the dataset with the repeated error.
1 - tree.score(X2, y2)

The theory of nescience states that making the same error one hundred times is not that bad. Let's see what happens if we make one hundred different errors.

In [None]:
# Extend the dataset with mislabeled copies (true digit + 1, modulo 10) of
# n_errors randomly chosen samples.
# Sampling is done without replacement so the errors really are all different;
# independent random draws could pick the same sample more than once.
# The generator is seeded so the cell gives the same result on every run.
n_errors = 100
rng = np.random.RandomState(42)
error_idx = rng.choice(X.shape[0], size=n_errors, replace=False)
X3 = np.vstack([X, X[error_idx]])
y3 = np.concatenate([y, (y[error_idx] + 1) % 10])

In [None]:
# Re-fit the estimator on the dataset with the different errors; the tree itself
# is not retrained, only evaluated.
inacc.fit(X3, y3)
inacc.inaccuracy_model(tree)

In [None]:
# Classical error of the same tree on the dataset with the different errors.
1 - tree.score(X3, y3)

Making one hundred different errors is worse than making the same error one hundred times.

## Imbalanced dataset

Study the behavior of score and inaccuracy in a highly imbalanced dataset.

In [None]:
from sklearn.datasets import make_classification

In [None]:
# Sweep the minimum leaf size from 1 to 99 on a 95%/5% imbalanced two-class
# problem, recording the classical error (1 - accuracy) and the inaccuracy of
# a tree trained with each setting.
# NOTE: this cell rebinds `inacc` (the Inaccuracy estimator in earlier cells)
# to a plain list, and `X`, `y` (the digits data) to synthetic data; re-run the
# earlier cells before reusing those names.
depth = []  # despite the name, holds the min_samples_leaf values (x axis of the plot)
score = []
inacc = []

inaccuracy = Inaccuracy()

for i in range(1, 100):

    # A new dataset is drawn for every leaf size; seeding it with the loop
    # index keeps the whole sweep reproducible from run to run.
    X, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0,
                               class_sep=2, flip_y=0, weights=[0.95, 0.05],
                               random_state=i)

    inaccuracy.fit(X, y)

    # Fixed seed, matching the other trees in this notebook.
    tree = DecisionTreeClassifier(min_samples_leaf=i, random_state=42)
    tree.fit(X, y)

    depth.append(i)
    score.append(1 - tree.score(X, y))
    inacc.append(inaccuracy.inaccuracy_model(tree))

In [None]:
# Plot both error measures against the minimum leaf size of the tree.
plt.plot(depth, score, label="Score")
plt.plot(depth, inacc, label="Inaccuracy")
# The data comes from make_classification with 95%/5% class weights, so the
# title describes that rather than isotropic blobs.
plt.title("Imbalanced Dataset (95% / 5%)")
plt.ylabel("Error")
plt.xlabel("Minimum Leaf Size")
plt.legend(loc='best')
# Render explicitly, as the depth plot earlier in the notebook does.
plt.show()

As we can see, the classical score does not cope well with imbalanced datasets, a problem that the new inaccuracy metric does not suffer from.