In [51]:
import numpy as np
import pandas as pd
from sklearn import datasets, model_selection, metrics
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import shuffle

In [3]:
# Load the whitespace-delimited 'ecoli.data' file. Because `names` is given,
# pandas treats the first line of the file as data and uses these nine names
# as the column labels.
#
# `sep` is a regular expression, so it is written as a raw string: in a plain
# string literal '\s' is an invalid escape sequence and triggers a
# DeprecationWarning / SyntaxWarning on recent Python versions.
#
# Note: as the preview below shows, `f1` holds a per-row sequence identifier
# (e.g. AAT_ECOLI), not a numeric measurement; the numeric columns are f2..f8.
data = pd.read_csv(
    'ecoli.data',
    names=['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'class'],
    sep=r'\s+',
)
data.head()

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,class
0,AAT_ECOLI,0.49,0.29,0.48,0.5,0.56,0.24,0.35,cp
1,ACEA_ECOLI,0.07,0.4,0.48,0.5,0.54,0.35,0.44,cp
2,ACEK_ECOLI,0.56,0.4,0.48,0.5,0.49,0.37,0.46,cp
3,ACKA_ECOLI,0.59,0.49,0.48,0.5,0.52,0.45,0.36,cp
4,ADI_ECOLI,0.23,0.32,0.48,0.5,0.55,0.25,0.35,cp


#### Train/test split (shuffled, 80% train / 20% test)

In [17]:
# Shuffle the rows, then cut the data 80 % / 20 % into train and test sets.

# Fixed seed so the split -- and therefore the confusion matrix and every
# metric computed from it -- is the same on each run.
RANDOM_STATE = 42

# sort_index() first restores the original row order, so re-running this cell
# on the already-shuffled frame yields the same permutation again instead of
# shuffling a shuffle.
data = data.sort_index().sample(frac=1, random_state=RANDOM_STATE)

# Column positions 1..7 are f2..f8; position 0 (f1) and position 8 (class)
# are left out of the feature matrix.
X = np.array(data.iloc[:, 1:8])
y = np.array(data['class'])

# Index of the first test row: the first 80 % of the shuffled rows are used
# for training, the remainder for testing.
cut = round(len(y) * .8)
x_train, x_test = X[:cut], X[cut:]
y_train, y_test = y[:cut], y[cut:]

In [18]:
# Fit a k-nearest-neighbours classifier on the training split and evaluate it
# on the held-out test split.
# (KNeighborsClassifier is imported in the notebook's import cell.)
N_NEIGHBORS = 8  # number of neighbours that vote on each prediction

knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn.fit(x_train, y_train)

y_pred = knn.predict(x_test)

# Rows are the true classes and columns the predicted classes. With no
# explicit `labels`, both axes follow the sorted set of labels that occur in
# y_test or y_pred.
c = metrics.confusion_matrix(y_test, y_pred)
print(c)

[[27  0  0  0  0  1]
 [ 2 12  0  0  0  1]
 [ 0  7  6  0  0  0]
 [ 0  0  0  2  0  0]
 [ 0  0  0  0  1  0]
 [ 0  0  0  0  0  8]]


### Precision per class (manual vs. scikit-learn)

In [65]:
# Per-class precision, computed by hand from the confusion matrix `c` and
# cross-checked against scikit-learn's precision_score.
labels = np.unique(y_test)

# The axes of `c` are ordered by the sorted labels found in y_test OR y_pred.
# If the classifier predicts a class that is absent from y_test, position i in
# `labels` no longer lines up with row/column i of `c`, so look the position up
# by label instead of assuming they match.
cm_index = {
    lab: pos
    for pos, lab in enumerate(np.unique(np.concatenate([y_test, y_pred])))
}

for i, label in enumerate(labels):
    k = cm_index[label]
    predicted = c[:, k].sum()  # column sum: test samples assigned to this class
    # A class that was never predicted has an undefined precision (0 / 0).
    # Report 0.0, which is what precision_score falls back to by default.
    manual = c[k][k] / predicted if predicted else 0.0
    print(f'class{i} manual precision:   ', manual)
    print(f'class{i} sklearn precision: ', precision_score(y_test, y_pred, labels=[label], average=None))
    print('')

class0 manual precision:    0.9310344827586207
class0 sklearn precision:  [0.93103448]

class1 manual precision:    0.631578947368421
class1 sklearn precision:  [0.63157895]

class2 manual precision:    1.0
class2 sklearn precision:  [1.]

class3 manual precision:    1.0
class3 sklearn precision:  [1.]

class4 manual precision:    1.0
class4 sklearn precision:  [1.]

class5 manual precision:    0.8
class5 sklearn precision:  [0.8]



### Recall per class (manual vs. scikit-learn)

In [66]:
# Per-class recall, computed by hand from the confusion matrix `c` and
# cross-checked against scikit-learn's recall_score.

# The axes of `c` are ordered by the sorted labels found in y_test OR y_pred,
# which is not necessarily the same sequence as `labels`. Map each label to
# its position in `c` rather than assuming row i belongs to labels[i].
cm_index = {
    lab: pos
    for pos, lab in enumerate(np.unique(np.concatenate([y_test, y_pred])))
}

for i, label in enumerate(labels):
    k = cm_index[label]
    actual = c[k].sum()  # row sum: test samples whose true class is `label`
    print(f'class{i} manual recall:   ', c[k][k] / actual)
    print(f'class{i} sklearn recall: ', recall_score(y_test, y_pred, labels=[label], average=None))
    print('')

class0 manual recall:    0.9642857142857143
class0 sklearn recall:  [0.96428571]

class1 manual recall:    0.8
class1 sklearn recall:  [0.8]

class2 manual recall:    0.46153846153846156
class2 sklearn recall:  [0.46153846]

class3 manual recall:    1.0
class3 sklearn recall:  [1.]

class4 manual recall:    1.0
class4 sklearn recall:  [1.]

class5 manual recall:    1.0
class5 sklearn recall:  [1.]

