In [None]:
import sys
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.datasets import load_svmlight_file
from sklearn import preprocessing
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def main(data, K_value, distance, vet_acur, vet_confusion):
    """Train and evaluate one kNN classifier on an svmlight-format dataset.

    The file is loaded, split 50/50 into train and test partitions with a
    fixed seed (``random_state=5``), a ``KNeighborsClassifier`` is fitted on
    the training half, and the test-set accuracy is printed.

    Parameters
    ----------
    data : str or file-like
        Dataset location, forwarded unchanged to ``load_svmlight_file``.
    K_value : int
        Number of neighbours (``n_neighbors``).
    distance : str
        Distance metric name, forwarded as ``metric``.
    vet_acur : list
        Output accumulator; the test accuracy of this run is appended.
    vet_confusion : list
        Output accumulator; the confusion matrix of this run is appended.

    Returns
    -------
    None
        Results are reported only through the two accumulator lists and
        the printed accuracy line.
    """
    # Load the feature vectors (X) and labels (y).
    X_data, y_data = load_svmlight_file(data)

    # Fixed seed so every (k, metric) combination sees the same partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X_data, y_data, test_size=0.5, random_state=5)

    # load_svmlight_file returns a sparse matrix; work on dense arrays.
    X_train = X_train.toarray()
    X_test = X_test.toarray()

    # NOTE: feature scaling is currently disabled. If it is enabled, fit the
    # scaler on X_train only and apply ``transform`` (not ``fit_transform``)
    # to X_test, otherwise the two halves end up on different scales.

    # Build and fit the kNN classifier.
    neigh = KNeighborsClassifier(n_neighbors=K_value, metric=distance)
    neigh.fit(X_train, y_train)

    # Classify the test set exactly once; every metric below reuses y_pred.
    y_pred = neigh.predict(X_test)

    # Accuracy is the fraction of matching labels. Deriving it from y_pred
    # avoids the two extra full predictions that calling neigh.score()
    # twice would trigger.
    accuracy = float(np.mean(y_pred == y_test))
    print ('Accuracy: ', accuracy)

    # Store the accuracy of this run.
    vet_acur.append(accuracy)

    # Build and store the confusion matrix of this run.
    cm = confusion_matrix(y_test, y_pred)
    vet_confusion.append(cm)
        
        
#         print (cm)
#         print(classification_report(y_test, y_pred, labels=[0,1,2,3,4,5,6,7,8,9]))

In [None]:
# archive = 'features_1.txt'

# main(archive)

In [None]:
# K values 1..10; used as the x-axis of the accuracy plots.
vet_k = list(range(1, 11))

In [None]:
# One (accuracies, confusion matrices) pair of lists per distance metric.
euclidean, euclidean_confusion = [], []
manhattan, manhattan_confusion = [], []
minkowski, minkowski_confusion = [], []
chebyshev, chebyshev_confusion = [], []

# Scratch accumulators handed to main(), which appends to them.
vet_acur, vet_confusion = [], []

In [None]:
# Distance metric names, each passed to main() as `distance`.
method = [
    'euclidean',
    'manhattan',
    'minkowski',
    'chebyshev',
]

In [None]:
# Feature files, each evaluated with the same configuration (k=3, Euclidean).
arquivos = [
    'features_4.txt',
    'features_2.txt',
    'features_1.txt',
    'features_3.txt',
    'features_7.txt',
    'features_6.txt',
    'features_5.txt',
    'features_10.txt',
    'features_9.txt',
    'features_8.txt',
]

# main() appends one accuracy and one confusion matrix per file.
for archive in arquivos:
    main(archive, K_value=3, distance='euclidean',
         vet_acur=vet_acur, vet_confusion=vet_confusion)

In [None]:
# Sweep k = 1..10 for every distance metric on a single feature file.
archive = 'features_6.txt'

for distance in method:
    # Fresh accumulators per metric; main() appends one entry per k.
    vet_acur, vet_confusion = [], []
    for k_value in range(1, 11):
        print(distance, ': k =', k_value)
        main(archive, k_value, distance, vet_acur, vet_confusion)

    # Shallow copies of this metric's results, stored under the metric's name.
    acur_snapshot, confusion_snapshot = list(vet_acur), list(vet_confusion)
    if distance == 'euclidean':
        euclidean, euclidean_confusion = acur_snapshot, confusion_snapshot
    elif distance == 'manhattan':
        manhattan, manhattan_confusion = acur_snapshot, confusion_snapshot
    elif distance == 'minkowski':
        minkowski, minkowski_confusion = acur_snapshot, confusion_snapshot
    else:
        # Any remaining metric name is stored as the Chebyshev result.
        chebyshev, chebyshev_confusion = acur_snapshot, confusion_snapshot

In [None]:
# 2x2 grid of accuracy-vs-K curves, one panel per distance metric.
fig, grid_axes = plt.subplots(2, 2, figsize=(25, 15), sharex=True)
(ax1, ax2), (ax3, ax4) = grid_axes

# (axes, accuracies, y-axis label, whether the panel is on the bottom row)
panels = [
    (ax1, euclidean, 'Acurácia Euclidiana', False),
    (ax2, manhattan, 'Acurácia Manhattan', False),
    (ax3, minkowski, 'Acurácia Minkowski', True),
    (ax4, chebyshev, 'Acurácia Chebyshev', True),
]

for panel_ax, accuracies, y_label, on_bottom_row in panels:
    panel_ax.plot(vet_k, accuracies, '.-')
    panel_ax.set_ylabel(y_label)
    if on_bottom_row:
        # Only the bottom row gets an x-axis label.
        panel_ax.set_xlabel('K')
    panel_ax.grid()

plt.xticks(vet_k)
fig.tight_layout()
plt.show()
fig.savefig('k_variation.png', dpi=fig.dpi)

In [None]:
#https://stackoverflow.com/questions/43374920/how-to-automatically-annotate-maximum-value-in-pyplot
#https://matplotlib.org/stable/gallery/subplots_axes_and_figures/subplots_demo.html

In [None]:
# Single-panel figure for the Chebyshev accuracy curve.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))

def annot_max(x,y, ax=None):
    """Annotate the maximum of ``y`` on a plot with an arrow and a text box.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the plotted curve. Plain lists and tuples are
        accepted as well as NumPy arrays.
    ax : object with an ``annotate`` method, optional
        Axes to annotate. When omitted, the current pyplot axes
        (``plt.gca()``) is used.

    Raises
    ------
    ValueError
        If ``y`` is empty (raised by ``np.argmax``).
    """
    # Coerce to arrays so that .max() works for plain Python sequences too.
    x = np.asarray(x)
    y = np.asarray(y)

    # np.argmax returns the first index of the maximum when there are ties.
    xmax = x[np.argmax(y)]
    ymax = y.max()
    text = "x={:.3f}, y={:.3f}".format(xmax, ymax)

    if ax is None:
        ax = plt.gca()

    bbox_props = dict(boxstyle="square,pad=0.3", fc="w", ec="k", lw=0.72)
    arrowprops = dict(arrowstyle="->", connectionstyle="angle,angleA=0,angleB=60")
    # The arrow tip is given in data coordinates; the text box is positioned
    # in axes-fraction coordinates (xytext below).
    kw = dict(xycoords='data', textcoords="axes fraction",
              arrowprops=arrowprops, bbox=bbox_props, ha="right", va="top")
    ax.annotate(text, xy=(xmax, ymax), xytext=(0.94, 0.96), **kw)

# Accuracy curve for the Chebyshev metric, one point per value of K.
ax.plot(vet_k, chebyshev)

# Convert to NumPy arrays, then mark the best accuracy on the current axes.
tempx, tempy = np.array(vet_k), np.array(chebyshev)
annot_max(tempx, tempy)

ax.set_xlabel('K')
ax.set_ylabel('acurácia')
ax.grid()

plt.xticks(vet_k)

fig.savefig("chebyshev.png")
plt.show()

In [None]:
# Euclidean confusion matrices; index i of the list holds the run with k = i + 1.
euclidean_k1, euclidean_k10 = euclidean_confusion[0], euclidean_confusion[9]

print('-----MATRIZ DE CONFUSÃO (DISTÂNCIA EUCLIDIANA) -----')
print('- K=1\n', euclidean_k1)
print('\n\n')
print('- K=10\n', euclidean_k10)

In [None]:
# Manhattan confusion matrices; index i of the list holds the run with k = i + 1.
manhattan_k1, manhattan_k10 = manhattan_confusion[0], manhattan_confusion[9]

print('-----MATRIZ DE CONFUSÃO (DISTÂNCIA DE MANHATTAN) -----')
print('- K=1\n', manhattan_k1)
print('\n\n')
print('- K=10\n', manhattan_k10)

In [None]:
# Minkowski confusion matrices; index i of the list holds the run with k = i + 1.
minkowski_k1, minkowski_k10 = minkowski_confusion[0], minkowski_confusion[9]

print('-----MATRIZ DE CONFUSÃO (DISTÂNCIA DE MINKOWSKI) -----')
print('- K=1\n', minkowski_k1)
print('\n\n')
print('- K=10\n', minkowski_k10)

In [None]:
# Chebyshev confusion matrices; index i of the list holds the run with k = i + 1.
chebyshev_k5, chebyshev_k7 = chebyshev_confusion[4], chebyshev_confusion[6]

print('-----MATRIZ DE CONFUSÃO (DISTÂNCIA DE CHEBYSHEV) -----')
print('- K=5\n', chebyshev_k5)
print('\n\n')
print('- K=7\n', chebyshev_k7)