# KNN Test

Testando 30 rodadas com algoritmo KNN no **Iris dataset** utilizando a biblioteca `scikit-learn`.

## Importando Bibliotecas

In [1]:
import pandas as pd

from random import randint
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score

## Carregando _Digits DataSet_

In [2]:
iris_ds = load_iris()
iris_data, iris_target = load_iris(return_X_y=True)

## Rodadas de Teste

In [3]:
k_vizinhos = int(len(iris_data) ** 0.5)

iris_accuracy = []
iris_recall = []

for rodada in range(30):
    # Separando dados
    semente = randint(0, 8001)
    data_train, data_test, target_train, target_test = train_test_split(iris_data, iris_target, test_size=0.3, random_state=semente)

    # Instanciando classificador
    classifier = KNeighborsClassifier(n_neighbors=k_vizinhos)

    # Treinando modelo
    classifier.fit(data_train, target_train)

    # Realizando teste de classificação
    prediction = classifier.predict(data_test)

    # Resultados
    iris_accuracy.append(accuracy_score(target_test, prediction))

    iris_recall.append(recall_score(target_test, prediction, average=None))

    print("\n- RODADA {turn} - Semente {seed} -\n".format(turn=rodada, seed=semente))
    print(classification_report(target_test, prediction, target_names=iris_ds.target_names))
    print(confusion_matrix(target_test, prediction))



- RODADA 0 - Semente 761 -

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        16
  versicolor       1.00      0.94      0.97        16
   virginica       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

[[16  0  0]
 [ 0 15  1]
 [ 0  0 13]]

- RODADA 1 - Semente 6437 -

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        14
  versicolor       0.94      0.94      0.94        17
   virginica       0.93      0.93      0.93        14

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

[[14  0  0]
 [ 0 16  1]
 [ 0  1 13]]

- RODADA 2 - Semente 1735 -

              precision    recall  f1-score   support

      setosa       1

## Lista de Acurácia (_accuracy_) por rodada

In [4]:
# Acurácia formatada para exibir 3 casas decimais
accuracy_formatada = [round(acc, 3) for acc in iris_accuracy]

accuracy_df = pd.DataFrame(data=iris_accuracy, columns=['Acurácia'])
accuracy_df['Acurácia formatada'] = accuracy_formatada
accuracy_df

Unnamed: 0,Acurácia,Acurácia formatada
0,0.977778,0.978
1,0.955556,0.956
2,0.933333,0.933
3,1.0,1.0
4,0.933333,0.933
5,0.911111,0.911
6,0.977778,0.978
7,0.911111,0.911
8,0.955556,0.956
9,0.977778,0.978


## Lista de Sensibilidade (_recall_) por rodada

In [5]:
pd.DataFrame(data=iris_recall, columns=iris_ds.target_names)

Unnamed: 0,setosa,versicolor,virginica
0,1.0,0.9375,1.0
1,1.0,0.941176,0.928571
2,1.0,0.944444,0.866667
3,1.0,1.0,1.0
4,1.0,0.882353,0.923077
5,1.0,1.0,0.75
6,1.0,0.941176,1.0
7,1.0,0.923077,0.823529
8,1.0,1.0,0.882353
9,1.0,0.933333,1.0


In [6]:
# Sensibilidade formatada para exibir 3 casas decimais
recall_formatado = [[round(setosa, 3), round(versicolor, 3), round(virginica, 3)] for setosa, versicolor, virginica in iris_recall]
pd.DataFrame(data=recall_formatado, columns=iris_ds.target_names)

Unnamed: 0,setosa,versicolor,virginica
0,1.0,0.938,1.0
1,1.0,0.941,0.929
2,1.0,0.944,0.867
3,1.0,1.0,1.0
4,1.0,0.882,0.923
5,1.0,1.0,0.75
6,1.0,0.941,1.0
7,1.0,0.923,0.824
8,1.0,1.0,0.882
9,1.0,0.933,1.0


## Calculando Média (Acurácia)

In [7]:
accuracy_media = 0
for acc in accuracy_formatada:
    accuracy_media += acc
accuracy_media /= 30

# Arredondando para 5 casas decimais
accuracy_media = round(accuracy_media, 5)
print("Acurácia média: {}".format(accuracy_media))

Acurácia média: 0.95857


## Calculando Média (Sensibilidade)

In [8]:
recall_media = [0, 0, 0]
for setosa, versicolor, virginica in recall_formatado:
    recall_media[0] += setosa
    recall_media[1] += versicolor
    recall_media[2] += virginica
recall_media = [media/30 for media in recall_media]

# Arredondando para 5 casas decimais
recall_media = [round(media, 5) for media in recall_media]
print("Sensibilidade média")
print("setosa: {setosa}".format(setosa=recall_media[0]))
print("versicolor: {versicolor}".format(versicolor=recall_media[1]))
print("virginica: {virginica}".format(virginica=recall_media[2]))

Sensibilidade média
setosa: 1.0
versicolor: 0.95647
virginica: 0.92683
