# KNN Test

Testando 30 rodadas com algoritmo KNN no **Iris dataset** utilizando a biblioteca `scikit-learn`.

## Importando Bibliotecas

In [1]:
import pandas as pd

from random import randint
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score

## Carregando _Iris Dataset_

In [2]:
# Load the Iris dataset a single time. The returned Bunch exposes the feature
# matrix (.data), the label vector (.target) and the class names (.target_names).
iris_ds = load_iris()

# Take the arrays from the Bunch already in memory instead of calling
# load_iris() a second time just to obtain the same (X, y) pair.
iris_data, iris_target = iris_ds.data, iris_ds.target

## Rodadas de Teste

In [3]:
# k is the integer part of the square root of the number of samples
k_vizinhos = int(len(iris_data) ** 0.5)

# Per-round metrics, filled in by the loop below
iris_accuracy, iris_recall = [], []

# Header printed before each round's report
cabecalho_rodada = "\n- RODADA {turn} - Semente {seed} -\n"

for rodada in range(30):
    # A new random seed is drawn for every round and printed in the round header
    semente = randint(0, 8001)

    # Train/test split (30% held out for testing) driven by this round's seed
    data_train, data_test, target_train, target_test = train_test_split(
        iris_data,
        iris_target,
        test_size=0.3,
        random_state=semente,
    )

    # Fresh classifier for every round, fitted on the training portion only
    classifier = KNeighborsClassifier(n_neighbors=k_vizinhos)
    classifier.fit(data_train, target_train)

    # Classify the held-out portion
    prediction = classifier.predict(data_test)

    # Record overall accuracy and per-class recall (average=None keeps one value per class)
    iris_accuracy.append(accuracy_score(target_test, prediction))
    iris_recall.append(recall_score(target_test, prediction, average=None))

    # Round log: header, per-class report and confusion matrix
    print(cabecalho_rodada.format(turn=rodada, seed=semente))
    print(classification_report(target_test, prediction, target_names=iris_ds.target_names))
    print(confusion_matrix(target_test, prediction))



- RODADA 0 - Semente 4186 -

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      0.87      0.93        15
   virginica       0.88      1.00      0.94        15

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

[[15  0  0]
 [ 0 13  2]
 [ 0  0 15]]

- RODADA 1 - Semente 1205 -

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.94      0.94      0.94        18
   virginica       0.93      0.93      0.93        14

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45

[[13  0  0]
 [ 0 17  1]
 [ 0  1 13]]

- RODADA 2 - Semente 7594 -

              precision    recall  f1-score   support

      setosa       

## Lista de Acurácia (_accuracy_) por rodada

In [4]:
# Per-round accuracy rounded to 3 decimal places, kept as a plain list
accuracy_formatada = [round(valor, 3) for valor in iris_accuracy]

# Table with the raw accuracy next to its rounded counterpart, one row per round
accuracy_df = pd.DataFrame({
    'Acurácia': iris_accuracy,
    'Acurácia formatada': accuracy_formatada,
})
accuracy_df

Unnamed: 0,Acurácia,Acurácia formatada
0,0.955556,0.956
1,0.955556,0.956
2,0.955556,0.956
3,0.955556,0.956
4,0.955556,0.956
5,0.955556,0.956
6,0.955556,0.956
7,1.0,1.0
8,0.955556,0.956
9,0.977778,0.978


## Lista de Sensibilidade (_recall_) por rodada

In [5]:
# Per-class recall table: one row per round, one column per class name
pd.DataFrame(
    iris_recall,
    columns=iris_ds.target_names,
)

Unnamed: 0,setosa,versicolor,virginica
0,1.0,0.866667,1.0
1,1.0,0.944444,0.928571
2,1.0,0.866667,1.0
3,1.0,0.875,1.0
4,1.0,0.9375,0.9375
5,1.0,0.888889,1.0
6,1.0,1.0,0.875
7,1.0,1.0,1.0
8,1.0,1.0,0.894737
9,1.0,1.0,0.928571


In [6]:
# Recall rounded to 3 decimal places.
# Every entry of each row is rounded generically, so the number of classes is
# not hard-coded here and always matches the columns taken from target_names.
recall_formatado = [[round(valor, 3) for valor in linha] for linha in iris_recall]
pd.DataFrame(data=recall_formatado, columns=iris_ds.target_names)

Unnamed: 0,setosa,versicolor,virginica
0,1.0,0.867,1.0
1,1.0,0.944,0.929
2,1.0,0.867,1.0
3,1.0,0.875,1.0
4,1.0,0.938,0.938
5,1.0,0.889,1.0
6,1.0,1.0,0.875
7,1.0,1.0,1.0
8,1.0,1.0,0.895
9,1.0,1.0,0.929


## Calculando Média (Acurácia)

In [7]:
# Mean of the per-round accuracies (the 3-decimal rounded values).
# The divisor is the number of rounds actually recorded rather than a literal
# 30, so the mean stays correct if the number of rounds ever changes.
accuracy_media = sum(accuracy_formatada) / len(accuracy_formatada)

# Displayed with 5 decimal places
print("Acurácia média: {media}".format(media=round(accuracy_media, 5)))

Acurácia média: 0.9624


## Calculando Desvio Padrão (Acurácia)

In [8]:
# Population standard deviation of the rounded per-round accuracies:
# accumulate the squared deviations from the mean, divide by the sample count,
# then take the square root.
n_amostras = len(accuracy_formatada)

accuracy_distancia = 0
for valor in accuracy_formatada:
    desvio = valor - accuracy_media
    accuracy_distancia += desvio ** 2

accuracy_DP = (accuracy_distancia / n_amostras) ** 0.5

# Displayed with 5 decimal places
mensagem_dp = "Desvio padrâo da Acurácia: {dp}"
print(mensagem_dp.format(dp=round(accuracy_DP, 5)))

Desvio padrâo da Acurácia: 0.0245


## Calculando Média (Sensibilidade)

In [9]:
# Column-wise mean of the rounded recalls, one entry per class in the order
# setosa, versicolor, virginica. zip(*rows) regroups the per-round rows into
# per-class columns; the divisor is the number of rounds actually recorded
# rather than a literal 30.
n_rodadas = len(recall_formatado)
recall_media = [sum(coluna) / n_rodadas for coluna in zip(*recall_formatado)]

# NOTE(review): this rounds the *accuracy* mean inside the recall cell. It is
# kept so the notebook's global state is unchanged, but it looks like a stray
# line - confirm whether it can be removed.
accuracy_media = round(accuracy_media, 5)

# Displayed with 5 decimal places
print("Sensibilidade média")

print("setosa: {setosa}".format(setosa=round(recall_media[0], 5)))

print("versicolor: {versicolor}".format(versicolor=round(recall_media[1], 5)))

print("virginica: {virginica}".format(virginica=round(recall_media[2], 5)))

Sensibilidade média
setosa: 1.0
versicolor: 0.95093
virginica: 0.9368


## Calculando Desvio Padrão (Sensibilidade)

In [10]:
# Population standard deviation of the rounded recalls, computed per class.
# recall_distancia accumulates, for each class position, the squared deviation
# of every round's recall from that class's mean.
recall_distancia = [0, 0, 0]

for amostras in recall_formatado:
    Asetosa, Aversicolor, Avirginica = amostras
    for posicao, valor in enumerate((Asetosa, Aversicolor, Avirginica)):
        recall_distancia[posicao] += (valor - recall_media[posicao]) ** 2

# Divide by the number of rounds and take the square root
total_rodadas = len(recall_formatado)
recall_DP = [(soma / total_rodadas) ** 0.5 for soma in recall_distancia]

# Displayed with 5 decimal places
dp_setosa = round(recall_DP[0], 5)
dp_versicolor = round(recall_DP[1], 5)
dp_virginica = round(recall_DP[2], 5)

print("Desvio padrâo da Sensibilidade")

print("Setosa: {setosa}".format(setosa=dp_setosa))

print("Versicolor: {versicolor}".format(versicolor=dp_versicolor))

print("Virginica: {virginica}".format(virginica=dp_virginica))

Desvio padrâo da Sensibilidade
Setosa: 0.0
Versicolor: 0.05679
Virginica: 0.06246
