In [7]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [8]:
def print_metrics(y_test, predictions, model_name):
    """Print a classification report for one model.

    Emits, in this order: the confusion matrix, the accuracy, and the
    precision, recall and F1-score, the last three averaged with
    ``average='weighted'``. Nothing is returned; the report goes to stdout.

    Args:
        y_test: True labels of the evaluated samples.
        predictions: Predicted labels, aligned with ``y_test``.
        model_name: Label inserted into every printed line.
    """
    def weighted(metric):
        # Precision, recall and F1 all share the same weighted averaging.
        return metric(y_test, predictions, average='weighted')

    # Confusion matrix
    cm = confusion_matrix(y_test, predictions)
    print(f"\nMatriz de Confusão ({model_name}):\n", cm)

    # Accuracy
    accuracy = accuracy_score(y_test, predictions)
    print(f"Acurácia ({model_name}): {accuracy:.2f}")

    # Precision
    precision = weighted(precision_score)
    print(f"Precisão ({model_name}): {precision:.2f}")

    # Recall
    recall = weighted(recall_score)
    print(f"Recall ({model_name}): {recall:.2f}")

    # F1-score
    f1 = weighted(f1_score)
    print(f"F1-Score ({model_name}): {f1:.2f}")

In [9]:
# Default location of the raw Iris data file (UCI Machine Learning Repository).
IRIS_DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"


def load_iris_dataset(source=IRIS_DATA_URL):
    """Load the Iris dataset into a DataFrame with named columns.

    The data file has no header row, so the column names are supplied here.

    Args:
        source: Anything ``pandas.read_csv`` can read: a URL, a local file
            path or an open file-like object. Defaults to the UCI copy, so
            calling the function without arguments behaves as before; pass a
            local path to run the notebook offline or from a cached copy.

    Returns:
        pandas.DataFrame with the columns ``sepal_length``, ``sepal_width``,
        ``petal_length``, ``petal_width`` and ``species``.
    """
    columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
    return pd.read_csv(source, header=None, names=columns)

In [10]:
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two feature vectors.

    Args:
        row1: First vector; must support element-wise subtraction with
            ``row2`` (e.g. a NumPy array).
        row2: Second vector, same length as ``row1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    delta = row1 - row2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

In [11]:

def knn(train_data, test_data, k):
    """Classify each test row by majority vote among its k nearest training rows.

    Every row, in both ``train_data`` and ``test_data``, is expected to hold
    the feature values followed by one trailing label column. The trailing
    column of a test row is dropped before distances are measured, so it is
    never used for prediction.

    Distances are Euclidean and are computed against all training rows at
    once. Neighbours are ranked with a stable sort, so training rows at equal
    distance keep their original order, and a tied vote goes to the class
    that appears first among the ranked neighbours.

    Args:
        train_data: Non-empty 2-D array (or sequence of rows) of
            ``[features..., label]``.
        test_data: Iterable of rows with the same layout as ``train_data``.
        k: Number of neighbours that vote; must be at least 1. If it exceeds
            the number of training rows, every training row votes.

    Returns:
        list with one predicted label per test row, in input order.

    Raises:
        ValueError: If ``k`` is smaller than 1 (a negative ``k`` would
            otherwise silently slice off the wrong neighbours), or if
            ``train_data`` is not a non-empty 2-D collection of rows.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    train_rows = np.asarray(train_data)
    if train_rows.ndim != 2 or train_rows.shape[0] == 0:
        raise ValueError("train_data must be a non-empty 2-D collection of rows")

    # Split once, outside the loop: features for the distances, labels for the vote.
    train_features = train_rows[:, :-1].astype(float)
    train_labels = train_rows[:, -1]

    predictions = []
    for test_row in test_data:
        test_features = np.asarray(test_row[:-1], dtype=float)

        # Distance from this test sample to every training sample at once.
        distances = np.sqrt(np.sum((train_features - test_features) ** 2, axis=1))

        # Indices of the k closest training rows; the stable sort keeps ties
        # in training order.
        nearest = np.argsort(distances, kind="stable")[:k]

        # Majority vote among the neighbours' labels.
        votes = Counter(train_labels[index] for index in nearest)
        predictions.append(votes.most_common(1)[0][0])

    return predictions

In [12]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Both inputs are converted to NumPy arrays before comparing, so the
    comparison is always element-wise. Comparing two plain Python lists with
    ``==`` yields a single bool and would give a wrong score.

    Args:
        y_true: Sequence or array of true labels.
        y_pred: Sequence or array of predicted labels, aligned with ``y_true``.

    Returns:
        Accuracy as a float in ``[0, 1]``.

    Raises:
        ValueError: If the inputs differ in shape or are empty (accuracy is
            undefined without samples).
    """
    true_labels = np.asarray(y_true)
    predicted_labels = np.asarray(y_pred)

    if true_labels.shape != predicted_labels.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got "
            f"{true_labels.shape} and {predicted_labels.shape}"
        )
    if true_labels.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")

    return np.mean(true_labels == predicted_labels)

In [13]:
# Main: end-to-end run of the hand-written KNN classifier on the Iris dataset.
# NOTE: when this cell runs in the notebook the guard below passes, so every
# name assigned here becomes a notebook-level global; y_test and predictions
# are reused by the print_metrics cell further down.
if __name__ == "__main__":
    # Load the dataset
    iris_data = load_iris_dataset()
    
    # Encode the class names as integer category codes
    iris_data['species'] = iris_data['species'].astype('category').cat.codes

    # Split the dataset into features (all columns but the last) and labels
    # (the last column)
    X = iris_data.iloc[:, :-1].values
    y = iris_data.iloc[:, -1].values
    
    # Hold out 20% of the samples for testing; the fixed random_state keeps
    # the split the same on every run
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Append the labels as a trailing column, which is the row layout knn()
    # expects. knn() drops the last column of each test row before measuring
    # distances, so stacking y_test here does not leak labels into the
    # predictions.
    train_data = np.column_stack((X_train, y_train))
    test_data = np.column_stack((X_test, y_test))

    # Number of neighbours that vote on each prediction
    k = 3
    
    # Predict a label for every test row
    predictions = knn(train_data, test_data, k)
    
    # Compute the accuracy and report it as a percentage
    acc = accuracy(y_test, predictions)
    print(f"Acurácia do KNN: {acc * 100:.2f}%")


Acurácia do KNN: 100.00%


In [14]:
# Report the confusion matrix, accuracy and weighted precision/recall/F1 for
# the KNN predictions; y_test and predictions are the globals set by the main
# cell above, so that cell must have been run first.
print_metrics(y_test, predictions, "KNN")



Matriz de Confusão (KNN):
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Acurácia (KNN): 1.00
Precisão (KNN): 1.00
Recall (KNN): 1.00
F1-Score (KNN): 1.00
