In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [12]:
def print_metrics(y_test, predictions, model_name):
    """Print a confusion matrix and four summary scores for one model.

    Args:
        y_test: True labels.
        predictions: Predicted labels, compared against ``y_test``.
        model_name: Name inserted into every printed line.

    Returns:
        None. All results are written to stdout.
    """
    # Precision, recall and F1 all use the same 'weighted' averaging mode.
    averaging = {"average": 'weighted'}

    # Confusion matrix
    matrix = confusion_matrix(y_test, predictions)
    print(f"\nMatriz de Confusão ({model_name}):\n", matrix)

    # Accuracy
    acc = accuracy_score(y_test, predictions)
    print(f"Acurácia ({model_name}): {acc:.2f}")

    # Precision
    prec = precision_score(y_test, predictions, **averaging)
    print(f"Precisão ({model_name}): {prec:.2f}")

    # Recall
    rec = recall_score(y_test, predictions, **averaging)
    print(f"Recall ({model_name}): {rec:.2f}")

    # F1 score
    f1_value = f1_score(y_test, predictions, **averaging)
    print(f"F1-Score ({model_name}): {f1_value:.2f}")

In [13]:
# Load the Iris dataset (the file has no header row, so column names are supplied here)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
data = pd.read_csv(url, header=None, names=columns)

# Separate features (every column except the last) from labels (the last column)
X = data[columns[:-1]].values
y = data[columns[-1]].values

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Função para calcular a média e o desvio padrão
def summarize_dataset(X, y):
    """Compute per-class, per-feature mean and standard deviation.

    Args:
        X: 2-D NumPy array of samples (rows) by features (columns).
        y: 1-D NumPy array of labels, one per row of ``X``.

    Returns:
        dict mapping each distinct label in ``y`` to a list with one
        ``(mean, std)`` tuple per feature column, in column order. The
        standard deviation is ``np.std`` with its default settings.
    """
    stats_by_class = {}
    for cls in np.unique(y):
        # Boolean mask keeps only the rows that belong to this class.
        class_rows = X[y == cls]
        feature_stats = []
        # zip(*rows) transposes the rows so each item is one feature column.
        for feature_values in zip(*class_rows):
            feature_stats.append((np.mean(feature_values), np.std(feature_values)))
        stats_by_class[cls] = feature_stats
    return stats_by_class

In [15]:
# Função para calcular a probabilidade
def calculate_probability(x, mean, std, min_std=1e-9):
    """Evaluate the Gaussian (normal) density at ``x``.

    Despite the name, the result is a probability *density*, so it can
    exceed 1 when ``std`` is small.

    Args:
        x: Point (or array of points) at which to evaluate the density.
        mean: Mean of the distribution.
        std: Standard deviation of the distribution.
        min_std: Lower bound applied to ``std`` before it is used as a
            divisor. A feature that is constant within a class has
            ``std == 0``, which would otherwise divide by zero
            (``ZeroDivisionError`` for Python floats, NaN/inf for NumPy
            scalars). Values of ``std`` at or above this bound are used
            unchanged.

    Returns:
        The density value(s), as a NumPy scalar or array matching the
        broadcast shape of the inputs.
    """
    # Floor the spread so a zero-variance feature yields a finite, sharply
    # peaked density instead of NaN or an exception.
    std = np.maximum(std, min_std)
    exponent = np.exp(-((x - mean) ** 2 / (2 * std ** 2)))
    return (1 / (np.sqrt(2 * np.pi) * std)) * exponent

In [16]:
# Função para fazer previsões
def predict(summaries, input_vector, priors=None, min_std=1e-9):
    """Return the label with the highest Gaussian Naive Bayes score.

    Each class is scored as ``log(prior) + sum(log N(x_i; mean_i, std_i))``
    over its features. Working with log-densities instead of multiplying raw
    densities keeps the same ranking but cannot underflow to 0.0, which
    previously made every class tie (and the first label win) once there were
    many features or the input was far from every class mean.

    Args:
        summaries: dict mapping label -> sequence of ``(mean, std)`` pairs,
            one pair per feature.
        input_vector: Indexable feature values; element ``i`` is matched with
            the ``i``-th ``(mean, std)`` pair. Extra trailing elements are
            ignored.
        priors: Optional dict mapping label -> prior probability. When
            omitted, all classes are weighted equally. ``summaries`` holds
            only per-feature statistics, not class row counts, so class
            frequencies cannot be derived from it; the earlier
            ``len(summaries[label]) / total_rows`` term counted features and
            was therefore the same for every class.
        min_std: Lower bound applied to every standard deviation so a
            feature that is constant within a class does not divide by zero.

    Returns:
        The label (a key of ``summaries``) with the highest score. Ties go
        to the label that comes first in ``summaries``.

    Raises:
        ValueError: If ``summaries`` is empty.
        KeyError: If ``priors`` is given but lacks a label in ``summaries``.
        IndexError: If ``input_vector`` has fewer elements than a class has
            feature summaries.
    """
    log_scores = {}

    for label, summary in summaries.items():
        if priors is None:
            # A uniform prior adds the same constant to every class, so it
            # has no effect on which label wins and is left out.
            log_score = 0.0
        else:
            prior = priors[label]
            # A zero prior rules the class out without triggering log(0).
            log_score = np.log(prior) if prior > 0 else -np.inf

        for i, (mean, std) in enumerate(summary):
            std = max(std, min_std)
            variance = std ** 2
            # Log of the normal density, written out term by term.
            log_score += (
                -0.5 * np.log(2 * np.pi * variance)
                - (input_vector[i] - mean) ** 2 / (2 * variance)
            )

        log_scores[label] = log_score

    return max(log_scores, key=log_scores.get)


In [17]:
# Summarize the training split: per-class (mean, std) for every feature
summaries = summarize_dataset(X_train, y_train)

# Classify every row of the test split
predictions = list(predict(summaries, row) for row in X_test)

# Score the predictions against the held-out labels
accuracy = accuracy_score(y_test, predictions)

print(f"Acurácia: {accuracy:.2f}")


Acurácia: 1.00


In [18]:
# Report the confusion matrix plus accuracy, precision, recall and F1 for the test-set predictions.
print_metrics(y_test, predictions, "Naive Bayes")



Matriz de Confusão (Naive Bayes):
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Acurácia (Naive Bayes): 1.00
Precisão (Naive Bayes): 1.00
Recall (Naive Bayes): 1.00
F1-Score (Naive Bayes): 1.00
