In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 1. Load the breast-cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X, y = data.data, data.target

# 2. Standardize the features
# NOTE(review): the scaler is fitted on the full feature matrix, before the
# train/test split done later in the notebook, so rows that end up in the test
# set contribute to the scaling statistics. Consider fitting on the training
# rows only.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

In [8]:
def naive_gaussian_pdf(x, mean, var, epsilon=1e-9):
    """Evaluate univariate Gaussian densities element-wise, floored at ``epsilon``.

    Parameters
    ----------
    x : array_like or scalar
        Point(s) at which the density is evaluated.
    mean : array_like or scalar
        Mean(s) of the Gaussian(s).
    var : array_like or scalar
        Variance(s) of the Gaussian(s).
    epsilon : float, default 1e-9
        Added to ``var`` so a zero variance does not cause a division by zero,
        and also used as the lower bound of the returned density so that a
        subsequent ``np.log`` never sees 0.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``max(N(x; mean, var + epsilon), epsilon)`` computed element-wise;
        ``x``, ``mean`` and ``var`` are combined under NumPy broadcasting.
    """
    # Coerce inputs so plain Python lists/tuples work as well as ndarrays.
    x = np.asarray(x)
    mean = np.asarray(mean)
    smoothed_var = np.asarray(var) + epsilon

    coef = 1.0 / np.sqrt(2 * np.pi * smoothed_var)
    exponent = -0.5 * ((x - mean) ** 2) / smoothed_var
    return np.maximum(coef * np.exp(exponent), epsilon)

In [9]:
# 4. Train/test split: 25% of the samples are held out, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# 5. Training rows of each class
X0, X1 = X_train[y_train == 0], X_train[y_train == 1]

# 6. Per-feature mean and variance of each class (the Naive Bayes parameters)
mean0, mean1 = np.mean(X0, axis=0), np.mean(X1, axis=0)
var0, var1 = np.var(X0, axis=0), np.var(X1, axis=0)

In [None]:
# 7. Naive Bayes classification
# The product of the univariate Gaussians is evaluated as a sum of logs
# (numerically more stable than multiplying the densities).
log_lik0 = [np.sum(np.log(naive_gaussian_pdf(row, mean0, var0))) for row in X_test]
log_lik1 = [np.sum(np.log(naive_gaussian_pdf(row, mean1, var1))) for row in X_test]

# Class 0 wins only on a strictly larger log-likelihood; ties go to class 1.
y_pred = [0 if ll0 > ll1 else 1 for ll0, ll1 in zip(log_lik0, log_lik1)]

# 8. Evaluation
acc = accuracy_score(y_test, y_pred)
print("Acurácia (Naive Bayes Gaussiano):", acc)

In [None]:
# 9. Likelihood space - Naive Bayes (plain likelihoods, no log)
# Likelihood of every sample under each class: the product of the per-feature
# densities. naive_gaussian_pdf broadcasts over the rows of X, so no Python
# loop is needed.
p0_list = np.prod(naive_gaussian_pdf(X, mean0, var0), axis=1)
p1_list = np.prod(naive_gaussian_pdf(X, mean1, var1), axis=1)

# Rescale BOTH likelihoods with one shared min/max. MinMaxScaler scales each
# column independently, so fitting it on the two-column matrix would stretch
# the two axes differently and the diagonal y = x would no longer be the
# p0 = p1 decision boundary. Fitting on the pooled values applies the same
# affine map to both axes, which keeps the diagonal meaningful.
scaler_lik = MinMaxScaler()
scaler_lik.fit(np.concatenate([p0_list, p1_list]).reshape(-1, 1))
p0_scaled = scaler_lik.transform(p0_list.reshape(-1, 1)).ravel()
p1_scaled = scaler_lik.transform(p1_list.reshape(-1, 1)).ravel()
lik_scaled = np.column_stack([p0_scaled, p1_scaled])

plt.figure(figsize=(8, 6))
# One scatter call per class instead of one call per sample.
for label, color in zip([0, 1], ['red', 'blue']):
    mask = y == label
    plt.scatter(p0_scaled[mask], p1_scaled[mask], color=color, alpha=0.6, label=f'Classe {label}')

# Diagonal p0 = p1: samples above it are assigned to class 1, below it to class 0.
min_val = min(p0_scaled.min(), p1_scaled.min())
max_val = max(p0_scaled.max(), p1_scaled.max())
plt.plot([min_val, max_val], [min_val, max_val], 'k--', label='Fronteira de decisão')
plt.xlabel("$Q_2(x|C_0)$")
plt.ylabel("$Q_1(x|C_1)$")
plt.title("Espaço das verossimilhanças - Naive Bayes")
plt.grid(True)
plt.axis('equal')
plt.legend()
plt.show()

In [None]:
# 10. Decision boundary in the x1 × x2 plane
# Keep only the first two features of the data and of the class parameters.
X_vis = X[:, :2]
mean0_vis, mean1_vis = mean0[:2], mean1[:2]
var0_vis, var1_vis = var0[:2], var1[:2]

# Regular grid (step 0.05) covering the data range plus a margin of 1 per side.
x_min, x_max = X_vis[:, 0].min() - 1, X_vis[:, 0].max() + 1
y_min, y_max = X_vis[:, 1].min() - 1, X_vis[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.05),
                     np.arange(y_min, y_max, 0.05))
grid_points = np.column_stack([xx.ravel(), yy.ravel()])

# Log-likelihood of every grid point under each class, computed in one
# broadcasted call instead of a Python loop over each grid point.
log_p0 = np.log(naive_gaussian_pdf(grid_points, mean0_vis, var0_vis)).sum(axis=1)
log_p1 = np.log(naive_gaussian_pdf(grid_points, mean1_vis, var1_vis)).sum(axis=1)

# Class 0 only where its log-likelihood is strictly larger; ties go to class 1.
zz = np.where(log_p0 > log_p1, 0, 1).reshape(xx.shape)

plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, zz, alpha=0.3, cmap=plt.cm.coolwarm)
for label, color in zip([0, 1], ['blue', 'red']):
    mask = y == label
    plt.scatter(X_vis[mask, 0], X_vis[mask, 1], color=color, label=f'Classe {label}', edgecolor='k')
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Classificação Naive Bayes no espaço $x1 \\times x2$")
plt.legend()
plt.grid(True)
plt.show()