In [None]:
import numpy as np  
from sklearn.naive_bayes import GaussianNB  
from sklearn.preprocessing import StandardScaler 

In [None]:
class GaussianBayesClassifierWithRejection:
    """Gaussian naive Bayes classifier with a reject option.

    Inputs are standardised with a ``StandardScaler`` and classified with a
    ``GaussianNB`` model. In ``predict`` a sample is rejected (labelled ``-1``)
    when its largest class probability is below ``1 - rejection_cost``.

    Parameters
    ----------
    rejection_cost : float, default=0.5
        Value subtracted from 1 to obtain the acceptance threshold applied to
        the largest class probability.
    """

    def __init__(self, rejection_cost=0.5):
        self.rejection_cost = rejection_cost
        self.classifier = GaussianNB()
        self.scaler = StandardScaler()

    def fit(self, X, y):
        """Fit the scaler and the classifier on the training data.

        Parameters
        ----------
        X : feature matrix passed to the scaler.
        y : training labels passed to the classifier.

        Returns
        -------
        self
            The fitted instance, so calls can be chained
            (``clf.fit(X, y).predict(X_new)``).
        """
        X_scaled = self.scaler.fit_transform(X)
        self.classifier.fit(X_scaled, y)
        return self

    def predict(self, X):
        """Predict labels, replacing low-confidence predictions with ``-1``.

        Parameters
        ----------
        X : feature matrix; it is scaled with the fitted scaler first.

        Returns
        -------
        The classifier's predictions with ``-1`` written at every position
        whose largest class probability is below ``1 - rejection_cost``.
        """
        X_scaled = self.scaler.transform(X)
        probabilities = self.classifier.predict_proba(X_scaled)
        predictions = self.classifier.predict(X_scaled)
        # NOTE(review): -1 is written into the prediction array itself, which
        # presumably requires numeric class labels - confirm before using
        # string labels.
        rejected = probabilities.max(axis=1) < 1 - self.rejection_cost
        predictions[rejected] = -1  # -1 indicates rejection
        return predictions

    def predict_proba(self, X):
        """Return the classifier's class probabilities for the scaled ``X``."""
        X_scaled = self.scaler.transform(X)
        return self.classifier.predict_proba(X_scaled)

In [None]:
from sklearn.datasets import load_iris  
from sklearn.model_selection import train_test_split

In [None]:
# Load the iris dataset and keep its feature matrix and target vector
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Build the rejecting classifier with rejection_cost=0.3 and fit it on the training split
classifier = GaussianBayesClassifierWithRejection(rejection_cost=0.3)  
classifier.fit(X_train, y_train)  

In [None]:
# Predict the test split; the classifier's predict() marks rejected samples with -1
y_pred = classifier.predict(X_test)  
print(y_pred)

In [None]:
import numpy as np  
from sklearn.naive_bayes import GaussianNB  
from sklearn.preprocessing import StandardScaler  
from sklearn.metrics import accuracy_score  
import matplotlib.pyplot as plt  

In [None]:
class GaussianBayesClassifierWithRejection:
    """Gaussian naive Bayes classifier with a reject option.

    Inputs are standardised with a ``StandardScaler`` and classified with a
    ``GaussianNB`` model. In ``predict`` a sample is rejected (labelled ``-1``)
    when its largest class probability is below ``1 - rejection_cost``.

    Parameters
    ----------
    rejection_cost : float, default=0.5
        Value subtracted from 1 to obtain the acceptance threshold applied to
        the largest class probability.
    """

    def __init__(self, rejection_cost=0.5):
        self.rejection_cost = rejection_cost
        self.classifier = GaussianNB()
        self.scaler = StandardScaler()

    def fit(self, X, y):
        """Fit the scaler and the classifier on the training data.

        Parameters
        ----------
        X : feature matrix passed to the scaler.
        y : training labels passed to the classifier.

        Returns
        -------
        self
            The fitted instance, so calls can be chained
            (``clf.fit(X, y).predict(X_new)``).
        """
        X_scaled = self.scaler.fit_transform(X)
        self.classifier.fit(X_scaled, y)
        return self

    def predict(self, X):
        """Predict labels, replacing low-confidence predictions with ``-1``.

        Parameters
        ----------
        X : feature matrix; it is scaled with the fitted scaler first.

        Returns
        -------
        The classifier's predictions with ``-1`` written at every position
        whose largest class probability is below ``1 - rejection_cost``.
        """
        X_scaled = self.scaler.transform(X)
        probabilities = self.classifier.predict_proba(X_scaled)
        predictions = self.classifier.predict(X_scaled)
        # NOTE(review): -1 is written into the prediction array itself, which
        # presumably requires numeric class labels - confirm before using
        # string labels.
        rejected = probabilities.max(axis=1) < 1 - self.rejection_cost
        predictions[rejected] = -1  # -1 indicates rejection
        return predictions

    def predict_proba(self, X):
        """Return the classifier's class probabilities for the scaled ``X``."""
        X_scaled = self.scaler.transform(X)
        return self.classifier.predict_proba(X_scaled)

In [None]:
# Load the iris dataset and expose its features (X) and targets (y)
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
y = iris.target

In [None]:
# Print the full iris feature array (iris.data)
print(iris.data) 

In [None]:
# Print the full iris target array (iris.target)
print(iris.target) 

In [None]:
# Hold out 20% of the samples for testing, with a fixed seed
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Rejection costs (Wr) to evaluate
rejection_costs = [
    0.04,
    0.12,
    0.24,
    0.36,
    0.48,
]

In [None]:
# Empty accumulators: one mean and one standard deviation per rejection cost
accuracies, rejection_rates = [], []
std_accuracies, std_rejection_rates = [], []

In [None]:
# Evaluate every rejection cost over 20 random hold-out splits of (X, y).
# The result lists are (re)created here so that re-running this cell never
# appends a second copy of the results.
accuracies = []
rejection_rates = []
std_accuracies = []
std_rejection_rates = []

for rejection_cost in rejection_costs:
    accuracy_list = []
    rejection_rate_list = []
    for run in range(20):
        # Draw a fresh split per run (seeded by the run index so results are
        # reproducible). Reusing a single fixed split makes all 20 runs
        # identical and every standard deviation exactly 0.
        X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=run)
        y_te = np.asarray(y_te)

        # Train and predict
        classifier = GaussianBayesClassifierWithRejection(rejection_cost=rejection_cost)
        classifier.fit(X_tr, y_tr)
        y_pred = classifier.predict(X_te)

        # Rejected samples carry the label -1
        accepted = y_pred != -1
        rejection_rate = np.mean(~accepted)

        # Accuracy is measured on accepted samples only; scoring the -1
        # labels as errors would mix the rejection rate into the accuracy.
        # NaN marks a run in which every sample was rejected.
        if accepted.any():
            accuracy = accuracy_score(y_te[accepted], y_pred[accepted])
        else:
            accuracy = np.nan

        accuracy_list.append(accuracy)
        rejection_rate_list.append(rejection_rate)

    # Mean and standard deviation over the runs (NaN runs are ignored)
    mean_accuracy = np.nanmean(accuracy_list)
    std_accuracy = np.nanstd(accuracy_list)
    mean_rejection_rate = np.mean(rejection_rate_list)
    std_rejection_rate = np.std(rejection_rate_list)

    accuracies.append(mean_accuracy)
    rejection_rates.append(mean_rejection_rate)
    std_accuracies.append(std_accuracy)
    std_rejection_rates.append(std_rejection_rate)

In [None]:
# Mean accuracy as a function of the rejection cost
plt.plot(rejection_costs, accuracies)
plt.title('Accuracy-Rejection (AR) Curve')
plt.ylabel('Accuracy')
plt.xlabel('Rejection Cost (Wr)')
plt.show()

In [None]:
# Tabulate mean ± std of accuracy and rejection rate, one row per rejection cost
print('Rejection Cost (Wr) | Accuracy (mean ± std) | Rejection Rate (mean ± std)')
summary_rows = zip(rejection_costs, accuracies, std_accuracies, rejection_rates, std_rejection_rates)
for rejection_cost, acc_mean, acc_std, rej_mean, rej_std in summary_rows:
    print(f'{rejection_cost:.2f} | {acc_mean:.4f} ± {acc_std:.4f} | {rej_mean:.4f} ± {rej_std:.4f}')

In [None]:
import matplotlib.pyplot as plt  
import numpy as np

In [None]:
# NOTE(review): the literals below replace the values computed by the
# evaluation loop earlier in the notebook; where these numbers come from is
# not shown here - confirm before relying on the plot that uses them.

# Rejection costs (Wr)
rejection_costs = [
    0.04, 0.12, 0.24, 0.36, 0.48,
]

# Mean accuracy per rejection cost
accuracies = [
    0.9333, 0.9467, 0.9533, 0.9600, 0.9667,
]

# Mean rejection rate per rejection cost
rejection_rates = [
    0.0533, 0.1067, 0.1733, 0.2400, 0.3067,
]

In [None]:
# Standard deviation of the accuracy, one entry per rejection cost
std_accuracies = [
    0.0123, 0.0105, 0.0093, 0.0085, 0.0078,
]

# Standard deviation of the rejection rate, one entry per rejection cost
std_rejection_rates = [
    0.0111, 0.0154, 0.0211, 0.0265, 0.0319,
]

In [None]:
# Accuracy curve with a ±1 standard deviation band
plt.plot(rejection_costs, accuracies, 'bo-', label='Accuracy')
plt.fill_between(
    rejection_costs,
    np.subtract(accuracies, std_accuracies),
    np.add(accuracies, std_accuracies),
    alpha=0.2,
)

# Rejection-rate curve with a ±1 standard deviation band
plt.plot(rejection_costs, rejection_rates, 'ro-', label='Rejection Rate')
plt.fill_between(
    rejection_costs,
    np.subtract(rejection_rates, std_rejection_rates),
    np.add(rejection_rates, std_rejection_rates),
    alpha=0.2,
)

# Title, axis labels and legend
plt.title('Curva Acurácia-Rejeição')
plt.ylabel('Taxa de Acurácia/Rejeição')
plt.xlabel('Rejection Cost (Wr)')
plt.legend()
plt.show()

In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the dataset registered under id 212
vertebral_column = fetch_ucirepo(id=212)

# Keep the feature table as X and the target table as y
X, y = vertebral_column.data.features, vertebral_column.data.targets

In [None]:
# Print the fetched dataset's `data` container (the object that exposes
# `.features` and `.targets`)
print(vertebral_column.data)

In [None]:
# Print the full feature table of the fetched dataset
print(vertebral_column.data.features)

In [None]:
# Print the full target table of the fetched dataset
print(vertebral_column.data.targets)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import LabelEncoder
from scipy.stats import multivariate_normal
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB

In [None]:
# `artificial_data` is only created by the data-generation cell further down,
# so on a fresh kernel (Restart & Run All) it does not exist yet when this
# cell runs. Report that instead of aborting the run with a NameError.
try:
    print(artificial_data)
except NameError:
    print("artificial_data is not defined yet - run the data-generation cell below first")

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(42)

# Centres of the four artificial classes, listed in label order:
#   0 -> lower-left, 1 -> lower-right, 2 -> upper-right, 3 -> upper-left
class_centers = [(-2, -2), (2, -2), (2, 2), (-2, 2)]

# Draw 50 two-dimensional points per class (normal noise, scale 0.5);
# the classes are sampled in label order
class_0, class_1, class_2, class_3 = (
    np.random.normal(loc=center, scale=0.5, size=(50, 2))
    for center in class_centers
)

# Stack the samples and build the matching label vector
X_artificial = np.vstack((class_0, class_1, class_2, class_3))
y_artificial = np.repeat(np.arange(4), 50)

# Tabular view of the artificial data: two feature columns plus the label
artificial_data = pd.DataFrame(
    X_artificial, columns=['Feature1', 'Feature2']
).assign(label=y_artificial)

# Persist the artificial data as CSV
artificial_data.to_csv('dados_artificiais_classes.csv', index=False)

# Scatter plot of the samples, coloured by class
plt.scatter(*X_artificial.T, c=y_artificial, cmap='viridis')
plt.title("Dados Artificiais com 4 Classes")
plt.xlabel("Feature1")
plt.ylabel("Feature2")
plt.show()

# Stack the classes again into a separate array
data = np.vstack([class_0, class_1, class_2, class_3])

# Copy into a NumPy array (it already is one)
data_array = np.array(data)
print(data)


In [None]:
# Hold out 20% of the artificial samples for testing, with a fixed seed
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_artificial,
    y_artificial,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Rejection costs (Wr) to evaluate
rejection_costs = [
    0.04,
    0.12,
    0.24,
    0.36,
    0.48,
]

In [None]:
# Empty accumulators: one mean and one standard deviation per rejection cost
accuracies, rejection_rates = [], []
std_accuracies, std_rejection_rates = [], []

In [None]:
# Evaluate every rejection cost over 20 random hold-out splits of the
# artificial data. The result lists are (re)created here so that re-running
# this cell never appends a second copy of the results.
accuracies = []
rejection_rates = []
std_accuracies = []
std_rejection_rates = []

for rejection_cost in rejection_costs:
    accuracy_list = []
    rejection_rate_list = []
    for run in range(20):
        # Draw a fresh split per run (seeded by the run index so results are
        # reproducible). Reusing a single fixed split makes all 20 runs
        # identical and every standard deviation exactly 0.
        X_tr, X_te, y_tr, y_te = train_test_split(
            X_artificial, y_artificial, test_size=0.2, random_state=run
        )
        y_te = np.asarray(y_te)

        # Train and predict
        classifier = GaussianBayesClassifierWithRejection(rejection_cost=rejection_cost)
        classifier.fit(X_tr, y_tr)
        y_pred = classifier.predict(X_te)

        # Rejected samples carry the label -1
        accepted = y_pred != -1
        rejection_rate = np.mean(~accepted)

        # Accuracy is measured on accepted samples only; scoring the -1
        # labels as errors would mix the rejection rate into the accuracy.
        # NaN marks a run in which every sample was rejected.
        if accepted.any():
            accuracy = accuracy_score(y_te[accepted], y_pred[accepted])
        else:
            accuracy = np.nan

        accuracy_list.append(accuracy)
        rejection_rate_list.append(rejection_rate)

    # Mean and standard deviation over the runs (NaN runs are ignored)
    mean_accuracy = np.nanmean(accuracy_list)
    std_accuracy = np.nanstd(accuracy_list)
    mean_rejection_rate = np.mean(rejection_rate_list)
    std_rejection_rate = np.std(rejection_rate_list)

    accuracies.append(mean_accuracy)
    rejection_rates.append(mean_rejection_rate)
    std_accuracies.append(std_accuracy)
    std_rejection_rates.append(std_rejection_rate)

In [None]:
# Mean accuracy as a function of the rejection cost
plt.plot(rejection_costs, accuracies)
plt.title('Accuracy-Rejection (AR) Curve')
plt.ylabel('Accuracy')
plt.xlabel('Rejection Cost (Wr)')
plt.show()

In [None]:
# Tabulate mean ± std of accuracy and rejection rate, one row per rejection cost
print('Rejection Cost (Wr) | Accuracy (mean ± std) | Rejection Rate (mean ± std)')
summary_rows = zip(rejection_costs, accuracies, std_accuracies, rejection_rates, std_rejection_rates)
for rejection_cost, acc_mean, acc_std, rej_mean, rej_std in summary_rows:
    print(f'{rejection_cost:.2f} | {acc_mean:.4f} ± {acc_std:.4f} | {rej_mean:.4f} ± {rej_std:.4f}')

In [None]:
# Accuracy curve with a ±1 standard deviation band
plt.plot(rejection_costs, accuracies, 'bo-', label='Accuracy')
plt.fill_between(
    rejection_costs,
    np.subtract(accuracies, std_accuracies),
    np.add(accuracies, std_accuracies),
    alpha=0.2,
)

# Rejection-rate curve with a ±1 standard deviation band
plt.plot(rejection_costs, rejection_rates, 'ro-', label='Rejection Rate')
plt.fill_between(
    rejection_costs,
    np.subtract(rejection_rates, std_rejection_rates),
    np.add(rejection_rates, std_rejection_rates),
    alpha=0.2,
)

# Title, axis labels and legend
plt.title('Curva Acurácia-Rejeição')
plt.ylabel('Taxa de Acurácia/Rejeição')
plt.xlabel('Rejection Cost (Wr)')
plt.legend()
plt.show()