In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, roc_curve, auc
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder

In [None]:
# The raw file has no header row, so pandas labels the columns 0, 1, 2, ...
RAW_DATA_PATH = "data/agaricus-lepiota.data"
df = pd.read_csv(RAW_DATA_PATH, header=None)
df.head()

In [None]:
# Feature frame: a fresh copy of the data without column 0 (column 0 is the target).
X = df.drop(labels=0, axis="columns")
X

In [None]:
# Integer-encode every feature column in one pass.
# OrdinalEncoder is scikit-learn's encoder for feature matrices (LabelEncoder is
# documented as intended for target labels only). Like LabelEncoder, it numbers the
# sorted unique values of each column 0..k-1, so the resulting codes are the same.
# Building a new frame instead of assigning column by column keeps this cell
# idempotent and preserves the original index and column labels.
# NOTE(review): integer codes impose an arbitrary order on what look like nominal
# categories -- confirm that is acceptable for the Gaussian model fitted on X.
X = pd.DataFrame(
    OrdinalEncoder(dtype=int).fit_transform(X),
    index=X.index,
    columns=X.columns,
)
X

In [None]:
# Target vector: column 0 of the raw frame, mapped to integer codes.
# The encoder is kept in a variable so its label-to-code mapping can be inspected.
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(df[0])
y

In [None]:
# Stratified hold-out split (library-default proportions) with a fixed seed
# so the same rows land in train/test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

# Fit Gaussian Naive Bayes on the training portion; fit() returns the estimator
# itself, and the trailing bare name echoes it as the cell output.
nb = GaussianNB().fit(X_train, y_train)
nb

In [None]:
# Predict labels for the held-out rows and print the share predicted correctly.
y_pred = nb.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy : {test_accuracy}")

In [None]:
# Text summary of per-class metrics for the held-out predictions.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

In [None]:
# Confusion matrix of true vs. predicted labels on the held-out split, drawn as a plot.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm)
cm_display.plot()

In [None]:
# Score each test row with the predicted probability from the second column of
# predict_proba (the class at nb.classes_[1]), then build the ROC curve and its AUC.
y_pred_proba = nb.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
# Dashed diagonal as the "no skill" reference line.
plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Title corrected: this notebook loads agaricus-lepiota.data (mushrooms),
# not a breast cancer dataset.
plt.title('ROC Curve for Mushroom Classification')
plt.legend()
plt.show()