## **Library import**

In [9]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler

## **Loading CSV File**

In [2]:
df = pd.read_csv("breast-cancer-wisconsin.csv")
df.shape

(699, 11)

## **Dropping missing values row if any**

In [3]:
df.replace("?",np.nan,inplace=True)
df.dropna(inplace=True)
df.shape

(683, 11)

## **Extracting Class Column and dropping it from data frame**

In [4]:
target = df["Class"]
df.drop(columns=["Class", "Sample"], inplace=True)
df

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9
0,5,1,1,1,2,1,3,1,1
1,5,4,4,5,7,10,3,2,1
2,3,1,1,1,2,2,3,1,1
3,6,8,8,1,3,4,3,7,1
4,4,1,1,3,2,1,3,1,1
...,...,...,...,...,...,...,...,...,...
694,3,1,1,1,3,2,1,1,1
695,2,1,1,1,2,1,1,1,1
696,5,10,10,3,7,3,8,10,2
697,4,8,6,4,3,4,10,6,1


## **Scaling Data using Min-Max Scaler**

In [5]:
scaler = MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
df

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9
0,0.444444,0.000000,0.000000,0.000000,0.111111,0.000000,0.222222,0.000000,0.000000
1,0.444444,0.333333,0.333333,0.444444,0.666667,1.000000,0.222222,0.111111,0.000000
2,0.222222,0.000000,0.000000,0.000000,0.111111,0.111111,0.222222,0.000000,0.000000
3,0.555556,0.777778,0.777778,0.000000,0.222222,0.333333,0.222222,0.666667,0.000000
4,0.333333,0.000000,0.000000,0.222222,0.111111,0.000000,0.222222,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...
678,0.222222,0.000000,0.000000,0.000000,0.222222,0.111111,0.000000,0.000000,0.000000
679,0.111111,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000,0.000000
680,0.444444,1.000000,1.000000,0.222222,0.666667,0.222222,0.777778,1.000000,0.111111
681,0.333333,0.777778,0.555556,0.333333,0.222222,0.333333,1.000000,0.555556,0.000000


## **Dividing the data into train (70%) and test (30%) split**

In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(
    df, target, test_size=0.3, random_state=4, shuffle=True
)

## **Initializing Naiyes Bayes Classifier**

In [12]:
nb = GaussianNB()
nb.fit(X_train, Y_train)
Y_pred = nb.predict(X_test)
print(f"Model Accuracy: {accuracy_score(Y_test, Y_pred)}")
print(f"Confusion Matrix:\n {confusion_matrix(Y_test, Y_pred)}\n")
print(f"Classification Report:\n {classification_report(Y_test, Y_pred)}\n")

Model Accuracy: 0.9219512195121952
Confusion Matrix:
 [[118  14]
 [  2  71]]

Classification Report:
               precision    recall  f1-score   support

           2       0.98      0.89      0.94       132
           4       0.84      0.97      0.90        73

    accuracy                           0.92       205
   macro avg       0.91      0.93      0.92       205
weighted avg       0.93      0.92      0.92       205


