In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Read the raw table from disk
cancer_df = pd.read_csv('Breast_Cancer.csv')

# Discard the 'id' and 'Unnamed: 32' columns so they are not carried into the features
# (reassignment instead of inplace=True; the resulting frame is the same)
cancer_df = cancer_df.drop(columns=['id', 'Unnamed: 32'])

# Target is the 'diagnosis' column; every other remaining column is a candidate feature
y = cancer_df['diagnosis']
X = cancer_df.drop(columns='diagnosis')

# Restrict the feature matrix to numeric dtypes
X_numeric = X.select_dtypes(include='number')

# Train and evaluate (no imputation)
# For each hold-out fraction: split the data, fit a fresh Gaussian Naive Bayes
# model on the training part, predict the held-out part, and print accuracy,
# confusion matrix and the per-class classification report.
for ratio in [0.2, 0.3, 0.4]:
    # A fixed random_state makes each split reproducible across re-runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X_numeric, y, test_size=ratio, random_state=42
    )
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Use round(), not int(): ratio * 100 is a float product (0.29 * 100 evaluates
    # to 28.999999999999996) and int() would truncate it to the wrong percentage.
    # Deriving the train share from the test share keeps the label summing to 100.
    test_pct = round(ratio * 100)
    train_pct = 100 - test_pct

    print(f"\n🔹Train-Test Split: {train_pct}-{test_pct}")
    print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))



🔹Train-Test Split: 80-20
Accuracy: 0.9737
Confusion Matrix:
 [[71  0]
 [ 3 40]]
Classification Report:
               precision    recall  f1-score   support

           B       0.96      1.00      0.98        71
           M       1.00      0.93      0.96        43

    accuracy                           0.97       114
   macro avg       0.98      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114


🔹Train-Test Split: 70-30
Accuracy: 0.9415
Confusion Matrix:
 [[104   4]
 [  6  57]]
Classification Report:
               precision    recall  f1-score   support

           B       0.95      0.96      0.95       108
           M       0.93      0.90      0.92        63

    accuracy                           0.94       171
   macro avg       0.94      0.93      0.94       171
weighted avg       0.94      0.94      0.94       171


🔹Train-Test Split: 60-40
Accuracy: 0.9518
Confusion Matrix:
 [[144   4]
 [  7  73]]
Classification Report:
               precision