In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load the Diabetes dataset and separate the features from the 'Outcome' target.
diabetes_df = pd.read_csv('diabetes.csv')
# Replace NaNs in numeric columns with that column's mean.
# NOTE(review): the means are computed over the full dataset *before* the
# train/test split below, so test rows contribute to the imputed values.
# Left as-is to keep the recorded results reproducible -- confirm whether
# per-split imputation is wanted.
diabetes_df = diabetes_df.fillna(diabetes_df.mean(numeric_only=True))
X = diabetes_df.drop('Outcome', axis=1)
y = diabetes_df['Outcome']

# Train and evaluate a Gaussian Naive Bayes classifier for several
# hold-out sizes (each ratio is the fraction of rows used for testing).
for ratio in [0.2, 0.3, 0.4]:
    # Fixed random_state so every run produces the same partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=ratio, random_state=42
    )

    # Fit a fresh model per split so no state carries over between ratios.
    model = GaussianNB()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Round (rather than truncate) the percentage: float products such as
    # 0.29 * 100 == 28.999999999999996 would otherwise be labelled "28".
    # Deriving the train share from the test share keeps the pair summing
    # to 100.
    test_pct = round(ratio * 100)
    train_pct = 100 - test_pct

    print(f"\n🔹Train-Test Split: {train_pct}-{test_pct}")
    print("Accuracy:", round(accuracy_score(y_test, y_pred), 4))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))



🔹Train-Test Split: 80-20
Accuracy: 0.7662
Confusion Matrix:
 [[79 20]
 [16 39]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.80      0.81        99
           1       0.66      0.71      0.68        55

    accuracy                           0.77       154
   macro avg       0.75      0.75      0.75       154
weighted avg       0.77      0.77      0.77       154


🔹Train-Test Split: 70-30
Accuracy: 0.7446
Confusion Matrix:
 [[119  32]
 [ 27  53]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.79      0.80       151
           1       0.62      0.66      0.64        80

    accuracy                           0.74       231
   macro avg       0.72      0.73      0.72       231
weighted avg       0.75      0.74      0.75       231


🔹Train-Test Split: 60-40
Accuracy: 0.7597
Confusion Matrix:
 [[166  40]
 [ 34  68]]
Classification Report:
               precision