In [119]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [121]:
# Load dataset
dataset = pd.read_csv("./CKD.csv")

In [123]:
# Separate independent and dependent variables
independent = dataset.drop("classification", axis=1)
dependent = dataset["classification"]

In [125]:
# convert categorical data into binary format(one-hot encoding)
independent = pd.get_dummies(independent, drop_first=True)

In [127]:
# Dependent variable is categorical (e.g., 'yes'/'no'), encode it to numerical values
label_encoder = LabelEncoder()
dependent = label_encoder.fit_transform(dependent)

In [130]:
x_train, x_test, y_train, y_test = train_test_split(independent, dependent, test_size=1/3, random_state=0)

In [132]:
# using OneHotEncoder for categorical features
encoder = OneHotEncoder(handle_unknown='ignore')
x_train = encoder.fit_transform(x_train)
x_test = encoder.transform(x_test)


In [134]:
# MultinomialNB
classifier = MultinomialNB()
classifier.fit(x_train, y_train)
y_predict = classifier.predict(x_test)
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict, zero_division=0)
print(report)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98        51
           1       0.99      0.99      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.98      0.98       133
weighted avg       0.98      0.98      0.98       133



In [136]:
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 2.0, 10.0],
    'fit_prior': [True, False]
}

classifier = GridSearchCV(MultinomialNB(), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='f1_weighted')
classifier.fit(x_train, y_train)

best_params = classifier.best_params_
print("Best Parameters:", best_params)

y_predict = classifier.predict(x_test)
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict, zero_division=0)
print(report)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Parameters: {'alpha': 0.1, 'fit_prior': True}
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        51
           1       1.00      0.99      0.99        82

    accuracy                           0.99       133
   macro avg       0.99      0.99      0.99       133
weighted avg       0.99      0.99      0.99       133



In [139]:
classifier = BernoulliNB()
classifier.fit(x_train, y_train)
y_predict = classifier.predict(x_test)
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict, zero_division=0)
print(report)

              precision    recall  f1-score   support

           0       0.96      1.00      0.98        51
           1       1.00      0.98      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.99      0.98       133
weighted avg       0.99      0.98      0.99       133



In [141]:
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 2.0, 10.0],
    'fit_prior': [True, False]
}

classifier = GridSearchCV(BernoulliNB(), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='f1_weighted')
classifier.fit(x_train, y_train)

best_params = classifier.best_params_
print("Best Parameters:", best_params)

y_predict = classifier.predict(x_test)
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict, zero_division=0)
print(report)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Parameters: {'alpha': 0.5, 'fit_prior': False}
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        51
           1       1.00      0.98      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.99      0.98       133
weighted avg       0.99      0.98      0.99       133



In [152]:
cnb = CategoricalNB()
cnb.fit(x_train.toarray(), y_train)
y_predict = cnb.predict(x_test.toarray())
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict, zero_division = 0)
print(report)

              precision    recall  f1-score   support

           0       0.96      1.00      0.98        51
           1       1.00      0.98      0.99        82

    accuracy                           0.98       133
   macro avg       0.98      0.99      0.98       133
weighted avg       0.99      0.98      0.99       133



In [158]:
param_grid = {
    'alpha': [0.1, 0.5, 1.0, 2.0, 10.0],
    'fit_prior': [True, False]
}

classifier = GridSearchCV(CategoricalNB(), param_grid, refit=True, verbose=3, n_jobs=-1, scoring='f1_weighted')
classifier.fit(x_train.toarray(), y_train)

best_params = classifier.best_params_
print("Best Parameters:", best_params)

y_predict = classifier.predict(x_test.toarray())
confusion_matrix(y_test, y_predict)
report = classification_report(y_test, y_predict, zero_division=0)
print(report)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 971, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 279, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 371, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/metrics/_scorer.py", line 89, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/sklearn/utils/_response.py", line 211, in _get_response_values

Best Parameters: {'alpha': 0.1, 'fit_prior': True}
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        51
           1       1.00      0.99      0.99        82

    accuracy                           0.99       133
   macro avg       0.99      0.99      0.99       133
weighted avg       0.99      0.99      0.99       133

