<a href="https://colab.research.google.com/github/triquang26/ml-course-01/blob/main/src/models/bayesian/Bayesian_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
!pip install numpy medmnist scikit-learn matplotlib



In [12]:
import numpy as np
from medmnist import INFO
import medmnist
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt

# For inline plotting in Colab
%matplotlib inline

In [13]:
# Load dataset information
dataset_info = INFO["pneumoniamnist"]
print(f"Dataset description: {dataset_info['description']}")
print(f"Number of classes: {len(dataset_info['label'])}, Labels: {dataset_info['label']}")

# Retrieve the dataset class using the info dictionary
DataClass = getattr(medmnist, dataset_info['python_class'])

# Load training and test sets
train_dataset = DataClass(split='train', download=True)
test_dataset  = DataClass(split='test', download=True)

# Extract images and labels
X_train = train_dataset.imgs   # shape (4708, 28, 28)
y_train = train_dataset.labels # shape (4708, ) or (4708, 1)
X_test  = test_dataset.imgs    # shape (624, 28, 28)
y_test  = test_dataset.labels  # shape (624, ) or (624, 1)

print("Training data shape:", X_train.shape, "Training labels shape:", y_train.shape)
print("Test data shape:", X_test.shape, "Test labels shape:", y_test.shape)


Dataset description: The PneumoniaMNIST is based on a prior dataset of 5,856 pediatric chest X-Ray images. The task is binary-class classification of pneumonia against normal. We split the source training set with a ratio of 9:1 into training and validation set and use its source validation set as the test set. The source images are gray-scale, and their sizes are (384−2,916)×(127−2,713). We center-crop the images and resize them into 1×28×28.
Number of classes: 2, Labels: {'0': 'normal', '1': 'pneumonia'}
Using downloaded and verified file: /root/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /root/.medmnist/pneumoniamnist.npz
Training data shape: (4708, 28, 28) Training labels shape: (4708, 1)
Test data shape: (624, 28, 28) Test labels shape: (624, 1)


In [14]:
# Preprocessing for Gaussian NB: flatten images and normalize pixel values to [0,1]
X_train_gaussian = X_train.reshape(X_train.shape[0], -1).astype(np.float32)
X_test_gaussian  = X_test.reshape(X_test.shape[0], -1).astype(np.float32)
X_train_gaussian /= 255.0
X_test_gaussian  /= 255.0

# Preprocessing for Bernoulli NB: binarize the normalized data (threshold at 0.5)
X_train_bernoulli = (X_train_gaussian > 0.5).astype(np.int32)
X_test_bernoulli  = (X_test_gaussian > 0.5).astype(np.int32)

# Preprocessing for Multinomial NB: use original integer pixel values (0 to 255)
X_train_multinomial = X_train.reshape(X_train.shape[0], -1).astype(np.int32)
X_test_multinomial  = X_test.reshape(X_test.shape[0], -1).astype(np.int32)

# Ensure labels are 1D arrays
y_train = y_train.reshape(-1)
y_test  = y_test.reshape(-1)


In [16]:
# Train each model in one pass using the entire training data

# Gaussian Naive Bayes: using normalized continuous features
gnb = GaussianNB()
gnb.fit(X_train_gaussian, y_train)

# Bernoulli Naive Bayes: using binarized features
bnb = BernoulliNB()
bnb.fit(X_train_bernoulli, y_train)

# Multinomial Naive Bayes: using original integer pixel values
mnb = MultinomialNB()
mnb.fit(X_train_multinomial, y_train)

# Define evaluation function (unchanged)
def evaluate_model(model, X_test, y_test, model_name):
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred, target_names=['Normal', 'Pneumonia'])
    print(f"\n{model_name} Results on Test Data:")
    print(f"Accuracy: {acc:.4f}")
    print("Confusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(cr)
    return y_pred, acc, cm, cr

# Evaluate each model on the test data
evaluate_model(gnb, X_test_gaussian, y_test, "Gaussian NB")
evaluate_model(bnb, X_test_bernoulli, y_test, "Bernoulli NB")
evaluate_model(mnb, X_test_multinomial, y_test, "Multinomial NB")




Gaussian NB Results on Test Data:
Accuracy: 0.8333
Confusion Matrix:
[[171  63]
 [ 41 349]]

Classification Report:
              precision    recall  f1-score   support

      Normal       0.81      0.73      0.77       234
   Pneumonia       0.85      0.89      0.87       390

    accuracy                           0.83       624
   macro avg       0.83      0.81      0.82       624
weighted avg       0.83      0.83      0.83       624


Bernoulli NB Results on Test Data:
Accuracy: 0.8013
Confusion Matrix:
[[175  59]
 [ 65 325]]

Classification Report:
              precision    recall  f1-score   support

      Normal       0.73      0.75      0.74       234
   Pneumonia       0.85      0.83      0.84       390

    accuracy                           0.80       624
   macro avg       0.79      0.79      0.79       624
weighted avg       0.80      0.80      0.80       624


Multinomial NB Results on Test Data:
Accuracy: 0.8333
Confusion Matrix:
[[174  60]
 [ 44 346]]

Classification

(array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1,
        1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0,
        1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1,
        1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
        0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
        1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
        0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1,
        1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,
        0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 