In [8]:
# Import Libraries
import pandas as pd
import numpy as np

In [9]:
# Load the dataset, then separate the two predictor columns from the target column
data = pd.read_csv('./datasets/P8_dataset.csv')

X = data.loc[:, ['glucose', 'bloodpressure']]
y = data.loc[:, 'diabetes']

In [13]:
# Class priors: relative frequency of each label in y
class_prior = y.value_counts(normalize=True)


def _gaussian_params(values):
    """Return the mean and standard deviation of `values` as a dict.

    Uses the pandas defaults, so `std` is the sample standard deviation (ddof=1).
    """
    return {'mean': values.mean(), 'std': values.std()}


# Gaussian likelihood parameters, indexed as likelihoods[feature][class_value]
likelihoods = {
    feature: {
        class_value: _gaussian_params(X.loc[y == class_value, feature])
        for class_value in y.unique()
    }
    for feature in X.columns
}

In [14]:
# Gaussian PDF Function
def gaussian_pdf(x, mean, std):
    """Evaluate the normal probability density with the given mean and std at x.

    Args:
        x: Point (or NumPy array of points) at which to evaluate the density.
        mean: Mean of the distribution.
        std: Standard deviation of the distribution.

    Returns:
        The density value(s) exp(-(x - mean)^2 / (2 std^2)) / (sqrt(2 pi) std).
    """
    normaliser = 1 / (np.sqrt(2 * np.pi) * std)
    exponent = -((x - mean) ** 2 / (2 * std ** 2))
    return normaliser * np.exp(exponent)

# Classify New Data Points
def _gaussian_log_pdf(x, mean, std):
    """Natural log of the Gaussian density with the given mean and std at x."""
    return -0.5 * np.log(2 * np.pi) - np.log(std) - (x - mean) ** 2 / (2 * std ** 2)


def classify(instance, priors=None, params=None):
    """Return the class with the highest Gaussian Naive Bayes posterior score.

    Scores are accumulated in log space: log(prior) plus the sum of per-feature
    Gaussian log-densities. Multiplying raw densities underflows to 0.0 for
    values far from the class means (or for many features), which makes every
    class score 0 and the winner arbitrary; summing logs picks the same class
    whenever the product does not underflow and stays meaningful when it does.

    Args:
        instance: pandas Series or mapping of feature name -> observed value.
        priors: Series or mapping of class value -> prior probability.
            Defaults to the notebook-level `class_prior`.
        params: Nested mapping params[feature][class_value] -> {'mean', 'std'}.
            Defaults to the notebook-level `likelihoods`.

    Returns:
        The class value (a key of `priors`) with the largest log-posterior.
        On ties, the class that appears first in `priors` wins.
    """
    if priors is None:
        priors = class_prior
    if params is None:
        params = likelihoods

    log_posteriors = {}
    for class_value in priors.keys():
        log_posterior = np.log(priors[class_value])
        for feature in instance.keys():
            stats = params[feature][class_value]
            log_posterior += _gaussian_log_pdf(
                instance[feature], stats['mean'], stats['std']
            )
        log_posteriors[class_value] = log_posterior
    return max(log_posteriors, key=log_posteriors.get)

In [15]:
# Demo: run the classifier on one hand-written sample and report the outcome
new_instance = pd.Series({'glucose': 120, 'bloodpressure': 70})
predicted_class = classify(new_instance)

# Class value 1 is reported as 'Yes'; any other class value as 'No'
if predicted_class == 1:
    predicted_class_label = 'Yes'
else:
    predicted_class_label = 'No'

print(f'The predicted class for the instance {new_instance.to_dict()} is {predicted_class_label}')

The predicted class for the instance {'glucose': 120, 'bloodpressure': 70} is Yes
