In [1]:
import pandas as pd
import math
from collections import defaultdict


In [2]:
# Small hand-written training table: four Yes/No feature columns plus the
# DiseaseDiagnosis label column, five rows in total.
data = dict(
    Fever=['Yes', 'No', 'Yes', 'No', 'Yes'],
    Cough=['Yes', 'Yes', 'No', 'Yes', 'No'],
    Fatigue=['Yes', 'No', 'Yes', 'No', 'Yes'],
    TravelHistory=['Yes', 'No', 'No', 'Yes', 'No'],
    DiseaseDiagnosis=['Positive', 'Negative', 'Positive', 'Negative', 'Positive'],
)

# Each dict key becomes a column; the lists supply the row values.
df = pd.DataFrame.from_dict(data)


In [4]:
# The diagnosis column is the target; every remaining column is a feature.
y = df['DiseaseDiagnosis']
X = df.drop(columns=['DiseaseDiagnosis'])


In [5]:
def calculate_priors(y):
    """Estimate class priors as the relative frequency of each label.

    Args:
        y: pandas Series of class labels.

    Returns:
        dict mapping each distinct label in ``y`` to the fraction of rows
        carrying that label. Empty when ``y`` has no rows.
    """
    n_rows = len(y)
    return {cls: len(y[y == cls]) / n_rows for cls in y.unique()}


In [6]:
def calculate_likelihoods(X, y, alpha=1.0):
    """Estimate P(feature value | class) for every categorical feature.

    Additive (Laplace) smoothing is applied so that a value never observed
    together with a class still receives a non-zero probability; without it
    the log-probabilities taken at prediction time hit ``log(0)``.

    Args:
        X: pandas DataFrame of categorical features, one column per feature.
        y: pandas Series of class labels, index-aligned with ``X``.
        alpha: Smoothing strength added to every count. ``1.0`` is classic
            Laplace smoothing; ``0`` yields the raw, unsmoothed frequencies.

    Returns:
        Nested mapping ``likelihoods[label][column][value] -> probability``
        (a defaultdict of defaultdicts of dicts). For each class and column
        there is one entry per distinct value found in ``X[column]``.

    Raises:
        ValueError: If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    likelihoods = defaultdict(lambda: defaultdict(dict))

    # The category set of a column does not depend on the class, so compute
    # it once from the full frame instead of once per label.
    categories = {column: X[column].unique() for column in X.columns}

    for label in y.unique():
        X_label = X[y == label]
        n_label = len(X_label)
        for column, values in categories.items():
            # Smoothed estimate: (count + alpha) / (n_label + alpha * n_categories).
            denominator = n_label + alpha * len(values)
            for value in values:
                count = int((X_label[column] == value).sum())
                likelihoods[label][column][value] = (count + alpha) / denominator
    return likelihoods


In [7]:
def predict(X_test, priors, likelihoods, epsilon=1e-6):
    """Classify each row of ``X_test`` with categorical Naive Bayes.

    For every class the score is ``log(prior)`` plus the sum of the log
    likelihoods of the row's feature values; the class with the highest
    score is chosen.

    Args:
        X_test: pandas DataFrame with one row per sample to classify.
        priors: Mapping ``label -> prior probability``.
        likelihoods: Nested mapping ``label -> column -> value -> probability``.
        epsilon: Probability used for feature values (or columns/labels) that
            are missing from ``likelihoods``, and the floor applied to stored
            probabilities so a stored ``0.0`` cannot reach ``math.log``.

    Returns:
        List with the predicted label for each row, in row order.
    """
    predictions = []

    for _, row in X_test.iterrows():
        posteriors = {}

        for label, prior in priors.items():
            posterior = math.log(prior)
            # .get() keeps lookups side-effect free on defaultdicts and lets
            # plain dicts with missing labels/columns fall back to epsilon.
            label_likelihoods = likelihoods.get(label, {})

            for column in X_test.columns:
                prob = label_likelihoods.get(column, {}).get(row[column], epsilon)
                # Floor at epsilon: math.log(0) raises ValueError (math domain error).
                posterior += math.log(max(prob, epsilon))

            posteriors[label] = posterior

        predictions.append(max(posteriors, key=posteriors.get))

    return predictions


In [10]:
# Fit the model on the training split: per-class feature likelihoods first,
# then the class priors (the two estimates are independent of each other).
likelihoods = calculate_likelihoods(X, y)
priors = calculate_priors(y)


In [12]:
# Single sample to classify, using the same feature columns as the training data.
test_data = pd.DataFrame({
    'Fever': ['Yes'],
    'Cough': ['No'],
    'Fatigue': ['Yes'],
    'TravelHistory': ['No']
})

# Defensive floor: predict() takes math.log of every stored likelihood, and
# math.log(0) raises a math domain error, so any zero probability is raised
# to a small epsilon before predicting. 1e-6 matches the fallback predict()
# already uses for feature values it has no entry for. This is a floor on the
# stored values, not true Laplace smoothing.
smoothed_likelihoods = defaultdict(lambda: defaultdict(dict))
for label, columns in likelihoods.items():
    for column, values in columns.items():
        smoothed_likelihoods[label][column] = {
            value: max(prob, 1e-6) for value, prob in values.items()
        }

prediction = predict(test_data, priors, smoothed_likelihoods)
print("Predicted Diagnosis:", prediction[0])

Predicted Diagnosis: Positive
