In [77]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score
import numpy as np
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.inference import VariableElimination
from random import shuffle

In [78]:
# Load the tab-separated records from the text file into a DataFrame.
data = pd.read_csv('2020_bn_nb_data.txt', sep='\t')

In [79]:
# Encode each letter grade as a consecutive integer code, in the order listed.
grade_labels = ['AA', 'AB', 'BB', 'BC', 'CC', 'CD', 'DD', 'F']
grade_mapping = {label: code for code, label in enumerate(grade_labels)}

# Swap every matching grade string in the frame for its integer code, in place.
data.replace(to_replace=grade_mapping, inplace=True)

  data.replace(grade_mapping, inplace=True)


In [80]:
# PH100 is modelled as the common child of the three other course grades.
ph100_parents = ('EC100', 'IT101', 'MA101')
bayesian_model = BayesianNetwork([(parent, 'PH100') for parent in ph100_parents])

In [81]:
# Learn the network's parameters from `data` with the maximum-likelihood estimator.
bayesian_model.fit(data=data, estimator=MaximumLikelihoodEstimator)

In [82]:
# Build a variable-elimination inference object over the fitted network.
inference = VariableElimination(model=bayesian_model)

# Query: What grade will a student get in PH100 if they earn DD in EC100, CC in IT101, and CD in MA101?

In [83]:
# Invert the encoding so an integer code can be turned back into its letter grade.
reverse_grade_mapping = dict((code, label) for label, code in grade_mapping.items())

# Known grades for the student, written as letters and then encoded for the model.
observed_grades = {'EC100': 'DD', 'IT101': 'CC', 'MA101': 'CD'}
evidence = {course: grade_mapping[grade] for course, grade in observed_grades.items()}

# MAP query: the single most probable PH100 state given the evidence.
query_result = inference.map_query(variables=['PH100'], evidence=evidence)
predicted_PH100_grade = query_result['PH100']
predicted_grade_label = reverse_grade_mapping[predicted_PH100_grade]

print("Predicted grade in PH100 : ", predicted_grade_label)


Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]

Predicted grade in PH100 :  BB





In [84]:
# Features: every column except the QP label.
X = data.drop('QP', axis='columns')
# Target: 1 where QP equals 'y', 0 for anything else.
y = (data['QP'] == 'y').astype('int64')

In [85]:
def run_experiment(X, y, n_runs=20, test_size=0.3):
    """Evaluate a Categorical Naive Bayes classifier over repeated random splits.

    For each run ``i`` in ``range(n_runs)`` the data is split with
    ``random_state=i``, a ``CategoricalNB(alpha=1.0)`` model is fitted on the
    training part and scored on the held-out part. The mean and population
    standard deviation (``np.std``) of the accuracies are printed.

    Parameters
    ----------
    X : DataFrame or array-like of shape (n_samples, n_features)
        Features encoded as non-negative integer category codes (required by
        ``CategoricalNB``).
    y : array-like of shape (n_samples,)
        Class labels.
    n_runs : int, default=20
        Number of random train/test splits to evaluate.
    test_size : float, default=0.3
        Fraction of samples held out for testing in each split.

    Returns
    -------
    None
        Results are reported via ``print``.

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    # Convert once up front; the split indices depend only on n_samples and the seed.
    features = np.asarray(X)

    # CategoricalNB sizes its per-feature tables from the categories seen during
    # fit. Laplace smoothing (alpha) does not help if a test row carries a code
    # larger than any training code: predict() then fails with an IndexError.
    # Declaring the category count per feature from the full matrix avoids that.
    min_categories = features.max(axis=0).astype(int) + 1

    accuracies = []
    for seed in range(n_runs):
        X_train, X_test, y_train, y_test = train_test_split(
            features, y, test_size=test_size, shuffle=True, random_state=seed
        )

        nb_model = CategoricalNB(alpha=1.0, min_categories=min_categories)
        nb_model.fit(X_train, y_train)

        y_pred = nb_model.predict(X_test)
        accuracies.append(accuracy_score(y_test, y_pred))

    mean_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)

    print(f"Naive Bayes Classifier Mean Accuracy over {n_runs} runs: {mean_accuracy:.4f}")
    print(f"Standard Deviation of Accuracy over {n_runs} runs: {std_accuracy:.4f}")

In [86]:
# Run the repeated-split Naive Bayes evaluation on the QP features and labels.
run_experiment(X=X, y=y)

Naive Bayes Classifier Mean Accuracy over 20 runs: 0.9771
Standard Deviation of Accuracy over 20 runs: 0.0154


In [87]:
# QP is modelled as the common child of the three course grades.
qp_parents = ('EC100', 'IT101', 'MA101')
dependent_model = BayesianNetwork([(parent, 'QP') for parent in qp_parents])
# Learn the parameters with the Bayesian estimator and a BDeu prior.
dependent_model.fit(data=data, estimator=BayesianEstimator, prior_type="BDeu")