In [24]:
import pandas as pd; pd.set_option('display.max_columns', 500)
import numpy as np 
import math
import matplotlib.pyplot as plt
from matplotlibStyle import *; setPlotly() # Custom File Delete if you want to

from sklearn.metrics import classification_report

In [8]:
df = pd.read_csv('heart_disease.csv')
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


In [38]:
import seaborn as sns

sns.pairplot(df)

<seaborn.axisgrid.PairGrid at 0x10cd5a60ee0>

Here are some tips for improving power of Naive Bayes Model:

If continuous features do not have normal distribution, we should use transformation or different methods to convert it in normal distribution.
If test data set has zero frequency issue, apply smoothing techniques “Laplace Correction” to predict the class of test data set.
Remove correlated features, as the highly correlated features are voted twice in the model and it can lead to over inflating importance.
Naive Bayes classifiers has limited options for parameter tuning like alpha=1 for smoothing, fit_prior=[True|False] to learn class prior probabilities or not and some other options (look at detail here). I would recommend to focus on your  pre-processing of data and the feature selection.
You might think to apply some classifier combination technique like ensembling, bagging and boosting but these methods would not help. Actually, “ensembling, boosting, bagging” won’t help since their purpose is to reduce variance. Naive Bayes has no variance to minimize.

In [34]:
class GaussianNaiveBayes():
    def __init__(self, ): 
        self.verbose=0 

    def separate_by_classes(self, X, y):
        ''' This function separates our dataset in subdatasets by classes '''
        self.classes = np.unique(y)
        classes_index = {}
        subdatasets = {}
        cls, counts = np.unique(y, return_counts=True)
        self.class_freq = dict(zip(cls, counts))
        if self.verbose: 
            print(self.class_freq)
        for class_type in self.classes:
            classes_index[class_type] = np.argwhere(y==class_type)
            subdatasets[class_type] = X[classes_index[class_type], :]
            self.class_freq[class_type] = self.class_freq[class_type]/sum(list(self.class_freq.values()))
        return subdatasets


    def fit(self, X, y, verbose=0):
        self.verbose=verbose
        ''' The fitting function '''
        separated_X = self.separate_by_classes(X, y)
        self.means = {}
        self.std = {}
        for class_type in self.classes:
            # Here we calculate the mean and the standart deviation from datasets
            self.means[class_type] = np.mean(separated_X[class_type], axis=0)[0]
            self.std[class_type] = np.std(separated_X[class_type], axis=0)[0]


    def calculate_probability(self, x, mean, stdev):
        ''' This function calculates the class probability using gaussian distribution '''
        exponent = math.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
    
    
    def predict_proba(self, X):
        ''' This function predicts the probability for every class '''
        self.class_prob = {cls:math.log(self.class_freq[cls], math.e) for cls in self.classes}
        for cls in self.classes:
            for i in range(len(self.means)):
                if self.verbose: 
                    print(X[i])
                self.class_prob[cls]+=math.log(self.calculate_probability(X[i], self.means[cls][i], self.std[cls][i]), math.e)
        self.class_prob = {cls: math.e**self.class_prob[cls] for cls in self.class_prob}
        return self.class_prob


    def predict(self, X):
        ''' This funtion predicts the class of a sample '''
        pred = []
        for x in X:
            pred_class = None
            max_prob = 0
            for cls, prob in self.predict_proba(x).items():
                if prob>max_prob:
                    max_prob = prob
                    pred_class = cls
            pred.append(pred_class)
        return pred

In [35]:
clf = GaussianNaiveBayes()

X = df.drop('condition', axis=1)
y = df.condition

clf.fit(X.values, y.values)
pred = clf.predict(X.values)


rep = classification_report(y, pred, output_dict=True)
rep_df = pd.DataFrame(rep).T
rep_df

Unnamed: 0,precision,recall,f1-score,support
0,0.712963,0.48125,0.574627,160.0
1,0.560847,0.773723,0.650307,137.0
accuracy,0.616162,0.616162,0.616162,0.616162
macro avg,0.636905,0.627486,0.612467,297.0
weighted avg,0.642795,0.616162,0.609536,297.0


In [36]:
from sklearn.naive_bayes import GaussianNB
X = df.drop('condition', axis=1)
y = df.condition

gnb = GaussianNB()
pred = gnb.fit(X, y).predict(X)

rep = classification_report(y, pred, output_dict=True)
rep_df = pd.DataFrame(rep).T
rep_df

Unnamed: 0,precision,recall,f1-score,support
0,0.856287,0.89375,0.874618,160.0
1,0.869231,0.824818,0.846442,137.0
accuracy,0.861953,0.861953,0.861953,0.861953
macro avg,0.862759,0.859284,0.86053,297.0
weighted avg,0.862258,0.861953,0.861621,297.0
