In [1]:
import pandas as pd 

data = pd.read_csv('heart.csv')

In [2]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


##2,3

In [23]:
import numpy as np
import pandas as pd
import math


class GaussianClf:
    def separate_by_classes(self, X, y):
        """a function that separates our dataset in subdatasets by classes

        Args:
            X (np.ndarray): features
            y (np.ndarray): labels

        Returns:
            np.ndarray: separated data
        """        
        self.classes = np.unique(y)
        classes_index = {}
        subdatasets = {}
        cls, counts = np.unique(y, return_counts=True)
        self.class_freq = dict(zip(cls, counts))
    
        for class_type in self.classes:
            classes_index[class_type] = np.argwhere(y==class_type)
            subdatasets[class_type] = X[classes_index[class_type], :]
            self.class_freq[class_type] = self.class_freq[class_type]/sum(list(self.class_freq.values()))
        return subdatasets

    def fit(self, X, y):
        """a function to fit model

        Args:
            X (np.ndarray): features
            y (np.ndarray): labels
        """        
        
        separated_X = self.separate_by_classes(X, y)
        self.means = {}
        self.std = {}
        for class_type in self.classes:
            self.means[class_type] = np.mean(separated_X[class_type], axis=0)[0]
            self.std[class_type] = np.std(separated_X[class_type], axis=0)[0]
        
    def calculate_probability(self, x, mean, stdev):
        """a function that calculates the class probability using gaussian distribution

        Args:
            x (np.array): feature
            mean (float)): mean of feature
            stdev (float): standard deviation of feature

        Returns:
            float: class probability
        """        

        exponent = math.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

    def predict_proba(self, X):
        """a function that predicts the probability for every class

        Args:
            X (np.ndarray): features

        Returns:
            float: class probability
        """        
        
        self.class_prob = {cls:math.log(self.class_freq[cls], math.e) for cls in self.classes}
        for cls in self.classes:
            for i in range(len(self.means)):
                self.class_prob[cls]+=math.log(self.calculate_probability(X[i], self.means[cls][i], self.std[cls][i]), math.e)
        self.class_prob = {cls: math.e**self.class_prob[cls] for cls in self.class_prob}
        return self.class_prob

    def predict(self, X):
        """a funtion that predicts the class of a sample

        Args:
            X (np.ndarray): features

        Returns:
            list : predictions
        """        
        pred = []
        for x in X:
            pred_class = None
            max_prob = 0
        
            for cls, prob in self.predict_proba(x).items():
                if prob>max_prob:
                    max_prob = prob
                    pred_class = cls
            pred.append(pred_class)
        return pred

### our model

In [25]:
##use only three features for train and test with our model

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score

x = data[['thalach', 'trestbps', 'chol']].to_numpy()
y = data['target'].to_numpy()
x_train,x_test,y_train,y_test=train_test_split(x , y,test_size=.2, random_state=42)

clf = GaussianClf()
clf.fit(x_train, y_train)
pred= clf.predict(x_test)
print(f"f1_score: {f1_score(y_test, pred)}")
print(f"precision: {precision_score(y_test, pred)}")
print(f"recall: {recall_score(y_test, pred)}")

f1_score: 0.763157894736842
precision: 0.6590909090909091
recall: 0.90625


In [27]:
## use all features for train and test with our model 

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB

x = data.drop(['target'], axis=1).to_numpy()
y = data['target'].to_numpy()
x_train,x_test,y_train,y_test=train_test_split(x , y,test_size=.2, random_state=42)

clf = GaussianClf()
clf.fit(x_train, y_train)
pred= clf.predict(x_test)
print(f"f1_score: {f1_score(y_test, pred)}")
print(f"precision: {precision_score(y_test, pred)}")
print(f"recall: {recall_score(y_test, pred)}")

f1_score: 0.676056338028169
precision: 0.6153846153846154
recall: 0.75


### sklearn model

In [26]:
## use all features for train and test with our model ##use only three features for train and test with sklearn model

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB

x = data[['thalach', 'trestbps', 'chol']].to_numpy()
y = data['target'].to_numpy()
x_train,x_test,y_train,y_test=train_test_split(x , y,test_size=.2, random_state=42)

clf = GaussianNB()
clf.fit(x_train, y_train)
pred= clf.predict(x_test)
print(f"f1_score: {f1_score(y_test, pred)}")
print(f"precision: {precision_score(y_test, pred)}")
print(f"recall: {recall_score(y_test, pred)}")

f1_score: 0.7761194029850748
precision: 0.7428571428571429
recall: 0.8125


In [28]:
## use all features for train and test with sklearn model

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.naive_bayes import GaussianNB

x = data.drop(['target'], axis=1).to_numpy()
y = data['target'].to_numpy()
x_train,x_test,y_train,y_test=train_test_split(x , y,test_size=.2, random_state=42)

clf = GaussianNB()
clf.fit(x_train, y_train)
pred= clf.predict(x_test)
print(f"f1_score: {f1_score(y_test, pred)}")
print(f"precision: {precision_score(y_test, pred)}")
print(f"recall: {recall_score(y_test, pred)}")

f1_score: 0.870967741935484
precision: 0.9
recall: 0.84375
