In [1]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_val_score, cross_validate
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
class GaussianNaiveBayes(ClassifierMixin, BaseEstimator):
    """Gaussian Naive Bayes classifier following the scikit-learn estimator API.

    Each feature is modelled, per class, as an independent normal distribution
    whose mean and variance are estimated in ``fit``. Prediction picks the
    class with the highest log-prior plus summed per-feature log-likelihood.

    The mixin is listed before ``BaseEstimator`` because scikit-learn expects
    mixins on the left of the inheritance list.

    Attributes set by ``fit``:
        classes_: sorted array of the unique labels seen in ``y``.
        means_: dict mapping class label -> per-feature mean vector.
        vars_: dict mapping class label -> per-feature variance vector.
        priors_: dict mapping class label -> fraction of training samples.
    """

    # Added to the variance terms so zero-variance features do not divide by zero.
    _EPS = 1e-9

    def _calculate_probability(self, mean: np.ndarray, var: np.ndarray, x: np.ndarray) -> np.ndarray:
        """Return the element-wise Gaussian density of ``x`` for the given mean/variance.

        Kept for callers that need the density itself; prediction uses
        ``_calculate_log_probability`` because this value can underflow to 0.
        """
        coeff = 1.0 / np.sqrt(2.0 * np.pi * var + self._EPS)
        exponent = np.exp(-((x - mean) ** 2) / (2 * var + self._EPS))
        return coeff * exponent

    def _calculate_log_probability(self, mean: np.ndarray, var: np.ndarray, x: np.ndarray) -> np.ndarray:
        """Return the element-wise log of the Gaussian density of ``x``.

        Algebraically equal to ``log(_calculate_probability(...))`` but computed
        without exponentiating, so samples far from the mean yield a large
        negative finite value instead of ``log(0) = -inf``.
        """
        log_coeff = -0.5 * np.log(2.0 * np.pi * var + self._EPS)
        log_exponent = -((x - mean) ** 2) / (2 * var + self._EPS)
        return log_coeff + log_exponent

    def _joint_log_likelihood(self, X: np.ndarray) -> np.ndarray:
        """Return an array of shape (n_classes, n_samples) of unnormalised log-posteriors.

        Rows follow the order of ``self.classes_``.
        """
        scores = []
        for cls in self.classes_:
            log_prior = np.log(self.priors_[cls])
            # Naive independence assumption: sum the per-feature log-likelihoods.
            log_likelihood = np.sum(
                self._calculate_log_probability(self.means_[cls], self.vars_[cls], X),
                axis=1,
            )
            scores.append(log_prior + log_likelihood)
        return np.vstack(scores)

    def _calculate_posterior(self, X: np.ndarray) -> np.ndarray:
        """Return, for each row of ``X``, the label with the highest log-posterior.

        Ties resolve to the class that comes first in ``self.classes_``.
        """
        return self.classes_[np.argmax(self._joint_log_likelihood(X), axis=0)]

    def fit(self, X: np.ndarray, y: np.ndarray) -> 'GaussianNaiveBayes':
        """Estimate per-class feature means, variances and class priors.

        Args:
            X: 2-D array-like of training samples (rows) by features (columns).
            y: 1-D array-like of class labels, one per row of ``X``.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        # Accept lists and other array-likes, not only ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        self.classes_ = np.unique(y)
        self.means_ = {}
        self.vars_ = {}
        self.priors_ = {}

        for cls in self.classes_:
            X_cls = X[y == cls]
            self.means_[cls] = np.mean(X_cls, axis=0)
            self.vars_[cls] = np.var(X_cls, axis=0)
            self.priors_[cls] = X_cls.shape[0] / X.shape[0]

        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict a class label for every row of ``X``.

        Args:
            X: 2-D array-like with the same number of columns used in ``fit``.

        Returns:
            1-D array of predicted labels drawn from ``self.classes_``.
        """
        return self._calculate_posterior(np.asarray(X))

In [3]:
# Load the iris dataset and pull out its feature matrix and target vector
iris = load_iris()
X = iris.data
y = iris.target

In [4]:
# Single random seed shared by the decision tree, the SVM and the CV splitter below
SEED = 2134

In [5]:
# Initialize classifiers
gnb = GaussianNaiveBayes()
dt = DecisionTreeClassifier(random_state=SEED)
svm = SVC(kernel='linear', random_state=SEED)

# Stratified K-Fold cross-validation
cv = StratifiedKFold(n_splits=5, random_state=SEED, shuffle=True)

# Define scoring metrics
scoring_metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']


def cross_validated_scores(estimator, X, y, cv, scoring):
    """Return per-fold test scores for every metric in ``scoring``.

    Runs a single ``cross_validate`` pass so each fold is fitted once and
    scored with all metrics, instead of refitting the model once per metric.

    Args:
        estimator: unfitted scikit-learn compatible classifier.
        X: feature matrix.
        y: target labels.
        cv: cross-validation splitter.
        scoring: list of scorer names.

    Returns:
        Dict mapping each metric name to the array of per-fold test scores.
    """
    results = cross_validate(estimator, X, y, cv=cv, scoring=scoring)
    # cross_validate prefixes test results with "test_"; strip it so the keys
    # are the plain metric names.
    return {metric: results[f"test_{metric}"] for metric in scoring}


# Cross-validation scores for Gaussian Naive Bayes
gnb_scores = cross_validated_scores(gnb, X, y, cv, scoring_metrics)
# Cross-validation scores for Decision Tree
dt_scores = cross_validated_scores(dt, X, y, cv, scoring_metrics)
# Cross-validation scores for SVM
svm_scores = cross_validated_scores(svm, X, y, cv, scoring_metrics)

# Calculate mean and std for each metric
def summarize_scores(scores):
    """Collapse per-fold scores into a ``(mean, std)`` pair for each metric.

    Args:
        scores: mapping of metric name -> sequence of per-fold score values.

    Returns:
        Dict with the same keys, each mapped to ``(np.mean(values), np.std(values))``.
    """
    summary = {}
    for metric_name, fold_values in scores.items():
        average = np.mean(fold_values)
        spread = np.std(fold_values)
        summary[metric_name] = (average, spread)
    return summary

# Summarize the per-fold scores of each classifier in one pass
gnb_summary, dt_summary, svm_summary = map(
    summarize_scores, (gnb_scores, dt_scores, svm_scores)
)

In [6]:
# Show the Gaussian Naive Bayes summary: metric name -> (mean, std) over the CV folds
print(gnb_summary)

{'accuracy': (0.9466666666666667, 0.03399346342395189), 'precision_macro': (0.94996632996633, 0.03306500166407341), 'recall_macro': (0.9466666666666665, 0.03399346342395191), 'f1_macro': (0.9464651527809422, 0.03410602725792345)}


In [9]:
# Comparison table: one column per classifier, one row per metric,
# each cell holding that metric's (mean, std) tuple.
df = pd.DataFrame({"Naiwny Bayes": gnb_summary, "Drzewo decyzyjne": dt_summary, "SVM": svm_summary})
# Raw f-string: "\p" is not a valid Python escape, so a plain f-string raises
# a SyntaxWarning here. The rendered text ("$\pm$") is unchanged.
formatted_df = df.map(lambda x: rf"{x[0]:.3f} $\pm$ {x[1]:.3f}")
print(formatted_df)
print(formatted_df.to_latex())

                      Naiwny Bayes   Drzewo decyzyjne                SVM
accuracy         0.947 $\pm$ 0.034  0.953 $\pm$ 0.027  0.987 $\pm$ 0.016
precision_macro  0.950 $\pm$ 0.033  0.958 $\pm$ 0.024  0.988 $\pm$ 0.015
recall_macro     0.947 $\pm$ 0.034  0.953 $\pm$ 0.027  0.987 $\pm$ 0.016
f1_macro         0.946 $\pm$ 0.034  0.953 $\pm$ 0.027  0.987 $\pm$ 0.016
\begin{tabular}{llll}
\toprule
 & Naiwny Bayes & Drzewo decyzyjne & SVM \\
\midrule
accuracy & 0.947 $\pm$ 0.034 & 0.953 $\pm$ 0.027 & 0.987 $\pm$ 0.016 \\
precision_macro & 0.950 $\pm$ 0.033 & 0.958 $\pm$ 0.024 & 0.988 $\pm$ 0.015 \\
recall_macro & 0.947 $\pm$ 0.034 & 0.953 $\pm$ 0.027 & 0.987 $\pm$ 0.016 \\
f1_macro & 0.946 $\pm$ 0.034 & 0.953 $\pm$ 0.027 & 0.987 $\pm$ 0.016 \\
\bottomrule
\end{tabular}



  formatted_df = df.map(lambda x: f"{x[0]:.3f} $\pm$ {x[1]:.3f}")
