In [77]:
## Instalowanie modułów

# !pip3 install --upgrade pip

# !pip3 install -U numpy
# !pip3 install -U pandas
# !pip3 install -U jinja2
# !pip3 install -U scikit-learn

In [78]:
## Importowanie modułów

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

## Import do powtórzenia zadania 4
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [79]:
## Implementacja naiwnego klasyfikatora Bayesa

class GaussianNaiveBayes(BaseEstimator, ClassifierMixin):
    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.class_prior_ = np.zeros(len(self.classes_))
        
        self.means_ = np.zeros((len(self.classes_), X.shape[1]))    # mu daszkowe
        self.vars_ = np.zeros((len(self.classes_), X.shape[1]))     # sigma daszkowe
        
        for idx, c in enumerate(self.classes_):
            X_c = X[y == c]
            self.class_prior_[idx] = X_c.shape[0] / X.shape[0]  # P(y=k)
            self.means_[idx, :] = X_c.mean(axis=0)
            self.vars_[idx, :] = X_c.var(axis=0)
        
        return self
    
    def predict(self, X):
        return np.array([self._predict_instance(x) for x in X])
    
    def _predict_instance(self, x):
        posteriors = []
        for idx, c in enumerate(self.classes_):
            prior = np.log(self.class_prior_[idx])
            posterior = np.sum(np.log(self._pdf(idx, x)))
            posteriors.append(prior + posterior)
        return self.classes_[np.argmax(posteriors)]
    
    def _pdf(self, class_idx, x):
        mean = self.means_[class_idx]
        var = self.vars_[class_idx]
        licznik = np.exp(- (x - mean) ** 2 / (2 * var))
        mianownik = np.sqrt(2 * np.pi * var)
        return licznik / mianownik


In [80]:
## Wczytywanie danych o iryskach

iris = load_iris()
X, y = iris.data, iris.target


In [81]:
## Ewaluacja

def evaluate_model(model, X, y):
    skf = StratifiedKFold(n_splits=5)   # kf = KFold(n_splits=5, shuffle=True, random_state=42)
    accuracy = cross_val_score(model, X, y, cv=skf, scoring='accuracy')
    precision = cross_val_score(model, X, y, cv=skf, scoring='precision_macro')
    recall = cross_val_score(model, X, y, cv=skf, scoring='recall_macro')
    f1 = cross_val_score(model, X, y, cv=skf, scoring='f1_macro')
    
    return {
        'accuracy': (accuracy.mean(), accuracy.std()),
        'precision': (precision.mean(), precision.std()),
        'recall': (recall.mean(), recall.std()),
        'f1': (f1.mean(), f1.std())
    }

gnb = GaussianNaiveBayes()
gnb_results = evaluate_model(gnb, X, y)


In [82]:


dt = DecisionTreeClassifier()
dt_results = evaluate_model(dt, X, y)

## Najlepsze parametry z zadania 4
svm = SVC(kernel='linear', C=1.0)
svm_results = evaluate_model(svm, X, y)

# Tabela wyników
results_df = pd.DataFrame({
    'Model': ['Gaussian Naive Bayes', 'Decision Tree', 'SVM'],
    'Accuracy': [gnb_results['accuracy'][0], dt_results['accuracy'][0], svm_results['accuracy'][0]],
    'Accuracy ±': [gnb_results['accuracy'][1], dt_results['accuracy'][1], svm_results['accuracy'][1]],
    'Precision': [gnb_results['precision'][0], dt_results['precision'][0], svm_results['precision'][0]],
    'Precision ±': [gnb_results['precision'][1], dt_results['precision'][1], svm_results['precision'][1]],
    'Recall': [gnb_results['recall'][0], dt_results['recall'][0], svm_results['recall'][0]],
    'Recall ±': [gnb_results['recall'][1], dt_results['recall'][1], svm_results['recall'][1]],
    'F1': [gnb_results['f1'][0], dt_results['f1'][0], svm_results['f1'][0]],
    'F1 ±': [gnb_results['f1'][1], dt_results['f1'][1], svm_results['f1'][1]]
})

print(results_df)


                  Model  Accuracy  Accuracy ±  Precision  Precision ±  \
0  Gaussian Naive Bayes  0.953333    0.026667   0.958384     0.023983   
1         Decision Tree  0.960000    0.032660   0.962290     0.032183   
2                   SVM  0.980000    0.016330   0.981818     0.014845   

     Recall  Recall ±        F1      F1 ±  
0  0.953333  0.026667  0.953047  0.026862  
1  0.960000  0.032660  0.966583  0.036606  
2  0.980000  0.016330  0.979950  0.016371  


In [83]:
print(results_df.to_latex(index=False))


\begin{tabular}{lrrrrrrrr}
\toprule
Model & Accuracy & Accuracy ± & Precision & Precision ± & Recall & Recall ± & F1 & F1 ± \\
\midrule
Gaussian Naive Bayes & 0.953333 & 0.026667 & 0.958384 & 0.023983 & 0.953333 & 0.026667 & 0.953047 & 0.026862 \\
Decision Tree & 0.960000 & 0.032660 & 0.962290 & 0.032183 & 0.960000 & 0.032660 & 0.966583 & 0.036606 \\
SVM & 0.980000 & 0.016330 & 0.981818 & 0.014845 & 0.980000 & 0.016330 & 0.979950 & 0.016371 \\
\bottomrule
\end{tabular}

