# WSI ćwiczenie nr 7

## Naiwny klasyfikator bayesowski

Mikołaj Wewiór 318407

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import StratifiedKFold

from src import dataset
from src.naiveBayes import NaiveBayesClassifier
# try:
#     import dataset
# except ModuleNotFoundError:
#     from src import dataset


In [2]:
# Single seed shared by the notebook: it is fed to NumPy's global RNG here and
# passed as random_state to the estimators that accept one.
SEED: int = 318407
np.random.seed(SEED)

## Porównanie Klasyfikacji

In [3]:
# Load the project's dataset wrapper and expose its feature matrix and labels
# under the short names used by the evaluation cells.
ds_skl = dataset.DataSetSKL()
X, Y = ds_skl.features, ds_skl.target

# One 5-fold stratified splitter shared by every classifier so that all of
# them are scored on the same folds.
SKF = StratifiedKFold(n_splits=5)

### SVM

In [4]:
# RBF-kernel SVM. `tol=1e-15` is the same value the notebook previously spelled
# as `10e-16`; `max_iter=25` is passed as a plain int.
clfSVM = SVC(
    C=0.1, kernel="rbf", tol=1e-15, max_iter=25, random_state=SEED
)

# A single cross_validate call fits the model once per fold and evaluates all
# four metrics on that fit, instead of refitting the SVM once per metric.
SVM_cv = cross_validate(
    clfSVM,
    X,
    Y,
    cv=SKF,
    scoring=["accuracy", "precision_weighted", "recall_weighted", "f1_weighted"],
)

# Per-fold score arrays, kept under their original names.
SVM_accuracy = SVM_cv["test_accuracy"]
SVM_precision = SVM_cv["test_precision_weighted"]
SVM_recall = SVM_cv["test_recall_weighted"]
SVM_f1 = SVM_cv["test_f1_weighted"]

# Order must stay accuracy, precision, recall, f1: print_results pairs the
# entries with its metric labels by position.
SVM_scores = [SVM_accuracy, SVM_precision, SVM_recall, SVM_f1]




### Tree

In [5]:
# Entropy-based decision tree limited to depth 4; random_state makes it
# reproducible between runs.
clfTree = DecisionTreeClassifier(
    criterion="entropy", splitter="best", max_depth=4, random_state=SEED
)

# A single cross_validate call fits the tree once per fold and evaluates all
# four metrics on that fit, instead of refitting it once per metric.
TREE_cv = cross_validate(
    clfTree,
    X,
    Y,
    cv=SKF,
    scoring=["accuracy", "precision_weighted", "recall_weighted", "f1_weighted"],
)

# Per-fold score arrays, kept under their original names.
TREE_accuracy = TREE_cv["test_accuracy"]
TREE_precision = TREE_cv["test_precision_weighted"]
TREE_recall = TREE_cv["test_recall_weighted"]
TREE_f1 = TREE_cv["test_f1_weighted"]

# Order must stay accuracy, precision, recall, f1: print_results pairs the
# entries with its metric labels by position.
TREE_scores = [TREE_accuracy, TREE_precision, TREE_recall, TREE_f1]


### Bayes

In [6]:
# scikit-learn's Gaussian Naive Bayes, used as the reference for the
# project's own implementation.
clfGNB = GaussianNB()

# Uses the shared X / Y aliases (the same objects as ds_skl.features and
# ds_skl.target) for consistency with the other classifiers. A single
# cross_validate call fits the model once per fold and evaluates all four
# metrics on that fit, instead of refitting it once per metric.
GNB_cv = cross_validate(
    clfGNB,
    X,
    Y,
    cv=SKF,
    scoring=["accuracy", "precision_weighted", "recall_weighted", "f1_weighted"],
)

# Per-fold score arrays, kept under their original names.
GNB_accuracy = GNB_cv["test_accuracy"]
GNB_precision = GNB_cv["test_precision_weighted"]
GNB_recall = GNB_cv["test_recall_weighted"]
GNB_f1 = GNB_cv["test_f1_weighted"]

# Order must stay accuracy, precision, recall, f1: print_results pairs the
# entries with its metric labels by position.
GNB_scores = [GNB_accuracy, GNB_precision, GNB_recall, GNB_f1]


In [7]:
# The project's own Naive Bayes implementation (src.naiveBayes).
clfBayes = NaiveBayesClassifier()

# A single cross_validate call fits the classifier once per fold and evaluates
# all four metrics on that fit, instead of refitting it once per metric.
BAYES_cv = cross_validate(
    clfBayes,
    X,
    Y,
    cv=SKF,
    scoring=["accuracy", "precision_weighted", "recall_weighted", "f1_weighted"],
)

# Per-fold score arrays, kept under their original names.
BAYES_accuracy = BAYES_cv["test_accuracy"]
BAYES_precision = BAYES_cv["test_precision_weighted"]
BAYES_recall = BAYES_cv["test_recall_weighted"]
BAYES_f1 = BAYES_cv["test_f1_weighted"]

# Order must stay accuracy, precision, recall, f1: print_results pairs the
# entries with its metric labels by position.
BAYES_scores = [BAYES_accuracy, BAYES_precision, BAYES_recall, BAYES_f1]


### OUTPUT

In [8]:
def print_results(scores, classifier_name):
    """Print mean and standard deviation of each metric, in percent.

    Args:
        scores: Sequence of per-fold score containers in the order accuracy,
            precision, recall, f1. Each element must provide ``mean()`` and
            ``std()``. Entries are paired with the metric labels by position,
            and pairing stops at the shorter of the two sequences.
        classifier_name: Label used in the header line.
    """
    labels = (" accuracy", "precision", "   recall", "       f1")
    print(f"\n{classifier_name} scores (mean +- deviation)")
    for label, fold_scores in zip(labels, scores):
        mean_pct = 100 * fold_scores.mean()
        std_pct = 100 * fold_scores.std()
        print(f"{label}: {mean_pct} +- {std_pct} [%]")


# Report every classifier in the order in which it was evaluated.
for cv_scores, title in (
    (SVM_scores, "SVM"),
    (TREE_scores, "Decision Tree"),
    (GNB_scores, "(Sklearn Gausian NB) Naive Bayes"),
    (BAYES_scores, "(Own) Naive Bayes"),
):
    print_results(cv_scores, title)



SVM scores (mean +- deviation)
 accuracy: 95.33333333333334 +- 3.399346342395189 [%]
precision: 95.72390572390572 +- 3.260260858426985 [%]
   recall: 95.33333333333334 +- 3.399346342395189 [%]
       f1: 95.31151110098477 +- 3.413878690742713 [%]

Decision Tree scores (mean +- deviation)
 accuracy: 93.33333333333333 +- 5.577733510227169 [%]
precision: 93.64983164983165 +- 5.5688892129758765 [%]
   recall: 93.33333333333333 +- 5.577733510227169 [%]
       f1: 93.30984025720866 +- 5.591759514319502 [%]

(Sklearn Gausian NB) Naive Bayes scores (mean +- deviation)
 accuracy: 94.66666666666666 +- 3.399346342395189 [%]
precision: 95.21885521885523 +- 3.223543069333713 [%]
   recall: 94.66666666666666 +- 3.399346342395189 [%]
       f1: 94.6330472646262 +- 3.415890573037327 [%]

(Own) Naive Bayes scores (mean +- deviation)
 accuracy: 92.0 +- 7.483314773547882 [%]
precision: 93.44078144078142 +- 5.972173474025375 [%]
   recall: 92.0 +- 7.483314773547882 [%]
       f1: 91.70105143506679 +- 7.8