### Gaussian Naive Bayes

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

In [133]:
from sklearn import datasets
iris = datasets.load_iris()

In [134]:
X_iris = iris.data
y_iris = iris.target
X_iris.shape, y_iris.shape

((150, 4), (150,))

In [135]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_iris, y_iris)

In [136]:
X_train[:20,:]

array([[ 5.7,  2.8,  4.1,  1.3],
       [ 5. ,  3.5,  1.6,  0.6],
       [ 5.6,  3. ,  4.5,  1.5],
       [ 6.4,  2.9,  4.3,  1.3],
       [ 5. ,  2.3,  3.3,  1. ],
       [ 6.1,  3. ,  4.9,  1.8],
       [ 6.4,  2.8,  5.6,  2.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 6.5,  3. ,  5.8,  2.2],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.1,  3.4,  1.5,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 5.5,  3.5,  1.3,  0.2],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 6.4,  2.8,  5.6,  2.2],
       [ 5.7,  2.6,  3.5,  1. ],
       [ 6. ,  2.9,  4.5,  1.5]])

In [137]:
y_train

array([1, 0, 1, 1, 1, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 1, 0,
       0, 2, 1, 2, 2, 0, 1, 2, 2, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 2, 0, 2, 1,
       0, 1, 2, 0, 2, 2, 0, 2, 2, 2, 1, 2, 2, 1, 1, 2, 0, 2, 0, 1, 2, 2, 1,
       1, 0, 1, 1, 1, 1, 2, 1, 2, 0, 2, 0, 1, 2, 2, 1, 0, 0, 1, 0, 0, 1, 1,
       2, 1, 1, 0, 0, 2, 1, 0, 2, 2, 0, 1, 2, 2, 0, 2, 0, 2, 0, 2])

In [138]:
np.unique(y_train)  # rodzaje klas w zbiorze danych

array([0, 1, 2])

In [139]:
(y_train == 0).sum(), (y_train == 1).sum(), (y_train == 2).sum()  # ilosc elementow kazdej klasy w zbiorze danych

(38, 36, 38)

In [140]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X_train, y_train)

GaussianNB(priors=None)

In [141]:
model.classes_  # rodzaje klas w modelu

array([0, 1, 2])

In [142]:
model.class_count_  # ilosc elementow kazdej klasy w modelu

array([ 38.,  36.,  38.])

In [143]:
model.sigma_  # odchylenie standardowe, 4 kolumny dla kazdego feature, 3 wiersze dla kazdej klasy

array([[ 0.11443214,  0.11916898,  0.02720222,  0.01058172],
       [ 0.26582562,  0.09465278,  0.21404321,  0.04376544],
       [ 0.31141275,  0.09277701,  0.27185596,  0.07271468]])

In [144]:
model.theta_  # srednia, 4 kolumny dla kazdego feature, 3 wiersze dla kazdej klasy

array([[ 5.03684211,  3.46315789,  1.47368421,  0.23157895],
       [ 5.94722222,  2.79166667,  4.26111111,  1.31111111],
       [ 6.72631579,  3.03421053,  5.68421053,  2.07894737]])

In [145]:
y_pred = model.predict(X_test)
y_pred

array([0, 0, 1, 1, 2, 1, 2, 1, 2, 2, 0, 1, 1, 0, 0, 2, 1, 2, 1, 0, 2, 2, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 2, 0, 1, 0, 0, 2, 1])

In [146]:
GaussianNB_score = accuracy_score(y_test, y_pred)
GaussianNB_score

0.94736842105263153

### Multinomial Naive Bayes

In [147]:
from sklearn.naive_bayes import MultinomialNB

In [148]:
model = MultinomialNB()
model.fit(X_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [149]:
model.class_count_  # ilosc elementow kazdej klasy w modelu

array([ 38.,  36.,  38.])

In [150]:
model.feature_count_

array([[ 191.4,  131.6,   56. ,    8.8],
       [ 214.1,  100.5,  153.4,   47.2],
       [ 255.6,  115.3,  216. ,   79. ]])

In [151]:
model.coef_

array([[-0.71117497, -1.08341443, -1.92770024, -3.68836912],
       [-0.88118613, -1.63223037, -1.21274253, -2.37693014],
       [-0.95961   , -1.75095539, -1.12723109, -2.12510181]])

In [152]:
model.intercept_

array([-1.08091271, -1.13497993, -1.08091271])

In [153]:
y_pred = model.predict(X_test)
y_pred

array([0, 0, 2, 1, 2, 1, 2, 1, 2, 2, 0, 1, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 2, 0, 2, 0, 0, 2, 2])

In [154]:
MultinomialNB_score = accuracy_score(y_test, y_pred)
MultinomialNB_score

0.89473684210526316

### LogisticRegression

In [155]:
from sklearn.linear_model import LogisticRegression

In [156]:
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [157]:
model.coef_

array([[ 0.35423826,  1.39832219, -2.0947701 , -0.93926195],
       [ 0.372339  , -1.49090994,  0.51872407, -1.24472564],
       [-1.4425207 , -1.44016122,  2.13933483,  2.29922373]])

In [158]:
model.intercept_

array([ 0.23497577,  1.04328871, -1.04244389])

In [159]:
y_pred = model.predict(X_test)
y_pred

array([0, 0, 2, 1, 2, 1, 2, 1, 2, 2, 0, 1, 1, 0, 0, 2, 2, 2, 1, 0, 2, 2, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 2, 0, 1, 0, 0, 2, 2])

In [160]:
LogisticRegression_score = accuracy_score(y_test, y_pred)
LogisticRegression_score

0.97368421052631582

### K-Nearest Neighbors

In [161]:
from sklearn.neighbors import KNeighborsClassifier

In [162]:
model = KNeighborsClassifier()
model.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [167]:
y_pred = model.predict(X_test)
y_pred

array([0, 0, 2, 1, 2, 1, 2, 1, 2, 2, 0, 1, 1, 0, 0, 2, 1, 2, 1, 0, 2, 2, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 2, 0, 2, 0, 0, 2, 1])

In [168]:
KNeighborsClassifier_score = accuracy_score(y_test, y_pred)
KNeighborsClassifier_score

0.94736842105263153

### Support Vector Machine

In [182]:
from sklearn.svm import SVC # "Support vector classifier"
model = SVC(kernel='linear')
model.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [183]:
model.dual_coef_

array([[  5.52729424e-07,   5.54016559e-01,   0.00000000e+00,
         -0.00000000e+00,  -0.00000000e+00,  -0.00000000e+00,
         -0.00000000e+00,  -0.00000000e+00,  -0.00000000e+00,
         -5.54017112e-01,  -0.00000000e+00,  -0.00000000e+00,
         -5.30485451e-02,  -0.00000000e+00,  -0.00000000e+00,
         -1.06018267e-01,  -0.00000000e+00,  -0.00000000e+00,
         -0.00000000e+00,  -0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   1.59066813e-01,
          1.00000000e+00,   1.00000000e+00,   4.68269900e-01,
          1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
          0.00000000e+00,   1.00000000e+00,   1.00000000e+00,
         -1.00000000e+00,  -9.72200894e-01,  -8.92651893e-01,
         -1.00000000e+00,  -1.00000000e+00,  -1.00000000e+00,
         -6.03417114e-01,  -1.00000000e+00]])

In [184]:
y_pred = model.predict(X_test)
y_pred

array([0, 0, 2, 1, 2, 1, 2, 1, 2, 2, 0, 1, 1, 0, 0, 2, 1, 2, 1, 0, 2, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 2, 0, 1, 0, 0, 2, 2])

In [185]:
SVM_score = accuracy_score(y_test, y_pred)
SVM_score

0.97368421052631582

In [186]:
print('GaussianNB_score =', GaussianNB_score)
print('MultinomialNB_score =', MultinomialNB_score)
print('LogisticRegression_score =', LogisticRegression_score)
print('KNeighborsClassifier_score =', KNeighborsClassifier_score)
print('SVM_score =', SVM_score)

GaussianNB_score = 0.947368421053
MultinomialNB_score = 0.894736842105
LogisticRegression_score = 0.973684210526
KNeighborsClassifier_score = 0.947368421053
SVM_score = 0.973684210526
