## 1. Gaussian Naive Bayes

- 데이터 모듈 불러오기

In [1]:
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

In [4]:
import pandas as pd

In [5]:
# Load the bundled iris dataset, then wrap its feature matrix and its target
# vector in DataFrames so they can be inspected as tables.
iris = datasets.load_iris()
df_X, df_Y = (pd.DataFrame(values) for values in (iris.data, iris.target))

In [6]:
# Preview the first rows of the feature table (four unnamed numeric columns).
df_X.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
# Preview the first rows of the target table (a single column of class ids).
df_Y.head()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0


- 모델 피팅

In [8]:
# Fit a Gaussian Naive Bayes classifier on the full iris data and predict on
# the very same samples, so everything below measures training-set fit only.
gnb = GaussianNB()
fitted = gnb.fit(X=iris.data, y=iris.target)
y_pred = fitted.predict(X=iris.data)

In [9]:
# Spot-check the per-class probabilities for four hand-picked samples
# (rows 1, 48, 51 and 100); each output row holds one value per class.
fitted.predict_proba(iris.data)[[1,48,51,100]]

array([[1.00000000e+000, 1.51480769e-017, 2.34820051e-025],
       [1.00000000e+000, 2.63876217e-018, 2.79566024e-025],
       [7.27347795e-102, 9.45169639e-001, 5.48303606e-002],
       [3.23245181e-254, 6.35381031e-011, 1.00000000e+000]])

In [10]:
# Predicted class labels for the same four samples (rows 1, 48, 51 and 100).
fitted.predict(iris.data)[[1,48,51,100]]

array([0, 0, 1, 2])

- Confusion matrix 구하기

In [11]:
from sklearn.metrics import confusion_matrix

In [12]:
# Compare the true labels (first argument) with the training-set predictions
# (second argument). scikit-learn convention: rows = true class,
# columns = predicted class.
confusion_matrix(iris.target, y_pred)

array([[50,  0,  0],
       [ 0, 47,  3],
       [ 0,  3, 47]], dtype=int64)

- Prior 설정하기

In [14]:
# Put 98% of the prior mass on the third class (index 2) and look at how the
# confusion matrix shifts compared with the run that set no explicit priors.
gnb2 = GaussianNB(priors=[0.01, 0.01, 0.98])
fitted2 = gnb2.fit(X=iris.data, y=iris.target)
y_pred2 = fitted2.predict(X=iris.data)
confusion_matrix(y_true=iris.target, y_pred=y_pred2)

array([[50,  0,  0],
       [ 0, 33, 17],
       [ 0,  0, 50]], dtype=int64)

In [15]:
# Same experiment, this time with 98% of the prior mass on the second class
# (index 1). The names gnb2 / fitted2 / y_pred2 are deliberately rebound.
gnb2 = GaussianNB(priors=[0.01, 0.98, 0.01])
fitted2 = gnb2.fit(X=iris.data, y=iris.target)
y_pred2 = fitted2.predict(X=iris.data)
confusion_matrix(y_true=iris.target, y_pred=y_pred2)

array([[50,  0,  0],
       [ 0, 50,  0],
       [ 0, 14, 36]], dtype=int64)

## 2. Multinomial Naive Bayes

- 모듈 불러오기 및 데이터 생성

In [17]:
from sklearn.naive_bayes import MultinomialNB

In [20]:
import numpy as np

In [22]:
# Toy count data: 6 samples x 100 features, integer counts drawn from [0, 5).
# A seeded Generator keeps the example reproducible; the previous unseeded
# np.random.randint call produced a different X (and therefore different
# predictions and probabilities further down) on every execution.
rng = np.random.default_rng(0)
X = rng.integers(5, size=(6, 100))
# One distinct class label per sample.
y = np.array([1, 2, 3, 4, 5, 6])

In [23]:
# Display the generated 6 x 100 matrix of integer counts.
X

array([[1, 1, 1, 2, 2, 0, 1, 3, 2, 2, 0, 2, 2, 4, 0, 4, 2, 1, 1, 0, 4, 2,
        0, 2, 2, 4, 0, 4, 3, 3, 1, 3, 2, 4, 4, 4, 4, 4, 3, 2, 3, 3, 4, 2,
        2, 2, 2, 0, 1, 2, 0, 2, 2, 3, 3, 3, 3, 2, 1, 2, 2, 1, 2, 0, 3, 3,
        1, 2, 4, 2, 1, 3, 1, 4, 1, 3, 2, 1, 3, 0, 4, 2, 3, 3, 0, 2, 0, 1,
        1, 0, 4, 1, 1, 2, 2, 4, 2, 4, 1, 4],
       [0, 3, 1, 2, 3, 2, 2, 3, 3, 3, 4, 3, 1, 3, 3, 4, 2, 0, 2, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 2, 3, 3, 0, 2, 3, 3, 0, 0, 1, 0, 2, 0, 1, 3, 1,
        4, 0, 0, 2, 3, 3, 1, 2, 1, 1, 3, 1, 4, 4, 0, 4, 2, 3, 4, 3, 4, 0,
        0, 3, 3, 4, 4, 3, 2, 3, 2, 1, 4, 3, 3, 0, 3, 4, 2, 4, 0, 2, 2, 1,
        1, 3, 3, 3, 0, 4, 4, 3, 3, 1, 0, 0],
       [2, 2, 2, 3, 4, 3, 1, 2, 0, 3, 4, 0, 2, 2, 0, 0, 2, 2, 3, 0, 1, 1,
        4, 1, 2, 0, 0, 3, 3, 3, 1, 1, 2, 1, 0, 2, 3, 3, 1, 3, 1, 3, 4, 4,
        4, 1, 4, 4, 4, 0, 0, 3, 2, 0, 4, 3, 3, 0, 1, 2, 3, 4, 0, 3, 3, 1,
        0, 3, 2, 0, 1, 1, 3, 2, 1, 0, 1, 4, 2, 1, 2, 3, 2, 1, 0, 2, 1, 4,
        3, 3, 0, 0, 0,

In [24]:
# Display the class labels (one distinct label per row of X).
y

array([1, 2, 3, 4, 5, 6])

- Multinomial Naive Bayes 모델 생성

In [25]:
# Train a multinomial Naive Bayes model with its default settings on the toy
# counts; every class is represented by exactly one training row.
clf = MultinomialNB()
clf.fit(X=X, y=y)

MultinomialNB()

In [26]:
# Predict the class of the third training row; the slice X[2:3] (rather than
# X[2]) keeps the input two-dimensional, i.e. a batch containing one sample.
print(clf.predict(X[2:3]))

[3]


In [28]:
# Per-class probabilities for the same single-row batch (one column per label).
clf.predict_proba(X[2:3])

array([[9.09821742e-31, 5.39248444e-29, 1.00000000e+00, 7.27491872e-36,
        3.51595552e-35, 4.15667391e-30]])

- Prior 변경해보기

In [35]:
# Refit with an explicit class prior: the second entry (label 2) receives
# half of the probability mass, the remaining five labels get 0.1 each.
clf2 = MultinomialNB(class_prior=[0.1, 0.5, 0.1, 0.1, 0.1, 0.1])
clf2.fit(X=X, y=y)

MultinomialNB(class_prior=[0.1, 0.5, 0.1, 0.1, 0.1, 0.1])

In [36]:
# Score the third training row again, now under the modified class prior,
# to see how much the prior moves the per-class probabilities.
clf2.predict_proba(X[2:3])

array([[9.09821742e-31, 2.69624222e-28, 1.00000000e+00, 7.27491872e-36,
        3.51595552e-35, 4.15667391e-30]])