In [1]:
# 나이브 베이즈 분류(Naive Bayes Classification) :
# - 베이즈 기법을 이용한 확률적 기계 학습 알고리즘
# - 텍스트 분류, 감성 분석, 추천 시스템, 스팸 메일 필터 등에 활용
# - 모든 사건이 독립

In [2]:
# sklearn.naive_bayes.BernouliNB()
# - alpha : 평활 모수 (default = 1.0)
# 베르누이 : 이진 분류
# 멀티 : 다지 분류
# 가우시안 : 회귀

In [3]:
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [4]:
# 이진분할
breast_cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(breast_cancer.data,breast_cancer.target,test_size = 0.2, random_state = 1234, stratify = breast_cancer.target)

In [5]:
nb_B = BernoulliNB()
model_nb_B = nb_B.fit(X_train,y_train)

In [6]:
from sklearn.metrics import roc_curve,auc
y_pred = model_nb_B.predict_proba(X_test)[:,1]

In [7]:
fpr,tpr,thresholds = roc_curve(y_test,y_pred)
AUC = auc(fpr,tpr)
print(AUC)

0.5208333333333334


In [8]:
# 다지 분류
# sklearn.naive_bayes.MultinomialNB()
# alpha : 평활 모수(default = 1.0)

In [9]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [10]:
iris = load_iris()
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size = 0.2, random_state = 1234, stratify = iris.target)

In [11]:
nb_M = MultinomialNB()
model_nb_M = nb_M.fit(X_train,y_train)

In [12]:
from sklearn.metrics import f1_score
y_pred = model_nb_M.predict(X_test)

In [13]:
macro_f1 = f1_score(y_test,y_pred,average = 'macro')
print(macro_f1)

0.9326599326599326


In [14]:
# 나이브 베이즈 회귀(Naive Bayes Regression)
# sklearn.naive_bayes.GaussianNB()
# var_smoothing : 분산에 추가된 모든 형상의 가장 큰 분산 부분(default = 1e-9)

In [15]:
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

In [16]:
diabetes = load_diabetes()
X_train,X_test,y_train,y_test = train_test_split(diabetes.data,diabetes.target,test_size = 0.2, random_state = 1234)

In [17]:
nb_G = GaussianNB()
model_nb_G = nb_G.fit(X_train,y_train)

In [18]:
from sklearn.metrics import mean_absolute_error
y_pred = model_nb_G.predict(X_test)

In [19]:
mae = mean_absolute_error(y_test,y_pred)
print(mae)

53.91011235955056
