In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

voting_clf = VotingClassifier(
  estimators=[('lr',log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
  voting='hard'
)

In [3]:
from sklearn.metrics import accuracy_score
def show_pred():
  for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test,y_pred))
show_pred()

LogisticRegression 0.864
RandomForestClassifier 0.872
SVC 0.888
VotingClassifier 0.888




#### 使用軟投票法

硬: 每個分類器選出一類，取最多的那類作為預測結果

軟：每個分類器都對每一類做出可能預測(predict_prob)，再加總所有預測值算出最高的類

In [4]:
svm_clf = SVC(probability=True)
voting_clf = VotingClassifier(
  estimators=[('lr',log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
  voting='soft'
)
show_pred()

LogisticRegression 0.864
RandomForestClassifier 0.864
SVC 0.888
VotingClassifier 0.912




### bagging, pasting

前面展示獲得不同種分類器的方法之一是使用不同訓練算法(模型)

還有另一種方法是用相同算法，但在不同的訓練集隨機子集上訓練。

In [5]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1
)

In [6]:
bag_clf.fit(X_train,y_train)
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.92

#### 使用包外評估

In [7]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True # 包外評估
)
bag_clf.fit(X_train,y_train)
print("oob_score: ",bag_clf.oob_score_)
y_pred = bag_clf.predict(X_test)
print("accuracy: ",accuracy_score(y_test, y_pred))

oob_score:  0.8933333333333333
accuracy:  0.904
