## Ensemble method

In [34]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

from sklearn.datasets import make_moons

In [39]:
# Noisy two-moons toy dataset, split into train/test parts using
# train_test_split's default proportions. Both steps are seeded.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Three different base learners, all seeded with the same random_state.
log_clf = LogisticRegression(random_state=42)
rnd_clf = RandomForestClassifier(random_state=42)
# probability=True makes SVC expose class probabilities, which the
# soft-voting ensemble below averages.
svm_clf = SVC(probability=True, random_state=42)

# Soft-voting ensemble over the three base learners.
voting_clf = VotingClassifier(
    estimators=[("lr", log_clf), ("rnd", rnd_clf), ("svm", svm_clf)],
    voting="soft",
)

In [40]:
# Fit every base learner and the voting ensemble on the same training split.
classifiers = (log_clf, rnd_clf, svm_clf, voting_clf)
for clf in classifiers:
    clf.fit(X_train, y_train)

# Print each model's score on the held-out test split, in the same order.
for label, clf in zip(('log :', 'rnd :', 'svm :', 'voting :'), classifiers):
    print(label, clf.score(X_test, y_test))

log : 0.864
rnd : 0.872
svm : 0.888
voting : 0.912


## Bagging and Pasting 
- Bagging: sampling with replacement (복원 추출)
- Pasting: sampling without replacement (비복원 추출)

In [49]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging: each of the 500 trees is trained on 100 training instances drawn
# at random WITH replacement (bootstrap=True), and the ensemble aggregates the
# trees' predictions. bootstrap=False would give pasting instead.
# BaggingClassifier averages class probabilities (soft voting) when the base
# estimator implements predict_proba, as DecisionTreeClassifier does;
# otherwise it falls back to majority (hard) voting.
# oob_score=True asks for an out-of-bag evaluation, which is only possible
# with bootstrap sampling.
# random_state is fixed, like the other estimators in this notebook, so the
# scores are reproducible after Restart & Run All.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True,
    random_state=42,
)

bag_clf.fit(X_train, y_train)
print(bag_clf.score(X_test, y_test))

# Baseline for comparison: a single decision tree trained on the full
# training set (seeded as well, since tree construction is randomized).
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, y_train)
print(tree_clf.score(X_test, y_test))

0.92
0.84


In [50]:
# Out-of-bag score of the bagging ensemble; this attribute is available
# because bag_clf was constructed with oob_score=True.
bag_clf.oob_score_

0.92266666666666663

## Random Forest
- Decision tree를 기반으로 하는 방법으로, Bagging(복원 추출 sampling)으로 뽑은 각 부분 집합에 대해 tree를 training한 다음,
  그 결과들을 soft voting으로 집계한다.