## 앙상블 학습

In [6]:
# Load the breast cancer dataset bundled with scikit-learn; later cells
# read its feature matrix (cancer.data) and labels (cancer.target).
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [10]:
# Rescale every feature column with min-max scaling.
# NOTE(review): the scaler is fit on the full dataset here, before the
# train/test split in the next cell, so test-set statistics leak into the
# preprocessing. Consider fitting on the training rows only and applying
# transform() to the test rows; the recorded accuracies would then change.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(cancer.data)

In [14]:
# Hold out 20% of the scaled samples for evaluation; random_state is fixed
# so the same split is produced on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    scaled_data, cancer.target, test_size=0.2, random_state=2021
)

### 앙상블 학습을 위한 분류기

- 로지스틱 회귀
- 서포트 벡터 머신
- K 최근접 이웃

In [18]:
# The three base classifiers that the voting ensembles below combine.
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# LR and SVC get a fixed random_state; KNN is created with no arguments.
lr = LogisticRegression(random_state=2021)
svc = SVC(random_state=2021)
knn = KNeighborsClassifier()

In [22]:
# Voting classifier for ensemble learning: hard voting over the three
# base models, each registered under a short name.
from sklearn.ensemble import VotingClassifier

vo_clf = VotingClassifier(
    voting='hard',
    estimators=list(zip(('LR', 'SVC', 'KNN'), (lr, svc, knn))),
)

In [26]:
# Train the hard-voting ensemble on the training split.
vo_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('LR', LogisticRegression(random_state=2021)),
                             ('SVC', SVC(random_state=2021)),
                             ('KNN', KNeighborsClassifier())])

In [30]:
# Predict labels for the held-out test split with the ensemble.
pred = vo_clf.predict(X_test)

In [34]:
# Accuracy of the hard-voting ensemble on the test split.
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred)

0.9824561403508771

### 개별 모델의 학습/예측/평가

In [38]:
# Fit and score each base model on its own, using the same train/test
# split as the ensemble, and print one accuracy line per model.
for classifier in (lr, svc, knn):
    pred = classifier.fit(X_train, y_train).predict(X_test)
    acc = accuracy_score(y_test, pred)
    class_name = type(classifier).__name__
    print(f'{class_name} : {acc:.4f}')

LogisticRegression : 0.9561
SVC : 0.9912
KNeighborsClassifier : 0.9825


### 소프트 보팅

In [39]:
# predict_proba() is only available when probability=True, so the SVC is
# rebuilt with that flag before it goes into the soft-voting ensemble.
svc = SVC(probability=True, random_state=2021)

In [40]:
# Soft-voting ensemble over the same three named models; the SVC entry is
# the probability-enabled instance created in the previous cell.
vo_clf = VotingClassifier(
    voting='soft',
    estimators=list(zip(('LR', 'SVC', 'KNN'), (lr, svc, knn))),
)

In [41]:
# Train the soft-voting ensemble on the training split.
vo_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('LR', LogisticRegression(random_state=2021)),
                             ('SVC', SVC(probability=True, random_state=2021)),
                             ('KNN', KNeighborsClassifier())],
                 voting='soft')

In [42]:
# Predict test-split labels with the soft-voting ensemble.
pred = vo_clf.predict(X_test)

In [43]:
# Accuracy of the soft-voting ensemble on the test split.
accuracy_score(y_test, pred)

0.9824561403508771

### 랜덤 포레스트

In [45]:
from sklearn.ensemble import RandomForestClassifier
# Random forest with a fixed seed; no other hyperparameter is overridden.
rfc = RandomForestClassifier(random_state=2021)
# Display the full set of hyperparameters in effect.
rfc.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 2021,
 'verbose': 0,
 'warm_start': False}

In [46]:
# Train the forest, predict the held-out split, and report accuracy.
pred = rfc.fit(X_train, y_train).predict(X_test)
accuracy_score(y_test, pred)

0.9649122807017544

In [47]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree with the same seed, trained and scored on the same
# split as the random forest.
dtc = DecisionTreeClassifier(random_state=2021).fit(X_train, y_train)
pred = dtc.predict(X_test)
accuracy_score(y_test, pred)

0.9122807017543859

### K-Nearest Neighbor(KNN)

In [49]:
# Show the hyperparameters of the KNN classifier created earlier.
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}