- 앙상블 학습

In [1]:
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [2]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
cancer_scaled = scaler.fit_transform(cancer.data)

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    cancer_scaled, cancer.target, stratify=cancer.target,
    test_size=0.2, random_state=2021
)

### 앙상블 학습을 위한 분류기
- 로지스틱 회귀
- 서포트 벡터 머신
- k 최근접 이웃

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [5]:
lr = LogisticRegression()
svc = SVC()
knn = KNeighborsClassifier()

In [6]:
from sklearn.ensemble import VotingClassifier

vo_clf = VotingClassifier(
    estimators=[('LR',lr),('SVC',svc),('KNN',knn)],
    voting='hard'
)

In [7]:
vo_clf.fit(X_train, y_train)
pred = vo_clf.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, pred)

0.9824561403508771

In [9]:
# 개별 모델의 학습/예측/평가
for classifier in [lr,svc, knn]:
    classifier.fit(X_train, y_train)
    pred = classifier.predict(X_test)
    acc = accuracy_score(y_test, pred)
    classifier_name = classifier.__class__.__name__
    print(f'{classifier_name} 정확도: {acc:.4f}')

LogisticRegression 정확도: 0.9825
SVC 정확도: 0.9825
KNeighborsClassifier 정확도: 0.9825


### 소프트 보팅 
- 로지스틱 회귀
- k최근접 이웃

In [10]:
vo_clf = VotingClassifier(
    estimators=[('LR',lr),('KNN',knn)],
    voting='soft'
)
vo_clf.fit(X_train, y_train)
pred = vo_clf.predict(X_test)
accuracy_score(y_test, pred)

0.9824561403508771

In [11]:
vo_clf.predict_proba(X_test)

array([[3.81741108e-01, 6.18258892e-01],
       [9.85814714e-01, 1.41852858e-02],
       [1.93478251e-01, 8.06521749e-01],
       [1.06673095e-02, 9.89332690e-01],
       [2.74195491e-02, 9.72580451e-01],
       [9.30322916e-01, 6.96770844e-02],
       [3.39990385e-03, 9.96600096e-01],
       [9.89140696e-01, 1.08593044e-02],
       [5.37170015e-02, 9.46282999e-01],
       [9.15335816e-01, 8.46641840e-02],
       [9.94520071e-01, 5.47992883e-03],
       [1.99503549e-02, 9.80049645e-01],
       [1.16612555e-02, 9.88338744e-01],
       [9.92385249e-01, 7.61475147e-03],
       [3.02133809e-03, 9.96978662e-01],
       [2.03397821e-02, 9.79660218e-01],
       [2.95441394e-03, 9.97045586e-01],
       [4.37869338e-01, 5.62130662e-01],
       [3.66609434e-02, 9.63339057e-01],
       [1.06760919e-02, 9.89323908e-01],
       [3.31585268e-02, 9.66841473e-01],
       [7.02909623e-01, 2.97090377e-01],
       [6.26316282e-02, 9.37368372e-01],
       [2.50973093e-01, 7.49026907e-01],
       [1.445278