In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# notebook can reach files stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Change the working directory to a folder on the mounted Drive.
# NOTE(review): hard-coded absolute path — only valid after the Drive mount
# cell has run and only for an account that has this folder.
%cd /content/drive/MyDrive/multi/0426

/content/drive/MyDrive/multi/0426


In [54]:
# Widen the notebook's main container to 90% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style> .container{width:90% !important;}</style>"))

## Ensemble 학습

In [3]:
# Load scikit-learn's bundled breast-cancer dataset; later cells read
# `cancer.data` (features) and `cancer.target` (labels).
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()

In [4]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every feature column of the dataset.
# NOTE(review): the scaler is fitted on the FULL dataset, before the
# train/test split performed in the next cell, so test-set min/max values
# leak into preprocessing. Consider fitting the scaler on the training
# split only (e.g. inside a Pipeline).
cancer_scaled = MinMaxScaler().fit_transform(cancer.data)

In [6]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 hold-out split of the scaled features, with a fixed seed
# so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer_scaled,
    cancer.target,
    test_size=0.2,
    stratify=cancer.target,
    random_state=2022,
)

### 1. Voting 방식

### 1.1 Hard Voting
- logistic regression
- svm
- K-NN

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier


In [23]:
# Base learners for the voting ensembles. The SVC is created with
# probability=True so that predict_proba can be called on it later.
lrc, svc, knn = (
    LogisticRegression(random_state=2022),
    SVC(probability=True, random_state=2022),
    KNeighborsClassifier(),
)

In [24]:
from sklearn.ensemble import VotingClassifier

# Hard-voting ensemble built from the three base learners.
voc = VotingClassifier(
    voting='hard',
    estimators=[
        ('LRC', lrc),
        ('SVC', svc),
        ('KNN', knn),
    ],
)

In [25]:
# Train the hard-voting ensemble and show its score on the test split.
# fit() returns the estimator itself, so the two calls can be chained.
voc.fit(X_train, y_train).score(X_test, y_test)

1.0

In [26]:
# Logistic regression on its own: fit, then score on the test split.
lrc.fit(X_train, y_train).score(X_test, y_test)

0.9912280701754386

In [27]:
# SVM on its own: fit, then score on the test split.
svc.fit(X_train, y_train).score(X_test, y_test)

1.0

In [28]:
# K-NN on its own: fit, then score on the test split.
knn.fit(X_train, y_train).score(X_test, y_test)

0.9824561403508771

### 1.2 Soft Voting

In [None]:
# List the attributes and methods of the estimator object
dir(lrc)

In [19]:
# Predicted class labels for the first five test samples
lrc.predict(X_test[:5])

array([0, 1, 0, 1, 0])

In [20]:
# Also show the probability of each class (first five test samples)
lrc.predict_proba(X_test[:5])

array([[0.99792166, 0.00207834],
       [0.07775117, 0.92224883],
       [0.9774613 , 0.0225387 ],
       [0.05952966, 0.94047034],
       [0.99554778, 0.00445222]])

- SVM

In [29]:
# SVC class probabilities for the first five test samples
# (the SVC was constructed with probability=True).
svc.predict_proba(X_test[:5])

array([[9.99896299e-01, 1.03701492e-04],
       [3.84470713e-03, 9.96155293e-01],
       [9.99896384e-01, 1.03616009e-04],
       [5.97356113e-03, 9.94026439e-01],
       [9.99311796e-01, 6.88204061e-04]])

In [34]:
# K-NN class probabilities.
# NOTE(review): this slices the LAST five test samples (X_test[-5:]), whereas
# the LRC and SVC cells show the first five (X_test[:5]) — confirm that the
# different slice is intentional.
knn.predict_proba(X_test[-5:])

array([[0.8, 0.2],
       [1. , 0. ],
       [0.8, 0.2],
       [0. , 1. ],
       [0. , 1. ]])

### Soft Voting 분류기 학습

In [35]:
# Soft-voting ensemble over the same three base learners.
voc2 = VotingClassifier(
    voting='soft',
    estimators=[
        ('LRC', lrc),
        ('SVC', svc),
        ('KNN', knn),
    ],
)

In [37]:
# Train the soft-voting ensemble and show its score on the test split.
# fit() returns the estimator itself, so the two calls can be chained.
voc2.fit(X_train, y_train).score(X_test, y_test)

1.0

In [38]:
# Soft-voting ensemble class probabilities for the first five test samples
voc2.predict_proba(X_test[:5])

array([[9.99272654e-01, 7.27346212e-04],
       [2.71986265e-02, 9.72801374e-01],
       [9.92452563e-01, 7.54743719e-03],
       [2.18344062e-02, 9.78165594e-01],
       [9.98286525e-01, 1.71347474e-03]])

- GridSearchCV

In [43]:
# Inspect the current C values of the logistic-regression and SVC estimators
lrc.C, svc.C

(1.0, 1.0)

In [47]:
# Search grid for the voting ensemble. Keys use the
# "<estimator name>__<parameter>" form, addressing the C parameter of the
# ensemble members registered under the names 'LRC' and 'SVC'.
params = dict(
    LRC__C=[5, 10, 30],
    SVC__C=[0.05, 0.1, 0.3],
)

In [48]:
from sklearn.model_selection import GridSearchCV

# 5-fold cross-validated grid search over `params` for the soft-voting
# ensemble, scored by accuracy; the cell displays the best parameter set.
grid_voc = GridSearchCV(
    estimator=voc2,
    param_grid=params,
    cv=5,
    scoring='accuracy',
)
grid_voc.fit(X_train, y_train)
grid_voc.best_params_

{'LRC__C': 10, 'SVC__C': 0.05}

In [49]:
# Take the best estimator found by the grid search and score it on the
# test split.
best_voc = grid_voc.best_estimator_
best_voc.score(X_test, y_test)

1.0

### 2. Bagging 방식
- Random Forest

In [50]:
from sklearn.ensemble import RandomForestClassifier

# Create a random forest with a fixed seed and display its hyper-parameters.
rfc = RandomForestClassifier(random_state=2022)
rfc.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 2022,
 'verbose': 0,
 'warm_start': False}

In [51]:
# Train the random forest, then show its score on the held-out test split.
# score() needs the evaluation data and labels; calling it with no arguments
# raises a TypeError.
rfc.fit(X_train, y_train)
rfc.score(X_test, y_test)

1.0