In [5]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [9]:
# Load the breast-cancer data as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Stratified train/test split with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=0,
)

In [10]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [18]:
# Build the three base classifiers and fit each one right away.
# SVC and KNN are trained on the standardized features; the random forest is
# trained on the raw (unscaled) features.
svc = SVC(random_state=0).fit(X_train_scaled, y_train)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
rf = RandomForestClassifier(
    n_estimators=200,
    max_depth=5,
    random_state=0,
).fit(X_train, y_train)

# Evaluation: predict on both splits, giving each model the same feature
# representation it was fitted on.
pred_train_svc, pred_test_svc = svc.predict(X_train_scaled), svc.predict(X_test_scaled)
pred_train_knn, pred_test_knn = knn.predict(X_train_scaled), knn.predict(X_test_scaled)
pred_train_rf, pred_test_rf = rf.predict(X_train), rf.predict(X_test)

In [19]:
# Training accuracy, one line per model, in the order: SVC, KNN, random forest.
print(
    accuracy_score(y_train, pred_train_svc),
    accuracy_score(y_train, pred_train_knn),
    accuracy_score(y_train, pred_train_rf),
    sep='\n',
)

0.9929577464788732
0.9788732394366197
0.9976525821596244


In [22]:
# Test accuracy, one line per model, in the order: SVC, KNN, random forest.
print(
    *(
        accuracy_score(y_test, model_pred)
        for model_pred in (pred_test_svc, pred_test_knn, pred_test_rf)
    ),
    sep='\n',
)

0.958041958041958
0.951048951048951
0.9440559440559441


In [23]:
# Hard voting ensemble: pair each base model with the name it will carry
# inside the VotingClassifier.
estimators = list(zip(('svc', 'knn', 'rf'), (svc, knn, rf)))

In [24]:
# Majority-vote ensemble; 'hard' is VotingClassifier's default voting mode,
# spelled out here for readability.
voting = VotingClassifier(estimators=estimators, voting='hard')

In [25]:
# Fit the ensemble on the standardized training features; the fitted
# estimator is the cell's displayed value.
voting.fit(X=X_train_scaled, y=y_train)

VotingClassifier(estimators=[('svc', SVC(random_state=0)),
                             ('knn', KNeighborsClassifier()),
                             ('rf',
                              RandomForestClassifier(max_depth=5,
                                                     n_estimators=200,
                                                     random_state=0))])

In [28]:
# Hard-voting predictions for the standardized train and test features.
pred_train, pred_test = map(voting.predict, (X_train_scaled, X_test_scaled))

# Train accuracy is printed; test accuracy is the cell's displayed value.
print(accuracy_score(y_true=y_train, y_pred=pred_train))
accuracy_score(y_true=y_test, y_pred=pred_test)

0.9929577464788732


0.951048951048951

In [30]:
# Soft-voting ensemble.
# A new SVC is created with probability=True (presumably because soft voting
# needs class-probability estimates -- confirm against the VotingClassifier
# docs); knn and rf are the objects defined in the earlier cell.
estimators = [
    ('svc', SVC(probability=True, random_state=0)),
    ('knn', knn),
    ('rf', rf),
]

# Build and fit in one step on the standardized training features.
voting = VotingClassifier(estimators=estimators, voting='soft').fit(X_train_scaled, y_train)

# Show the fitted ensemble as the cell output.
voting

VotingClassifier(estimators=[('svc', SVC(probability=True, random_state=0)),
                             ('knn', KNeighborsClassifier()),
                             ('rf',
                              RandomForestClassifier(max_depth=5,
                                                     n_estimators=200,
                                                     random_state=0))],
                 voting='soft')

In [32]:
# Soft-voting predictions for the standardized train and test features.
pred_train, pred_test = (
    voting.predict(scaled_features)
    for scaled_features in (X_train_scaled, X_test_scaled)
)

# Train accuracy is printed; test accuracy is the cell's displayed value.
print(accuracy_score(y_true=y_train, y_pred=pred_train))
accuracy_score(y_true=y_test, y_pred=pred_test)

0.9953051643192489


0.958041958041958

In [39]:
# Using pipelines
# KNN and SVC are each bundled with their own StandardScaler, so the ensemble
# below can be fitted on -- and predict from -- the unscaled features while the
# random forest keeps receiving the raw values.
# (Pipeline is imported in the notebook's top import cell.)

# (step name, transformer/estimator) pairs for each pipeline.
order_knn = [
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
]

order_svc = [
    ('scaler', StandardScaler()),
    # probability=True because this SVC is used in the soft-voting ensemble below.
    ('svc', SVC(random_state=0, probability=True)),
]

knn_pl = Pipeline(order_knn)
svc_pl = Pipeline(order_svc)

# NOTE: this rebinds `rf` (200 trees in the earlier cell) to a 500-tree forest;
# re-running earlier cells after this one will pick up this larger model.
rf = RandomForestClassifier(n_estimators=500, max_depth=5, random_state=0)

# Ensemble members: two scaling pipelines plus the plain random forest.
estimators = [
    ('knn', knn_pl),
    ('svc', svc_pl),
    ('rf', rf),
]

voting = VotingClassifier(estimators, voting='soft')

In [40]:
# Fit on the unscaled training features; the KNN and SVC pipelines apply their
# own StandardScaler step. The fitted estimator is the cell's displayed value.
voting.fit(X=X_train, y=y_train)

VotingClassifier(estimators=[('knn',
                              Pipeline(steps=[('scaler', StandardScaler()),
                                              ('knn',
                                               KNeighborsClassifier())])),
                             ('svc',
                              Pipeline(steps=[('scaler', StandardScaler()),
                                              ('svc',
                                               SVC(probability=True,
                                                   random_state=0))])),
                             ('rf',
                              RandomForestClassifier(max_depth=5,
                                                     n_estimators=500,
                                                     random_state=0))],
                 voting='soft')

In [41]:
# Accuracy of the pipeline-based ensemble on the unscaled features:
# first line is the training split, second line is the test split.
print(
    accuracy_score(y_train, voting.predict(X_train)),
    accuracy_score(y_test, voting.predict(X_test)),
    sep='\n',
)


0.9929577464788732
0.958041958041958
