# 多數決演算法(VotingClassifier)測試

## 載入相關套件

In [16]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

## 載入資料集

In [17]:
X, y = datasets.load_breast_cancer(return_X_y=True)

## 資料分割

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

## 特徵縮放

In [30]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

## 模型訓練

In [31]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB

estimators = [('svc', SVC()), ('rf', RandomForestClassifier()), ('nb', GaussianNB())]
clf = VotingClassifier(estimators)
clf.fit(X_train_std, y_train)

## 模型評估

In [32]:
# 計算準確率
print(f'{clf.score(X_test_std, y_test)*100:.2f}%')

97.37%


## 個別模型評估

In [33]:
svc = SVC()
svc.fit(X_train_std, y_train)
print(f'{svc.score(X_test_std, y_test)*100:.2f}%') 

98.25%


In [34]:
rf = RandomForestClassifier()
rf.fit(X_train_std, y_train)
print(f'{rf.score(X_test_std, y_test)*100:.2f}%') 

98.25%


In [35]:
nb = GaussianNB()
nb.fit(X_train_std, y_train)
print(f'{nb.score(X_test_std, y_test)*100:.2f}%') 

93.86%


## 模型預測

In [36]:
clf.predict(X_test_std)

array([1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 0, 1, 1])

## 交叉驗證

In [37]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(estimator=clf,
                         X=X_test_std,
                         y=y_test,
                         cv=10,
                         n_jobs=-1)
print(f'K折分數: %s' % scores)
print(f'平均值: {np.mean(scores):.2f}, 標準差: {np.std(scores):.2f}')

K折分數: [0.91666667 1.         0.91666667 0.91666667 0.90909091 1.
 0.90909091 0.90909091 1.         1.        ]
平均值: 0.95, 標準差: 0.04


In [38]:
scores = cross_val_score(estimator=svc,
                         X=X_test_std,
                         y=y_test,
                         cv=10,
                         n_jobs=-1)
print(f'K折分數: %s' % scores)
print(f'平均值: {np.mean(scores):.2f}, 標準差: {np.std(scores):.2f}')

K折分數: [0.91666667 1.         0.91666667 1.         0.90909091 1.
 0.90909091 0.90909091 1.         1.        ]
平均值: 0.96, 標準差: 0.04


In [39]:
scores = cross_val_score(estimator=rf,
                         X=X_test_std,
                         y=y_test,
                         cv=10,
                         n_jobs=-1)
print(f'K折分數: %s' % scores)
print(f'平均值: {np.mean(scores):.2f}, 標準差: {np.std(scores):.2f}')

K折分數: [0.83333333 0.91666667 0.91666667 0.91666667 1.         1.
 1.         1.         1.         1.        ]
平均值: 0.96, 標準差: 0.06


In [40]:
scores = cross_val_score(estimator=nb,
                         X=X_test_std,
                         y=y_test,
                         cv=10,
                         n_jobs=-1)
print(f'K折分數: %s' % scores)
print(f'平均值: {np.mean(scores):.2f}, 標準差: {np.std(scores):.2f}')

K折分數: [1.         1.         0.91666667 0.91666667 0.90909091 1.
 0.81818182 0.90909091 1.         1.        ]
平均值: 0.95, 標準差: 0.06
