# 3.4 加權軟投票(分類)

## 使用驗證資料來找到各個基學習器的最佳權重
### 先訓練基學習器，接著使用基學習器輸出驗證資料的預測機率，接下來找一組最佳的權重組合，可以讓加權平均後的預測值，在驗證資料集上得到最高的分數。

In [2]:
# --- 第 1 部分 ---
# 載入函式庫
from sklearn import datasets, naive_bayes, svm, neighbors
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score
import numpy as np
# Load the breast-cancer dataset and pull out the feature matrix and labels
breast_cancer = datasets.load_breast_cancer()
x = breast_cancer.data
y = breast_cancer.target

# Hold out the last `test_samples` rows as the validation set; everything
# before them is used for training (no shuffling is performed).
test_samples = 100
train_part = slice(None, -test_samples)
test_part = slice(-test_samples, None)
x_train, x_test = x[train_part], x[test_part]
y_train, y_test = y[train_part], y[test_part]


In [3]:
# --- Part 2 ---
# Instantiate the three base learners
learner_1 = neighbors.KNeighborsClassifier(n_neighbors=5)
learner_2 = naive_bayes.GaussianNB()
# probability=True is required so that SVC exposes predict_proba
learner_3 = svm.SVC(gamma=0.001, probability=True)

# Fit every base learner on the training split, in order
for base_learner in (learner_1, learner_2, learner_3):
    base_learner.fit(x_train, y_train)

# Collect each learner's predicted class probabilities on the held-out split
prob_1, prob_2, prob_3 = (
    fitted.predict_proba(x_test)
    for fitted in (learner_1, learner_2, learner_3)
)

In [4]:
# Grid-search the weight simplex (w1 + w2 + w3 = 1, every w >= 0) for the
# combination whose weighted average of the three probability arrays gives the
# highest accuracy against y_test.
best = 0
best_weight = np.zeros(3)
# np.linspace spreads points evenly over a range: 100 points between 0 and 1.
space = np.linspace(start=0, stop=1, num=100)
num_points = len(space)

# Walk the grid by integer index instead of testing `w1 + w2 <= 1` on floats:
# rounding can push a sum that is mathematically 1 slightly above 1.0, which
# would silently skip combinations on the boundary of the simplex.
for idx_1 in range(num_points):
    weight_1 = space[idx_1]
    for idx_2 in range(num_points - idx_1):
        weight_2 = space[idx_2]
        # Clamp so rounding can never produce a tiny negative third weight.
        weight_3 = max(0.0, 1 - weight_1 - weight_2)
        prob_avg = weight_1 * prob_1 + weight_2 * prob_2 + weight_3 * prob_3
        # Column index of the largest averaged probability for each sample.
        # NOTE(review): this equals the class label only if the labels are
        # 0..k-1 in column order -- confirm for the dataset in use.
        pred = np.argmax(prob_avg, axis=1)
        score = accuracy_score(y_test, pred)
        # Strict '>' keeps the first combination that reaches the top score.
        if score > best:
            best = score
            best_weight[:] = (weight_1, weight_2, weight_3)

print('Weight:', best_weight)
print('Weighted Soft Voting:', best)


Weight: [0.18181818 0.50505051 0.31313131]
Weighted Soft Voting: 0.97
