# 4.7 建立堆疊的函式

In [7]:
# --- 第 1 部分 ---
# 匯入函式庫
import numpy as np
from sklearn.model_selection import KFold
from copy import deepcopy

In [36]:
# --- 第 2 部分 ---
class StackingClassifier():
    def __init__(self, learners):
        # 接收基學習器、超學習器、以及堆疊中每一層分別有多少學習器
        # 複製學習器
        self.level_sizes = []
        self.learners = []
        for learning_level in learners:
            self.level_sizes.append(len(learning_level))
            level_learners = []
            for learner in learning_level:
                level_learners.append(deepcopy(learner))
            self.learners.append(level_learners)
            
# --- 第 3 部分 ---
    # fit 函式
    # 用第i-1層的基學習器預測值來訓練第i層的基學習器
    def fit(self, x, y):
        # 第1層基學習器的訓練資料即為原始資料
        meta_data = [x]
        meta_targets = [y]
        for i in range(len(self.learners)):
            level_size = self.level_sizes[i]

            # 建立第i層預測值的儲存空間
            data_z = np.zeros((level_size, len(x)))
            target_z = np.zeros(len(x))

            # 取得第i層訓練資料集
            train_x = meta_data[i]
            train_y = meta_targets[i]

            # 建立交叉驗證
            KF = KFold(n_splits=5)
            meta_index = 0
            for train_indices, test_indices in KF.split(x):
                for j in range(len(self.learners[i])):
                    # 使用前K-1折訓練第j個基學習器
                    learner = self.learners[i][j]
                    learner.fit(train_x[train_indices],train_y[train_indices])
                    # 使用第K折驗證第j個基學習器
                    predictions = learner.predict(train_x[test_indices])
                    # 儲存第K折第j個基學習器預測結果
                    data_z[j][meta_index:meta_index+len(test_indices)] = predictions

                target_z[meta_index:meta_index + len(test_indices)] = train_y[test_indices]
                meta_index += len(test_indices)

            # 儲存第i層基學習器的預測結果
            # 作為第i+1層基學習器的訓練資料
            data_z = data_z.transpose()
            meta_data.append(data_z)
            meta_targets.append(target_z)


            # 使用完整的訓練資料來訓練基學習器
            for learner in self.learners[i]:
                learner.fit(train_x, train_y)
                
# --- 第 4 部分 ---
    # predict 函式
    def predict(self, x):

        # 儲存每一層的預測
        meta_data = [x]
        for i in range(len(self.learners)):
            level_size = self.level_sizes[i]

            data_z = np.zeros((level_size, len(x)))

            test_x = meta_data[i]
            
            
            KF = KFold(n_splits=3)
            for train_indices, test_indices in KF.split(x):
                for j in range(len(self.learners[i])):

                    learner = self.learners[i][j]
                    predictions = learner.predict(test_x)
                    data_z[j] = predictions
            

#             for j in range(len(self.learners[i])):
#                 learner = self.learners[i][j]
#                 predictions = learner.predict(test_x)
#                 data_z[j] = predictions

            # 儲存第i層基學習器的預測結果
            # 作為第i+1層基學習器的輸入
            data_z = data_z.transpose()
            meta_data.append(data_z)

        # 傳回預測結果
#         return meta_data
        return meta_data[-1]



    def predict_proba(self, x):

        # 儲存每一層的預測
        meta_data = [x]
        for i in range(len(self.learners)-1):
            level_size = self.level_sizes[i]

            data_z = np.zeros((level_size, len(x)))

            test_x = meta_data[i]

            KF = KFold(n_splits=5)
            for train_indices, test_indices in KF.split(x):
                for j in range(len(self.learners[i])):

                    learner = self.learners[i][j]
                    predictions = learner.predict(test_x)
                    data_z[j] = predictions

            # 儲存第i層基學習器的預測結果
            # 作為第i+1層基學習器的輸入
            data_z = data_z.transpose()
            meta_data.append(data_z)

        # 傳回預測結果
        learner = self.learners[-1][-1]
        return learner.predict_proba(meta_data[-1])

In [38]:
# 載入函式庫與資料集
from sklearn.datasets import load_breast_cancer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn import metrics
import numpy as np
bc = load_breast_cancer()

train_x, train_y = bc.data[:400], bc.target[:400]
test_x, test_y = bc.data[400:], bc.target[400:]



base_learners = []

knn = KNeighborsClassifier(n_neighbors=2)
base_learners.append(knn)

dtr = DecisionTreeClassifier(max_depth=4, random_state=2)
base_learners.append(dtr)

mlpc = MLPClassifier(hidden_layer_sizes =(100, ), random_state=2)
base_learners.append(mlpc)

meta_learner = LogisticRegression() # 超學習器為邏輯斯迴歸

# 傳入我們建立的函式
sc = StackingClassifier([[knn,dtr,mlpc],[meta_learner]])


# 訓練、預測
sc.fit(train_x, train_y)
meta_data = sc.predict(test_x)

# 衡量基學習器跟集成後效能
base_acc = []
for i in range(len(base_learners)):
    learner = base_learners[i]
#     predictions = meta_data[1][:,i]
    predictions = meta_data
    acc = metrics.accuracy_score(test_y, predictions)
    base_acc.append(acc)

# acc = metrics.accuracy_score(test_y, meta_data[-1])
acc = metrics.accuracy_score(test_y, meta_data)

# 顯示結果
print('Acc  Name')
print('-'*20)
for i in range(len(base_learners)):
    learner = base_learners[i]
    print(f'{base_acc[i]:.2f} {learner.__class__.__name__}')
print(f'{acc:.2f} Ensemble')



ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets