# 堆疊(Stacking)測試

## 載入相關套件

In [5]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

## 載入資料集

In [6]:
X, y = datasets.load_breast_cancer(return_X_y=True)

## 資料分割

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

## 模型訓練

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier

def get_models():
    models = []
    models.append(('knn', KNeighborsClassifier()))
    models.append(('cart', DecisionTreeClassifier()))
    models.append(('svm', SVC()))
    models.append(('bayes', GaussianNB()))
    return models

estimators = get_models()
model = StackingClassifier(
    estimators=estimators, final_estimator=LogisticRegression()
)

model.fit(X_train,y_train)

## 模型評估

In [13]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(model, X_test,y_test, cv=10)
print(f'平均分數: {np.mean(scores)}, 標準差: {np.std(scores)}')

平均分數: 0.9303030303030303, 標準差: 0.08393720596645175


## 使用迴歸模型

In [37]:
from sklearn.linear_model import RidgeCV
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.preprocessing import StandardScaler

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

estimators = [
    ('lr', RidgeCV()),
    ('svr', LinearSVR(random_state=42))
]

model = StackingRegressor(
    estimators=estimators,
    final_estimator=RandomForestRegressor(n_estimators=10, random_state=42))
model.fit(X_train_std, y_train)
scores = cross_val_score(model, X_test_std, y_test, cv=10)
print(f'平均分數: {np.mean(scores)}, 標準差: {np.std(scores)}')

平均分數: 0.12143159519945441, 標準差: 0.4732757387323812


In [38]:
svc = LinearSVR()
svc.fit(X_train_std, y_train)
scores = cross_val_score(svc, X_test_std, y_test, cv=10)
print(f'平均分數: {np.mean(scores)}, 標準差: {np.std(scores)}')

平均分數: -1.0399780386178537, 標準差: 0.36412901584183494
