# Stacking Exercise

In [140]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn import datasets
from sklearn.model_selection import train_test_split

from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

import xgboost as xgb
from xgboost import XGBClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier
from mlxtend.classifier import StackingClassifier

In [141]:
# Load the breast-cancer dataset bundled with scikit-learn and separate the
# feature matrix from the target vector.
dataset = datasets.load_breast_cancer()
X, y = dataset.data, dataset.target

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
X_train.shape

(398, 30)

### Model 1: k-NN

In [177]:
# k-nearest-neighbours classifier that votes over the 50 closest samples.
knn = KNeighborsClassifier(n_neighbors=50)

# Seed NumPy's global RNG (same seed as the other model cells), then fit on
# the training split and predict the held-out split.
np.random.seed(42)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [178]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.947


### Model 2: Random Forest

In [179]:
# Random forest made of 500 trees; all other hyperparameters are defaults.
rf = RandomForestClassifier(n_estimators=500)

# Seed NumPy's global RNG (same seed as the other model cells), then fit on
# the training split and predict the held-out split.
np.random.seed(42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [180]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.971


### Model 3: XGBoost

In [181]:
# Seed NumPy's global RNG (same seed as the other model cells).
np.random.seed(42)

# Gradient-boosted trees: 500 boosting rounds, depth-5 trees, learning rate 0.9.
# The estimator is bound to `xgb_clf` rather than `xgb` so that the `xgboost`
# module imported as `xgb` at the top of the file is not overwritten.
xgb_clf = XGBClassifier(learning_rate=0.9, max_depth=5, n_estimators=500)

# Fit on the training split and predict the held-out split.
xgb_clf.fit(X_train, y_train)
y_pred = xgb_clf.predict(X_test)

In [182]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.971


### Model 4: Light GBM

In [183]:
# Seed NumPy's global RNG (same seed as the other model cells).
np.random.seed(42)

# LightGBM classifier with default hyperparameters.
# The estimator is bound to `lgb_clf` rather than `lgb` so that the `lightgbm`
# module imported as `lgb` at the top of the file is not overwritten.
lgb_clf = LGBMClassifier()

# Fit on the training split and predict the held-out split.
lgb_clf.fit(X_train, y_train)
y_pred = lgb_clf.predict(X_test)

In [184]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.965


### Model 5: Decision Tree

In [185]:
# Single decision tree with default hyperparameters.
dt = DecisionTreeClassifier()

# Seed NumPy's global RNG (same seed as the other model cells), then fit on
# the training split and predict the held-out split.
np.random.seed(42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)

In [186]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.942


### Stack Model

In [190]:
# Base (first-level) learners for the stack, configured like the standalone
# k-NN, Decision Tree and Random Forest models fitted earlier.
clf1 = KNeighborsClassifier(n_neighbors=50)
clf2 = DecisionTreeClassifier()
clf3 = RandomForestClassifier(n_estimators=500)

# Second-level (meta) learner that combines the base learners' outputs.
meta = LogisticRegression()

In [191]:
# Seed NumPy's global RNG (same seed as the other model cells).
np.random.seed(42)

# Stack the three base learners under the logistic-regression meta learner.
# use_probas=False and use_features_in_secondary=False are passed explicitly.
stack = StackingClassifier(
    classifiers=[clf1, clf2, clf3],
    meta_classifier=meta,
    use_probas=False,
    use_features_in_secondary=False,
)

# Fit on the training split and predict the held-out split.
stack.fit(X_train, y_train)
y_pred = stack.predict(X_test)



In [192]:
# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: {:0.3f}".format(acc))

Accuracy: 0.971


The stacking model does not outperform the three base models tested above (k-NN, Decision Tree, and Random Forest): its accuracy (0.971) matches that of the best base model, Random Forest.