# Classification
***

# Import Packages

In [1]:
# for creating dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# general import
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

# this package
from stack import StackModel, StackMaster

# models
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier

# Create dataset

In [2]:
# Load the iris data set: feature matrix and target labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples as the test set.
# The fixed random_state keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Wrap the split arrays in pandas objects: the feature matrices become
# DataFrames with named columns, the targets become Series named 'species'.
X_train, X_test = (
    pd.DataFrame(features, columns=iris.feature_names)
    for features in (X_train, X_test)
)
y_train, y_test = (
    pd.Series(labels, name='species')
    for labels in (y_train, y_test)
)

# Fit stage 1 models

In [3]:
# Feature subset used by the two sepal-based learners, and the
# hyper-parameters used by the two tree ensembles.
sepal_columns = ['sepal length (cm)', 'sepal width (cm)']
tree_params = {'random_state': 0, 'n_jobs': -1, 'n_estimators': 100, 'max_depth': 3}

# One spec per stage-1 learner, holding only the keyword arguments that differ.
# Every spec receives its own copy of the shared list/dict so that no two
# StackModel instances are handed the same mutable object.
stage_1_specs = [
    {'model_name': 'LinearDiscriminantAnalysis',
     'model': LinearDiscriminantAnalysis,
     'x_names': list(sepal_columns)},
    {'model_name': 'GaussianNB',
     'model': GaussianNB,
     'x_names': list(sepal_columns)},
    {'model_name': 'SVC',
     'model': SVC,
     'x_names': ['petal length (cm)']},
    {'model_name': 'RandomForestClassifier',
     'model': RandomForestClassifier,
     'params': dict(tree_params)},
    {'model_name': 'ExtraTreesClassifier',
     'model': ExtraTreesClassifier,
     'params': dict(tree_params)},
]

# All learners are created with merge_method='mode'
# NOTE(review): presumably 'mode' merges predictions by majority vote -- confirm in the stack package
models_1 = [StackModel(**spec, merge_method='mode') for spec in stage_1_specs]

# Fit every stage-1 learner on the training data
# NOTE(review): refit=True presumably forces a new fit instead of reusing a saved one -- confirm
master = StackMaster(models=models_1)
master.fit(X_train, y_train, refit=True)

2019-09-21 16:07:54,577 - stack - INFO - LinearDiscriminantAnalysis start fit
2019-09-21 16:07:54,645 - stack - INFO - LinearDiscriminantAnalysis end fit
2019-09-21 16:07:54,650 - stack - INFO - LinearDiscriminantAnalysis save fit pkl
2019-09-21 16:07:54,651 - stack - INFO - GaussianNB start fit
2019-09-21 16:07:54,761 - stack - INFO - GaussianNB end fit
2019-09-21 16:07:54,764 - stack - INFO - GaussianNB save fit pkl
2019-09-21 16:07:54,765 - stack - INFO - SVC start fit
2019-09-21 16:07:54,859 - stack - INFO - SVC end fit
2019-09-21 16:07:54,866 - stack - INFO - SVC save fit pkl
2019-09-21 16:07:54,866 - stack - INFO - RandomForestClassifier start fit
2019-09-21 16:07:56,403 - stack - INFO - RandomForestClassifier end fit
2019-09-21 16:07:56,441 - stack - INFO - RandomForestClassifier save fit pkl
2019-09-21 16:07:56,443 - stack - INFO - ExtraTreesClassifier start fit
2019-09-21 16:07:57,827 - stack - INFO - ExtraTreesClassifier end fit
2019-09-21 16:07:57,861 - stack - INFO - ExtraT

In [4]:
# Preview the first rows of the stage-1 predictions for the training data
# (one column per stage-1 model)
train_pred_preview = master.train_pred.head()
train_pred_preview

Unnamed: 0,LinearDiscriminantAnalysis,GaussianNB,SVC,RandomForestClassifier,ExtraTreesClassifier
0,2,2,2,2,2
1,1,0,1,1,1
2,0,0,0,0,0
3,1,1,1,2,2
4,2,2,2,2,2


# Fit stage 2 model

In [5]:
# Stage-2 learner: an XGBoost classifier wrapped in a StackModel
xgb_params = {'random_state': 0, 'n_jobs': -1, 'n_estimators': 100, 'max_depth': 3}
model_2 = StackModel(
    model_name='XGBClassifier',
    model=XGBClassifier,
    params=xgb_params,
    merge_method='mode',
)

# Train the stage-2 learner on the stage-1 predictions for the training data
stage_1_train_pred = master.train_pred
model_2.fit(stage_1_train_pred, y_train, refit=True)

2019-09-21 16:07:58,068 - stack - INFO - XGBClassifier start fit
2019-09-21 16:07:58,336 - stack - INFO - XGBClassifier end fit
2019-09-21 16:07:58,341 - stack - INFO - XGBClassifier save fit pkl


# Predict test data

In [6]:
# Stage 1: predict the test data with every stage-1 model
master.predict(X_test, repredict=True)

# Stage 2: feed the stage-1 test predictions into the stage-2 model
stage_1_test_pred = master.test_pred
model_2.predict(stage_1_test_pred, repredict=True)

2019-09-21 16:07:58,354 - stack - INFO - LinearDiscriminantAnalysis start predict
2019-09-21 16:07:58,399 - stack - INFO - LinearDiscriminantAnalysis end predict
2019-09-21 16:07:58,402 - stack - INFO - LinearDiscriminantAnalysis save pred pkl
2019-09-21 16:07:58,404 - stack - INFO - GaussianNB start predict
2019-09-21 16:07:58,466 - stack - INFO - GaussianNB end predict
2019-09-21 16:07:58,482 - stack - INFO - GaussianNB save pred pkl
2019-09-21 16:07:58,488 - stack - INFO - SVC start predict
2019-09-21 16:07:58,548 - stack - INFO - SVC end predict
2019-09-21 16:07:58,554 - stack - INFO - SVC save pred pkl
2019-09-21 16:07:58,556 - stack - INFO - RandomForestClassifier start predict
2019-09-21 16:07:59,131 - stack - INFO - RandomForestClassifier end predict
2019-09-21 16:07:59,133 - stack - INFO - RandomForestClassifier save pred pkl
2019-09-21 16:07:59,134 - stack - INFO - ExtraTreesClassifier start predict
2019-09-21 16:07:59,671 - stack - INFO - ExtraTreesClassifier end predict
201

In [7]:
# Accuracy (fraction of correctly classified samples) on the test data for
# every stage-1 model and for the stage-2 model.
stage_1_names = [
    'LinearDiscriminantAnalysis',
    'GaussianNB',
    'SVC',
    'RandomForestClassifier',
    'ExtraTreesClassifier',
]

# (stage label, model name, predictions) triples in the order they are reported
scored_predictions = [
    ('Stage 1', name, master.test_pred[name]) for name in stage_1_names
]
scored_predictions.append(('Stage 2', 'XGBClassifier', model_2.test_pred))

# One header line and one indented score line per model
for stage, name, y_pred in scored_predictions:
    print('{} : {}'.format(stage, name))
    print('                    {:.4f}'.format(accuracy_score(y_test, y_pred)))

Stage 1 : LinearDiscriminantAnalysis
                    0.7333
Stage 1 : GaussianNB
                    0.7333
Stage 1 : SVC
                    0.9667
Stage 1 : RandomForestClassifier
                    0.9667
Stage 1 : ExtraTreesClassifier
                    0.9667
Stage 2 : XGBClassifier
                    1.0000
