# Regression
***

# Import Packages

In [10]:
# for creating dataset
from sklearn.datasets import load_boston

# general import
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error

# this package
from stack import StackModel, StackMaster

# models
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from xgboost import XGBRegressor

# Create dataset

In [11]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the pinned scikit-learn version before re-running this cell.
boston = load_boston()

# Build a single dataframe: one column per feature plus the target as 'PRICE'.
all_df = pd.DataFrame(
    boston.data,
    columns=boston.feature_names,
).assign(PRICE=boston.target)

# 80/20 split: sample the training rows with a fixed seed, keep the rest as test.
train = all_df.sample(frac=0.8, random_state=0)
test = all_df.drop(train.index)

# Keep the test target aside and strip it from the test features.
test_y = test['PRICE']
test = test.drop(columns='PRICE')

# Fit stage 1 models

In [None]:
# Stage 1 model definitions.
# Each linear model is given its own group of four feature columns, while the
# two tree ensembles are given every column in boston.feature_names.
# All models predict the 'PRICE' column.
models_1 = [
    StackModel(model_name='LinearRegression', model=LinearRegression,
               x_names=['CRIM', 'ZN', 'INDUS', 'CHAS'], y_names='PRICE'),
    StackModel(model_name='Ridge', model=Ridge,
               x_names=['NOX', 'RM', 'AGE', 'DIS'], y_names='PRICE'),
    StackModel(model_name='Lasso', model=Lasso,
               x_names=['RAD', 'TAX', 'PTRATIO', 'B'], y_names='PRICE'),
    StackModel(model_name='RandomForestRegressor', model=RandomForestRegressor,
               x_names=boston.feature_names, y_names='PRICE',
               params=dict(random_state=0, n_jobs=-1, n_estimators=100, max_depth=3)),
    StackModel(model_name='ExtraTreesRegressor', model=ExtraTreesRegressor,
               x_names=boston.feature_names, y_names='PRICE',
               params=dict(random_state=0, n_jobs=-1, n_estimators=100, max_depth=3)),
]

# Hand all stage 1 models to the master and fit them on the training frame.
# refit=True presumably forces a fresh fit instead of reusing a saved one --
# confirm against the stack package.
master = StackMaster(models=models_1)
master.fit(train, refit=True)

2019-09-21 13:02:01,218 - stack - INFO - LinearRegression start fit
2019-09-21 13:02:01,281 - stack - INFO - LinearRegression end fit
2019-09-21 13:02:01,288 - stack - INFO - LinearRegression save fit pkl
2019-09-21 13:02:01,292 - stack - INFO - Ridge start fit
2019-09-21 13:02:01,408 - stack - INFO - Ridge end fit
2019-09-21 13:02:01,438 - stack - INFO - Ridge save fit pkl
2019-09-21 13:02:01,446 - stack - INFO - Lasso start fit
2019-09-21 13:02:01,539 - stack - INFO - Lasso end fit
2019-09-21 13:02:01,543 - stack - INFO - Lasso save fit pkl
2019-09-21 13:02:01,544 - stack - INFO - RandomForestRegressor start fit


In [None]:
# look at the predicted train data
# (the last expression of the cell is displayed; head() limits it to the first rows)
# NOTE(review): presumably one column per stage 1 model -- confirm in the stack package
master.train_pred.head()

# Fit stage 2 model

In [None]:
# Stage 2 model: an XGBRegressor that takes every column of the stage 1
# train predictions as input and predicts 'PRICE'.
model_2 = StackModel(
    model_name='XGBRegressor',
    model=XGBRegressor,
    x_names=master.train_pred.columns,
    y_names='PRICE',
    params=dict(random_state=0, n_jobs=-1, n_estimators=100, max_depth=3),
)

# Stage 2 training frame: stage 1 predictions side by side with the true target.
# The concat aligns rows on the index of both inputs.
train_2 = pd.concat([master.train_pred, train['PRICE']], axis='columns')

# Fit the stage 2 model on the combined frame.
model_2.fit(train_2, refit=True)

# Predict test data

In [None]:
# Run the stage 1 models on the test features first; the stage 2 call below
# reads its input from master.test_pred, so the order of these two lines matters.
# NOTE(review): repredict=True presumably forces fresh predictions rather than
# reusing saved ones -- confirm against the stack package.
master.predict(test, repredict=True)
# Feed the stage 1 test predictions into the stage 2 model.
model_2.predict(master.test_pred, repredict=True)

In [None]:
# MAE for test data
def report_mae(label, y_true, y_pred):
    """Print ``label`` and, on the next line, the mean absolute error.

    Parameters
    ----------
    label : str
        Heading line printed as-is (e.g. ``'Stage 1 : Ridge'``).
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, passed to ``mean_absolute_error`` together with
        ``y_true``.

    Returns
    -------
    None
        The result is printed, indented and formatted to four decimals.
    """
    print(label)
    print('                    {:.4f}'.format(mean_absolute_error(y_true, y_pred)))


# Stage 1: each model is scored from the column of master.test_pred that
# carries its model_name. Bracket lookup is used so a model name can never be
# shadowed by a dataframe attribute of the same name.
stage_1_names = [
    'LinearRegression',
    'Ridge',
    'Lasso',
    'RandomForestRegressor',
    'ExtraTreesRegressor',
]
for name in stage_1_names:
    report_mae('Stage 1 : {}'.format(name), test_y, master.test_pred[name])

# Stage 2: the stacked model's predictions are read from model_2.test_pred.
report_mae('Stage 2 : XGBRegressor', test_y, model_2.test_pred)