In [112]:
import pandas as pd
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
from matplotlib import pyplot as plt

from sklearn import linear_model
from sklearn.ensemble import GradientBoostingRegressor as gbr, RandomForestRegressor as rfr, ExtraTreesRegressor as etr
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.preprocessing import RobustScaler

from sklearn.pipeline import make_pipeline
import xgboost as xgb
import lightgbm as lgb 

%matplotlib inline

In [98]:
#loading train
df_train1=pd.read_csv('./train.csv')
#loading test
df_test1=pd.read_csv('./test.csv')

#loading final
df_train=pd.read_csv('./final_train.csv')
#loading test
df_test=pd.read_csv('./final_test.csv')
df_id=pd.read_csv('./test_id.csv', index_col=0, header=None)
df_id.columns=['Id']


In [100]:
##Setting index as ID columns
df_test.head()
df_test.set_index(df_id['Id'], inplace = True)
#df_train.set_index(df_train1['Id'], inplace = True)

In [99]:
##Setting y and x to begin running models
Y_vals = pd.DataFrame(df_train['SalePrice']).copy()
Y_vals = np.ravel(Y_vals)
df_test.drop(['Unnamed: 0', 'SalePrice'],inplace=True, axis =1)
df_train.drop(['Unnamed: 0', 'SalePrice'],inplace=True, axis =1)
X_train=df_train.copy()
X_test=df_test.copy()


# Lasso

In [113]:
lasso_model = make_pipeline(RobustScaler(), linear_model.LassoCV(random_state=0,n_alphas=1000, normalize=True, eps = 0.0001, max_iter=10000))
mod = lasso_model.fit(X_train, Y_vals)
mod.score(X_train, Y_vals)
##score=0.93082405005697799
#lasso_model.coef_

0.93076590337025844

In [102]:
##Grid search for optimal lasso parameters
grid_para_lasso = [{
    "n_alphas": range(100,1000, 100),
    "random_state": [0],
    "eps" : [0.0001],
    "normalize" : [True]
}]
grid_search_lasso = GridSearchCV(lasso_model, grid_para_lasso, cv=5, n_jobs=-1, verbose=1)
%time grid_search_lasso.fit(X_train, Y_vals)

Fitting 5 folds for each of 9 candidates, totalling 45 fits


KeyboardInterrupt: 

In [None]:
print(grid_search_lasso.best_params_)
print(grid_search_lasso.best_score_)


# Elastic Net

In [129]:
elastic_model = make_pipeline(RobustScaler(), linear_model.ElasticNetCV(l1_ratio=0.96326530612244898,n_alphas=1000, normalize=True,eps = 0.0001,max_iter=10000))
elastic_model = elastic_model.fit(X_train, Y_vals)
#elastic =linear_model.ElasticNetCV(l1_ratio=0.41224489795918373,n_alphas=1000, normalize=True,eps = 0.0001,max_iter=10000).fit(X_train,Y_vals)
mse = mean_squared_error(Y_vals, elastic_model.predict(X_train))
print(elastic_model.score(X_train, np.ravel(Y_vals)))
print(np.sqrt(mse))
#1-0.112947265448


0.930705340648
0.105184513626


In [127]:
##Grid search for optimal elastic parameters
grid_para_elastic = [{
    "n_alphas": [1000],
    #"alphas": np.linspace(0,20,num=5),
    "l1_ratio": np.linspace(0.1,1),
    "normalize" : [True]
}]
grid_search_elastic = GridSearchCV(elastic, grid_para_elastic,scoring ="neg_mean_squared_error", cv=5, n_jobs=-1, verbose=1)
%time grid_search_elastic.fit(X_train, Y_vals)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   17.7s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  2.4min finished


CPU times: user 8.9 s, sys: 285 ms, total: 9.19 s
Wall time: 2min 23s


GridSearchCV(cv=5, error_score='raise',
       estimator=ElasticNetCV(alphas=None, copy_X=True, cv=None, eps=0.0001,
       fit_intercept=True, l1_ratio=0.41224489795918373, max_iter=10000,
       n_alphas=1000, n_jobs=1, normalize=True, positive=False,
       precompute='auto', random_state=None, selection='cyclic',
       tol=0.0001, verbose=0),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'n_alphas': [1000], 'l1_ratio': array([ 0.1    ,  0.11837,  0.13673,  0.1551 ,  0.17347,  0.19184,
        0.2102 ,  0.22857,  0.24694,  0.26531,  0.28367,  0.30204,
        0.32041,  0.33878,  0.35714,  0.37551,  0.39388,  0.41224,
        0.43061,  0.44898,  0.46735,  0.48571,  0.5040... 0.8898 ,  0.90816,  0.92653,  0.9449 ,  0.96327,
        0.98163,  1.     ]), 'normalize': [True]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [128]:
print(grid_search_elastic.best_params_)
print(grid_search_elastic.best_score_)

{'l1_ratio': 0.96326530612244898, 'n_alphas': 1000, 'normalize': True}
-0.0130335765055


# Gradient Boosted Regressor

In [131]:
gbr_model = gbr(random_state=0, learning_rate = 0.005, max_features='sqrt',
        min_samples_leaf=15, min_samples_split=10, 
        n_estimators=3000, max_depth=3, ).fit(X_train, Y_vals)

print(gbr_model.score(X_train, Y_vals))


0.951016506727


# Random Forest Regressor

In [55]:
rfr_model = rfr(random_state=0, min_samples_leaf = 2, min_samples_split =2,
        n_estimators=1000, max_depth=9,  max_features='sqrt', oob_score=True).fit(X_train, np.ravel(Y_vals))
print(rfr_model.score(X_train, Y_vals))
print(rfr_model.oob_score_)

0.939921604694
0.875588318315


In [450]:
##Grid search for optimal random tree parameters
from sklearn.model_selection import GridSearchCV
grid_para_rfr = [{
    "n_estimators": [1000],
    "max_depth": range(8,11),
    "min_samples_leaf": range(2, 5),
    "min_samples_split": np.linspace(start=2, stop=30, num=5, dtype=int),
    "random_state": [0]
}]
grid_search_rfr = GridSearchCV(rfr_model, grid_para_rfr, cv=5, n_jobs=-1, verbose=1)
%time grid_search_rfr.fit(X_train, np.ravel(Y_vals))

Fitting 5 folds for each of 45 candidates, totalling 225 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 225 out of 225 | elapsed:  2.0min finished


CPU times: user 4.47 s, sys: 177 ms, total: 4.65 s
Wall time: 2min 3s


GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=9,
           max_features='sqrt', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=1,
           oob_score=True, random_state=0, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'n_estimators': [1000], 'max_depth': range(8, 11), 'min_samples_leaf': range(2, 5), 'min_samples_split': array([ 2,  9, 16, 23, 30]), 'random_state': [0]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [452]:
print(grid_search_rfr.best_params_)
print(grid_search_rfr.best_score_)
#0.88759734575697047
#{'max_depth': 9,
# 'min_samples_leaf': 1,
# 'min_samples_split': 2,
# 'n_estimators': 1000,
# 'random_state': 0}

{'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 1000, 'random_state': 0}
0.854867286087


# Extra Random Trees

In [56]:
etr_model = etr(random_state=0,min_samples_split=2,
        n_estimators=1000, max_depth=9, min_samples_leaf= 1, max_features='sqrt', bootstrap=True,oob_score=True).fit(X_train,Y_vals)
print(etr_model.score(X_train, Y_vals))
print(etr_model.oob_score_)

0.9100583558
0.833867651717


In [449]:
##Grid search for extra random tree parameters
from sklearn.model_selection import GridSearchCV
grid_para_etr = [{
    "n_estimators": [1000],
    "max_depth": range(1,10),
    "min_samples_leaf": range(1, 5),
    "min_samples_split": np.linspace(start=2, stop=30, num=5, dtype=int),
    "random_state": [0]
}]
grid_search_etr = GridSearchCV(etr_model, grid_para_etr, cv=5, n_jobs=-1, verbose=1)
%time grid_search_etr.fit(X_train, np.ravel(Y_vals))

Fitting 5 folds for each of 180 candidates, totalling 900 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  2.5min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 900 out of 900 | elapsed:  5.8min finished


CPU times: user 9.43 s, sys: 328 ms, total: 9.76 s
Wall time: 5min 47s


GridSearchCV(cv=5, error_score='raise',
       estimator=ExtraTreesRegressor(bootstrap=True, criterion='mse', max_depth=9,
          max_features='sqrt', max_leaf_nodes=None,
          min_impurity_decrease=0.0, min_impurity_split=None,
          min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=1000, n_jobs=1,
          oob_score=True, random_state=0, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'n_estimators': [1000], 'max_depth': range(1, 10), 'min_samples_leaf': range(1, 5), 'min_samples_split': array([ 2,  9, 16, 23, 30]), 'random_state': [0]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [455]:
print(grid_search_etr.best_params_)
print(grid_search_etr.best_score_)
#{'max_depth': 9, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 1000, 'random_state': 0}
#0.81466266301

{'max_depth': 9, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 1000, 'random_state': 0}
0.809695308898


# XGB

In [61]:
xgb_model = xgb.XGBRegressor(
                 colsample_bytree=1,
                 gamma=0.0,
                 learning_rate=0.05,
                 max_depth=20,
                 min_child_weight=0.1,
                 n_estimators=1000,                                                                  
                 reg_alpha=0,
                 reg_lambda=1,
                 subsample=1,
                 seed=1,
                 silent=False)

xgb_model.fit(X_train, Y_vals)

XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0.0, learning_rate=0.05, max_delta_step=0, max_depth=20,
       min_child_weight=0.1, missing=None, n_estimators=1000, nthread=-1,
       objective='reg:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=1, silent=False, subsample=1)

In [159]:
#Grid Search for XGB -- Optimized via Ilyas Bayesion Optimizer instead
grid_para_xgb = [{
    'max_depth': [3,4,5,6,7],
    'learning_rate': [.001, .01, .1],
    'n_estimators': [200],
    'gamma': [0.1],
    'reg_alpha': [.25, .5, .75],
    'reg_lambda': [.25, .5, .75],
    'subsample': [.3 ,.5,.7,.9],
    'colsample_bytree': [.4,.5,.6],
    'min_child_weight': [.3, .6, .9]
}]
grid_search_xgb = GridSearchCV(xgb_model, grid_para_xgb, cv=5, n_jobs=-1, verbose=1, scoring='neg_mean_squared_error')
%time grid_search_xgb.fit(X_train, Y_vals)

Fitting 5 folds for each of 4860 candidates, totalling 24300 fits


KeyboardInterrupt: 

In [62]:
xgb_model.score(X_train, Y_vals)

0.9999978652226702

# LGBM

In [65]:
##Should tune this algorithm, but not sure what any of the parameters do

lgb_model = lgb.LGBMRegressor(objective='regression',num_leaves=5,
                              learning_rate=0.05, n_estimators=720,
                              max_bin = 55, bagging_fraction = 0.8,
                              bagging_freq = 5, feature_fraction = 0.2319,
                              feature_fraction_seed=9, bagging_seed=9,
                              min_data_in_leaf =6, min_sum_hessian_in_leaf = 11)
lgb_model.fit(X_train,Y_vals)


LGBMRegressor(bagging_fraction=0.8, bagging_freq=5, bagging_seed=9,
       boosting_type='gbdt', colsample_bytree=1.0, feature_fraction=0.2319,
       feature_fraction_seed=9, learning_rate=0.05, max_bin=55,
       max_depth=-1, min_child_samples=10, min_child_weight=5,
       min_data_in_leaf=6, min_split_gain=0.0, min_sum_hessian_in_leaf=11,
       n_estimators=720, n_jobs=-1, num_leaves=5, objective='regression',
       random_state=0, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=50000, subsample_freq=1)

In [66]:
lgb_model.score(X_train, Y_vals)

0.96699903921654085

# Results

In [69]:
n_folds = 4

scorer = make_scorer(mean_squared_error,greater_is_better = False)
def rmse_CV_train(model):
    kf = KFold(n_folds,shuffle=True,random_state=42).get_n_splits(X_train.values)
    rmse = np.sqrt(-cross_val_score(model,X_train,Y_vals,scoring ="neg_mean_squared_error",cv=kf, n_jobs=-1, verbose=1))
    return (rmse)
def rmsle(y, y_pred):
    return np.sqrt(mean_squared_error(np.log(y), np.log(y_pred)))



In [157]:
models = [lasso_model, elastic_model, gbr_model, xgb_model, rfr_model, etr_model, lgb_model]
for i in models:
    score = rmsle(Y_vals, i.predict(X_train))
    print('rmse for '+str(i)[0:7]+ ": ",score)
print(rmsle(Y_vals, stacked_averaged_models.predict(X_train.values)))
   

rmse for Pipelin:  0.00890286178342
rmse for Pipelin:  0.0089063996142
rmse for Gradien:  0.00749615904899
rmse for XGBRegr:  4.85620431737e-05
rmse for RandomF:  0.00828870865222
rmse for ExtraTr:  0.0100412636367
rmse for LGBMReg:  0.00612980743951
0.00791398361066


# RMSE Score on each model

sklearn.pipeline.Pipeline

In [142]:
models = [lasso_model, elastic_model, gbr_model, rfr_model, etr_model, lgb_model]
for i in models:
    score = rmse_CV_train(i)
    print('rmse mean/std for '+str(i)[0:7]+ ": ",score.mean(), score.std())
   

rmse mean/std for Pipelin:  0.113807607032 0.00931302016419


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    3.6s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    3.6s finished


rmse mean/std for Pipelin:  0.11385059959 0.00940717488018


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    3.5s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    3.5s finished


rmse mean/std for Gradien:  0.115415381919 0.0102640046325


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    1.8s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    1.8s finished


rmse mean/std for RandomF:  0.142657117469 0.0130156120196


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    2.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    2.4s finished


rmse mean/std for ExtraTr:  0.164415678131 0.012591930959


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    1.9s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    1.9s finished


rmse mean/std for LGBMReg:  0.114331427186 0.0097265896501


[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    3.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    3.3s finished


In [439]:
rmse_CV_train(xgb_model)

Process ForkPoolWorker-183:
Process ForkPoolWorker-184:
Process ForkPoolWorker-182:
Process ForkPoolWorker-181:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/michaelchuang/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/michaelchuang/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/michaelchuang/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/michaelchuang/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/michaelchuang/anaconda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/michaelchuang/anaconda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args

KeyboardInterrupt: 

# Saving results to CSV

In [72]:
for i in models:
    results=round(pd.DataFrame(np.exp(i.predict(X_test))),3)
    results.columns=['SalePrice']
    results.set_index(df_test.index, inplace=True)
    results.to_csv('output_'+str(i)[0:7]+'.csv')

In [73]:
df_etr=pd.read_csv('./output_ExtraTr.csv', index_col='Id')
df_rfr=pd.read_csv('./output_RandomF.csv', index_col='Id')
df_lasso=pd.read_csv('./output_LassoCV.csv', index_col='Id')
df_elastic=pd.read_csv('./output_Elastic.csv', index_col='Id')
df_xgb=pd.read_csv('./output_XGBRegr.csv', index_col='Id')
df_lgb=pd.read_csv('./output_LGBMReg.csv', index_col='Id')



# Averaged Model

In [105]:
df_average = (df_lasso+df_etr+df_xgb)/3
df_average.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
1461,121598.816063
1462,157244.731958
1463,184337.968583
1464,197938.0025
1465,195191.995542


In [106]:
df_average.to_csv('ALL_average.csv')

# Stacking

In [74]:
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone

class StackingAveragedModels(BaseEstimator, RegressorMixin, TransformerMixin):
    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds
   
    # We again fit the data on clones of the original models
    def fit(self, X, y):
        self.base_models_ = [list() for x in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)
        
        # Train cloned base models then create out-of-fold predictions
        # that are needed to train the cloned meta-model
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(model)
                self.base_models_[i].append(instance)
                instance.fit(X[train_index], y[train_index])
                y_pred = instance.predict(X[holdout_index])
                out_of_fold_predictions[holdout_index, i] = y_pred
                
        # Now train the cloned  meta-model using the out-of-fold predictions as new feature
        self.meta_model_.fit(out_of_fold_predictions, y)
        return self
   
    #Do the predictions of all base models on the test data and use the averaged predictions as 
    #meta-features for the final prediction which is done by the meta-model
    def predict(self, X):
        meta_features = np.column_stack([
            np.column_stack([model.predict(X) for model in base_models]).mean(axis=1)
            for base_models in self.base_models_ ])
        return self.meta_model_.predict(meta_features)

In [144]:
stacked_averaged_models = StackingAveragedModels(base_models = (elastic_model, rfr_model,gbr_model, xgb_model),
                                                 meta_model = lasso_model)




In [146]:
stacked_averaged_models.fit(X_train.values, Y_vals)



StackingAveragedModels(base_models=(Pipeline(memory=None,
     steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)), ('elasticnetcv', ElasticNetCV(alphas=None, copy_X=True, cv=None, eps=0.0001,
       fit_intercept=True, l1_ratio=0.963265306122449, ma...g:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=1, silent=False, subsample=1)),
            meta_model=Pipeline(memory=None,
     steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)), ('lassocv', LassoCV(alphas=None, copy_X=True, cv=None, eps=0.0001, fit_intercept=True,
    max_iter=10000, n_alphas=1000, n_jobs=1, normalize=True,
    positive=False, precompute='auto', random_state=0, selection='cyclic',
    tol=0.0001, verbose=False))]),
            n_folds=5)

In [149]:
stacked_pred = np.exp(stacked_averaged_models.predict(X_test.values))
xgb_pred=np.exp(xgb_model.predict(X_test))
lgb_pred=np.exp(lgb_model.predict(X_test))
ensemble= stacked_pred*0.7 + xgb_pred *0.15 + lgb_pred*0.15



In [150]:
sub = pd.DataFrame()
sub['SalePrice'] = np.round(ensemble,3)
sub.set_index(df_test.index, inplace=True)
sub.to_csv('submission.csv')

In [352]:
stacked_results=round(pd.DataFrame(np.exp(stacked_averaged_models.predict(X_test.values))),3)
stacked_results.columns=['SalePrice']
stacked_results.set_index(df_test.index, inplace=True)
stacked_results.to_csv('stacked_model.csv')

In [81]:
sub.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
1461,119909.617
1462,159880.774
1463,182757.289
1464,197924.055
1465,198624.862


In [82]:
from stacking import stacking_regression


In [83]:
meta_model = lasso_model

In [84]:
stacking_prediction = stacking_regression(models, meta_model, X_train, Y_vals, X_test,
                              transform_pred = np.expm1, 
                               metric=rmsle, verbose=1)

metric: [rmsle]

model 0: [LassoCV]
    ----
    MEAN:   [9.54440962]

model 1: [ElasticNetCV]
    ----
    MEAN:   [9.54409655]

model 2: [RandomForestRegressor]
    ----
    MEAN:   [9.54258111]

model 3: [ExtraTreesRegressor]
    ----
    MEAN:   [9.54102201]

model 4: [LGBMRegressor]
    ----
    MEAN:   [9.54495980]



In [85]:
stacking_prediction

array([ 120914.22808668,  148449.84363465,  169228.8688654 , ...,
        154109.46738638,  120619.43063773,  214777.04548172])

In [86]:
sp=round(pd.DataFrame(stacking_prediction),3)
sp.set_index(df_test.index, inplace=True)
sp.columns = ['SalePrice']
sp.to_csv('ZYrun.csv')
sp

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
1461,120914.228
1462,148449.844
1463,169228.869
1464,187025.250
1465,188871.334
1466,162232.839
1467,165725.348
1468,155583.079
1469,180385.714
1470,120802.773
