In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import BaggingRegressor, BaseEnsemble
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import Imputer, StandardScaler
from house_features import CategoricalFeatures, LinearFeatures, TreeFeatures

In [2]:
# Load the training table; the path is relative to the notebook's working directory.
train = pd.read_csv(filepath_or_buffer='../input/train.csv')

In [3]:
# Bagged-Lasso pipeline: project feature transformers -> imputation -> scaling
# -> bagging ensemble of Lasso regressors.
# NOTE(review): CategoricalFeatures / LinearFeatures come from the local
# house_features module; their behavior is not visible here.
# NOTE(review): Imputer was removed from sklearn.preprocessing in scikit-learn
# 0.22 (replacement: sklearn.impute.SimpleImputer) -- confirm the target version.
model_bagged_lasso = make_pipeline(
    CategoricalFeatures(),
    LinearFeatures(),
    Imputer(strategy='most_frequent'),
    StandardScaler(),
    BaggingRegressor(
        Lasso(alpha=0.002),
        n_estimators=50,
        max_samples=0.4,
        max_features=1.0,
        # oob_score=True is what makes oob_prediction_ available after fitting.
        oob_score=True,
        # Fixed seed so the bootstrap draws are repeatable.
        random_state=1337,
    ),
)

In [203]:
# Fit the bagged-Lasso pipeline against log(SalePrice) and report the
# out-of-bag RMSE measured on that same log scale.
model_bagged_lasso.fit(train, np.log(train.SalePrice))
# make_pipeline names each step after its lowercased class name, hence
# 'baggingregressor'; oob_prediction_ exists because the bagging step was
# constructed with oob_score=True.
oob_preds = model_bagged_lasso.named_steps['baggingregressor'].oob_prediction_
score = np.sqrt(mean_squared_error(np.log(train.SalePrice), oob_preds))
# Parenthesised single-argument print works as both the Python 2 print
# statement and the Python 3 print function; the bare statement form is a
# SyntaxError on Python 3.
print("RMSE: {}".format(score))

RMSE: 0.127620955983


In [204]:
# Bagged decision-tree pipeline: project feature transformers -> imputation
# -> bagging ensemble of regression trees (no scaling step in this pipeline).
# NOTE(review): CategoricalFeatures / TreeFeatures come from the local
# house_features module; their behavior is not visible here.
# NOTE(review): Imputer was removed from sklearn.preprocessing in scikit-learn
# 0.22 (replacement: sklearn.impute.SimpleImputer) -- confirm the target version.
model_bagged_dt = make_pipeline(
    CategoricalFeatures(),
    TreeFeatures(),
    Imputer(strategy='most_frequent'),
    BaggingRegressor(
        DecisionTreeRegressor(),
        n_estimators=400,
        # Sampling configuration for each base estimator.
        max_samples=0.6,
        max_features=0.7,
        bootstrap=True,
        bootstrap_features=False,
        # oob_score=True is what makes oob_prediction_ available after fitting.
        oob_score=True,
        # Execution / reproducibility settings.
        warm_start=False,
        n_jobs=1,
        random_state=1337,
        verbose=0,
    ),
)

In [206]:
# Fit the bagged decision-tree pipeline against log(SalePrice) and report the
# out-of-bag RMSE measured on that same log scale.
model_bagged_dt.fit(train, np.log(train.SalePrice))
# make_pipeline names each step after its lowercased class name, hence
# 'baggingregressor'; oob_prediction_ exists because the bagging step was
# constructed with oob_score=True.
oob_preds = model_bagged_dt.named_steps['baggingregressor'].oob_prediction_
score = np.sqrt(mean_squared_error(np.log(train.SalePrice), oob_preds))
# Parenthesised single-argument print works as both the Python 2 print
# statement and the Python 3 print function; the bare statement form is a
# SyntaxError on Python 3.
print("RMSE: {}".format(score))

RMSE: 0.138310882524


In [207]:
# Pull the out-of-bag predictions from the final (bagging) step of each fitted
# pipeline: oob_preds1 <- bagged Lasso, oob_preds2 <- bagged decision trees.
oob_preds1, oob_preds2 = (
    fitted_pipeline.named_steps['baggingregressor'].oob_prediction_
    for fitted_pipeline in (model_bagged_lasso, model_bagged_dt)
)

In [208]:
# Blend the two models' out-of-bag predictions with fixed weights
# (0.75 for oob_preds1, 0.25 for oob_preds2) and score the blend as RMSE
# against log(SalePrice).
oob_preds3 = 0.75 * oob_preds1 + 0.25 * oob_preds2
score = np.sqrt(mean_squared_error(np.log(train.SalePrice), oob_preds3))
# Parenthesised single-argument print works as both the Python 2 print
# statement and the Python 3 print function; the bare statement form is a
# SyntaxError on Python 3.
print("RMSE: {}".format(score))

RMSE: 0.124286429321
