## Import Modules for XGBoost and AdaBoost and read raw data

In [14]:
import os
import pandas as pd
import numpy as np
from scipy.stats import skew
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import cross_val_score
import xgboost as xgb
import matplotlib.pyplot as plt
import helpers as fp
%matplotlib inline

In [3]:
# Load the raw train/test tables from the local `data` directory.
train = pd.read_csv(os.path.join('data', 'train.csv'))
test = pd.read_csv(os.path.join('data', 'test.csv'))

# The target is the last column of the training table.
y = train.iloc[:, -1]

# Keep the first column of the *raw* test table for the submission file
# (presumably the row identifier -- verify against the CSV header).
# This must happen BEFORE that column is dropped below; taking it afterwards
# would silently pick up the first feature column instead.
submission = test.iloc[:, 0]

# Drop the leading column from both tables, and the target column from train,
# so that `train` and `test` end up with the same feature columns.
train = train.iloc[:, 1:-1]
test = test.iloc[:, 1:]

## Pre-Processing

In [4]:
# Pre-processing pipeline built from the project's `helpers` module (`fp`).
# Order matters: every step consumes the frames produced by the previous one.
train, test = fp.mssubclass(train, test)

# `fp.log` is the only step that also takes and returns the target.
# NOTE(review): presumably a log transform of the target/skewed features -- confirm in helpers.
train, test, y = fp.log(train, test, y)

# The remaining steps share the same (train, test) -> (train, test) shape,
# so they are applied in sequence.
for step in (fp.lotfrontage, fp.garageyrblt, fp.impute_mean, fp.dummies):
    train, test = step(train, test)

## AdaBoost

In [16]:
# Baseline: AdaBoost regressor evaluated with cross-validation.
# `random_state` is fixed (same value as the XGBoost seed used in this notebook)
# so the score is reproducible across kernel restarts; without it the
# boosting procedure is randomised and the number changes on every run.
clf = AdaBoostRegressor(n_estimators=100, random_state=42)

# `scoring="r2"` spells out the metric that a regressor's default scorer
# already computes, so the displayed number is unambiguous (higher is better).
scores = cross_val_score(clf, train, y, scoring="r2")

# Mean R^2 across the folds; bare expression so the notebook displays it.
scores.mean()

0.80381813423311377

## XGBoost

In [5]:
# Hyper-parameters for the XGBoost regressor, gathered in one place so they
# are easy to read and tweak. The configuration uses many shallow trees
# (n_estimators=1668, max_depth=4) with a small learning rate (0.01).
xgb_params = dict(
    colsample_bytree=0.3,
    gamma=0.0,
    learning_rate=0.01,
    max_depth=4,
    min_child_weight=1.5,
    n_estimators=1668,
    reg_alpha=1,
    reg_lambda=0.6,
    subsample=0.2,
    seed=42,
    silent=1,
)
regr = xgb.XGBRegressor(**xgb_params)

# Fit on the full pre-processed training set; as the last expression of the
# cell, the fitted estimator's repr is displayed.
regr.fit(train, y)

XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.3,
       gamma=0.0, learning_rate=0.01, max_delta_step=0, max_depth=4,
       min_child_weight=1.5, missing=None, n_estimators=1668, nthread=-1,
       objective='reg:linear', reg_alpha=1, reg_lambda=0.6,
       scale_pos_weight=1, seed=42, silent=1, subsample=0.2)

In [6]:
# In-sample check: mean squared error of the fitted model on the data it was
# trained on. This is an optimistic figure, not an estimate of test error.
y_train = y
y_pred = regr.predict(train)
train_mse = mean_squared_error(y_train, y_pred)
print("XGBoost score on training set: ", train_mse)

XGBoost score on training set:  0.0102029204812


In [7]:
# Predict the target for every row of the pre-processed test set.
# NOTE(review): `y` was passed through `fp.log` during pre-processing, so these
# predictions are presumably on the transformed (log) scale and would need to
# be inverted before being written to a submission file -- confirm in helpers.
y_pred_xgb = regr.predict(test)
# Bare expression so the notebook displays the prediction array.
y_pred_xgb

array([ 11.78995037,  11.9664135 ,  12.1092701 , ...,  12.03260136,
        11.68862724,  12.32989597], dtype=float32)