In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,r2_score,make_scorer
from sklearn.model_selection import cross_val_score,GridSearchCV,RandomizedSearchCV
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

  import pandas.util.testing as tm


In [3]:
train = pd.read_csv("treated_train.csv").set_index('id')

In [4]:
test = pd.read_csv("treated_test.csv").set_index('id')

In [5]:
X = train.drop('price',axis=1)
y = train['price']

In [6]:
X.shape,test.shape

((34226, 219), (14669, 219))

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0)

#### Score metrics

In [8]:
def scores(y_test,y_pred):
    print(r2_score(y_test,y_pred),np.sqrt(mean_squared_error(y_test,y_pred)))

#### Modelling

In [9]:
model = LinearRegression()

In [10]:
model.fit(X_train,y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [11]:
y_pred = model.predict(X_test)

In [12]:
scores(y_test,y_pred)

0.5479545419855143 0.47166893713013014


#### L1 Regularization

In [13]:
l1 = Lasso()

In [14]:
l1.fit(X_train,y_train)

Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [15]:
y_pred = l1.predict(X_test)

In [16]:
scores(y_test,y_pred)

0.006819936824098982 0.6991336686530787


#### L2 Regularization

In [18]:
l2 = Ridge()

In [19]:
l2.fit(X_train,y_train)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [20]:
y_pred = l2.predict(X_test)

In [21]:
scores(y_test,y_pred)

0.5489859688854424 0.4711305290520922


In [22]:
l2.fit(X,y)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [23]:
y_pred = l2.predict(test)

#### Random Forest

In [24]:
rf = RandomForestRegressor(random_state=0)

In [25]:
rf.fit(X_train,y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [26]:
y_pred = rf.predict(X_test)

In [27]:
scores(y_test,y_pred)

0.5726228513697746 0.45861881656131587


In [28]:
rf.score(X_train,y_train)

0.9413958664947766

In [52]:
rf.fit(X,y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [53]:
y_pred = rf.predict(test)

#### Decision Tree

In [29]:
dt = DecisionTreeRegressor(random_state=0)

In [30]:
dt.fit(X_train,y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

In [31]:
y_pred = dt.predict(X_test)

In [32]:
scores(y_test,y_pred)

0.21862352078826874 0.6201209272476336


In [33]:
dt.fit(X,y)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

In [35]:
y_pred = dt.predict(test)

In [40]:
cross_val = cross_val_score(rf,X_train,y_train,cv=10,scoring=scorer).mean()

#### XGBoost

In [38]:
xgr = XGBRegressor(random_state=0)

In [39]:
xgr.fit(X_train,y_train)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

In [45]:
y_pred = xgr.predict(X_test)

In [46]:
scores(y_test,y_pred)

0.5866149035024284 0.4510489003290035


In [47]:
xgr.fit(X,y)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

In [48]:
y_pred = xgr.predict(test)

### Tuning

#### Random Forest

In [59]:
rf = RandomForestRegressor(random_state=0)
parameter = {"n_estimators":[100,200,300,400,600],
             'max_depth':[20,30,40,60,'None','sqrt'],
            "max_features":[100,200,219],
            "min_samples_split":[2,3,6],
            "min_samples_leaf":[1,3,4,6]}

In [60]:
random = RandomizedSearchCV(estimator=rf,param_distributions=parameter,n_jobs=-1,n_iter=10,cv=3,random_state=0,verbose=1)

In [61]:
random.fit(X_train,y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:  4.0min finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=RandomForestRegressor(bootstrap=True,
                                                   ccp_alpha=0.0,
                                                   criterion='mse',
                                                   max_depth=None,
                                                   max_features='auto',
                                                   max_leaf_nodes=None,
                                                   max_samples=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   n_estimators=100,
                              

In [62]:
random.best_params_

{'n_estimators': 400,
 'min_samples_split': 3,
 'min_samples_leaf': 4,
 'max_features': 100,
 'max_depth': 20}

In [365]:
rf = RandomForestRegressor(random_state=0,n_estimators=600,min_samples_split=2,min_samples_leaf=1,max_features='sqrt',
                           max_depth=30,bootstrap=True)

In [366]:
rf.fit(X_train,y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=30, max_features='sqrt', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=600, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [367]:
y_pred = rf.predict(X_test)

In [368]:
scores(y_test,y_pred)

0.5906157725843209 0.4488608978917244


In [228]:
rf.fit(X,y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=16, max_features=200, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=4,
                      min_samples_split=3, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [229]:
y_pred = rf.predict(test)

In [64]:
random.best_params_

{'n_estimators': 200,
 'min_samples_split': 3,
 'min_samples_leaf': 3,
 'max_features': 200,
 'max_depth': 12,
 'bootstrap': True}

In [57]:
parameter = {"n_estimators":[10,100,200,300],
             'max_depth':[12,14,20,40,60,'None'],
            "max_features":[100,200,219],
            "min_samples_split":[2,3,6],
            "min_samples_leaf":[1,3,4]}

In [58]:
rf = RandomForestRegressor(random_state=0)

In [273]:
grid = GridSearchCV(estimator=rf,param_grid=parameter,verbose=1,n_jobs=-1)

In [274]:
grid.fit(X_train,y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed: 10.4min finished


GridSearchCV(cv=None, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=0,
                                             verbose=0, warm_start=False),
             iid='deprecated', n_jo

In [276]:
grid.best_params_

{'max_depth': 14,
 'max_features': 100,
 'min_samples_leaf': 3,
 'min_samples_split': 2,
 'n_estimators': 100}

In [277]:
rf = RandomForestRegressor(random_state=0,n_estimators=100,max_depth=14,max_features=200,min_samples_leaf=3,min_samples_split=2)

In [278]:
rf.fit(X_train,y_train)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=14, max_features=100, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=3,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [279]:
y_pred = rf.predict(X_test)

In [280]:
scores(y_test,y_pred)

0.5912720135615336 0.44850099265579185


#### Decision Tree

In [29]:
parameter = {'max_depth':[3,8,10,40,60,'None'],
            "max_features":[100,200,221],
            "min_samples_split":[2,3,6],
            "min_samples_leaf":[1,3,6]}

In [64]:
dt = DecisionTreeRegressor(random_state=0)

In [31]:
grid = GridSearchCV(dt,param_grid=parameter)

In [32]:
grid.fit(X_train,y_train)

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_feat

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_feat

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_feat

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_feat

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

ValueError: max_features must be in (0, n_features]

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between i

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances 

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances 

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances 

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances 

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'

TypeError: '<=' not supported between instances of 'str' and 'int'



GridSearchCV(cv=None, error_score=nan,
             estimator=DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse',
                                             max_depth=None, max_features=None,
                                             max_leaf_nodes=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             presort='deprecated',
                                             random_state=0, splitter='best'),
             iid='deprecated', n_jobs=None,
             param_grid={'max_depth': [3, 8, 10, 'None'],
                         'max_features': [100, 200, 221],
                         'min_samples_leaf': [1, 3, 6],
                         'mi

In [33]:
grid.best_params_

{'max_depth': 8,
 'max_features': 200,
 'min_samples_leaf': 3,
 'min_samples_split': 2}

In [34]:
dt = DecisionTreeRegressor(random_state=0,max_depth=8,max_features=200,min_samples_leaf=3,min_samples_split=2)

In [35]:
dt.fit(X_train,y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=8,
                      max_features=200, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=3, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

In [36]:
y_pred = dt.predict(X_test)

In [37]:
scores(y_test,y_pred)

0.5319209073432747 0.47996088155480443


In [65]:
dt.fit(X,y)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

In [66]:
y_pred = dt.predict(test)

#### XGBoost

In [359]:
parameter = {"n_estimators":[10,100,200,400,600],
             'max_depth':[10,20,30,40,60,80,'None'],
            "max_features":['auto','sqrt'],
            "min_samples_split":[2,3,5,10],
            "min_samples_leaf":[1,2,4]}

In [316]:
xgr = XGBRegressor(random_state=0)
parameter={"max_depth":[3,6,8,'None'],
          "n_estimator":[100,200],
          "min_child_weight":[3,6,8]}

In [317]:
random = RandomizedSearchCV(xgr,parameter,verbose=1,n_jobs=-1)

In [318]:
random.fit(X_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  4.5min finished




RandomizedSearchCV(cv=None, error_score=nan,
                   estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                          colsample_bylevel=1,
                                          colsample_bynode=1,
                                          colsample_bytree=1, gamma=0,
                                          importance_type='gain',
                                          learning_rate=0.1, max_delta_step=0,
                                          max_depth=3, min_child_weight=1,
                                          missing=None, n_estimators=100,
                                          n_jobs=1, nthread=None,
                                          objective='reg:linear',
                                          random_state=0, reg_alpha=0,
                                          reg_lambda=1, scale_pos_weight=1,
                                          seed=None, silent=None, subsample=1,
                               

In [319]:
random.best_params_

{'n_estimator': 100, 'min_child_weight': 6, 'max_depth': 8}

In [349]:
xgr = XGBRegressor(random_state=0,n_estimator=100,min_child_weight=6,max_depth=10)

In [346]:
xgr.fit(X_train,y_train)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=10, min_child_weight=6, missing=None, n_estimator=100,
             n_estimators=100, n_jobs=1, nthread=None, objective='reg:linear',
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [347]:
y_pred = xgr.predict(X_test)

In [348]:
scores(y_test,y_pred)

0.5969876002717585 0.4453540693757987


In [350]:
xgr.fit(X,y)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=10, min_child_weight=6, missing=None, n_estimator=100,
             n_estimators=100, n_jobs=1, nthread=None, objective='reg:linear',
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [351]:
y_pred = xgr.predict(test)

In [281]:
rf.fit(X,y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=14, max_features=100, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=3,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [282]:
y_pred = rf.predict(test)

#### Submission

In [54]:
sample_submission = pd.DataFrame(y_pred,test.index,columns=['price'])
sample_submission = np.expm1(sample_submission)

In [55]:
sample_submission

Unnamed: 0_level_0,price
id,Unnamed: 1_level_1
1525602,117.522241
30430185,172.794733
21354525,91.705960
35995074,72.522552
34392081,69.975017
...,...
27730191,120.583072
29676417,62.417069
1813829,60.466162
25017403,234.326442


In [56]:
sample_submission.to_csv("sample_sub.csv")