# Real estate price prediction project.
Using "Housing Prices Competition for Kaggle Learn Users" data.


In [1]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

# handeling warnings
pd.options.mode.chained_assignment = None  
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Load and preprocess data

In [2]:
X = pd.read_csv('train.csv')

# Remove rows with missing data
X.dropna(axis=0, subset=['SalePrice'], inplace=True)
y = X.SalePrice
X.drop(['SalePrice'], axis=1, inplace=True)

# Drop columns with missing values
cols_with_missing = [col for col in X.columns if X[col].isnull().any()] 
X.drop(cols_with_missing, axis=1, inplace=True)

# Break off validation data from training data
X_train, X_valid, y_train, y_valid = train_test_split(X, y,
                                                      train_size=0.8, test_size=0.2,
                                                      random_state=0)

In [3]:
# Chose categorical columns
object_cols = [col for col in X_train.columns if X_train[col].dtype == "object"]

# Chose columns for label encoding
good_label_cols = [col for col in object_cols if 
                   set(X_train[col]) == set(X_valid[col])]
# Chose numerical columns
numerical_columns = [col for col in X_train if 
                    X_train[col].dtype in ['int64', 'float64']]
        
# Columns which were not chosen for label encoding
bad_label_cols = list(set(object_cols)-set(good_label_cols))      

## Imputation of missing values 

In [4]:
my_imputer = SimpleImputer()

imputed_X_train = pd.DataFrame(my_imputer.fit_transform(X_train[numerical_columns]))
imputed_X_valid = pd.DataFrame(my_imputer.transform(X_valid[numerical_columns]))

imputed_X_train.columns = numerical_columns
imputed_X_valid.columns = numerical_columns

## Label encoding categorical variables

In [5]:
# Drop columns which were not chosen for label encoding
label_X_train = X_train.drop(bad_label_cols, axis=1)
label_X_valid = X_valid.drop(bad_label_cols, axis=1)

my_encoder = LabelEncoder()

for col in set(good_label_cols):
    label_X_train[col] = my_encoder.fit_transform(X_train[col])
    label_X_valid[col] = my_encoder.transform(X_valid[col])

## Prediction models:

###  Basic random forest reggresor

In [6]:
def check_randomforest(X_train, X_valid):
    model = RandomForestRegressor(n_estimators=100, random_state=1)
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)

In [7]:
# Label encoded
print(check_randomforest(label_X_train,label_X_valid)) 

17506.210273972603


In [8]:
# Imputated
print(check_randomforest(imputed_X_train,imputed_X_valid))

17660.74037671233


### XGBoost

#### Find the most accurate XGBRegressor model

In [19]:
def check_xdgb(X_train_data, X_valid_data):
    all_scores = []
    for early_stopping_rounds in [3, 5, 7]:
        for max_depth in [7, 9, 11]:
            for n_estimators in [200, 500, 1000]:
                for learning_rate in [0.02, 0.05, 0.1]:
                    for colsample_bytree in [0.5, 0.7, 0.9]:
                        for reg_alpha in [0.1, 1, 10, 100]:
                                XGBmodel = XGBRegressor(early_stopping_rounds=early_stopping_rounds,
                                                    max_depth=max_depth,
                                                   learning_rate=learning_rate,
                                                   colsample_bytree=colsample_bytree,
                                                   reg_alpha=reg_alpha,
                                                   reg_lambda=1,
                                                   objective ='reg:squarederror',
                                                   random_state=1,

                                                   )
                                XGBmodel.fit(X_train_data, y_train)
                                prediction = XGBmodel.predict(X_valid_data)
                                mae = mean_absolute_error(prediction, y_valid)
                                all_scores.append([early_stopping_rounds, max_depth, n_estimators,
                                                   learning_rate,colsample_bytree, reg_alpha, 
                                                mae])
                                print(all_scores[-1])
    max_scores_label = sorted(all_scores, key = lambda x: x[-1], reverse = False)[:5]
    print("5 BEST MODELS:")
    for i in max_scores_label:
        print(i)

#### Search for best model trained with label encoded data

In [20]:
check_xdgb(label_X_train, label_X_valid)


[3, 7, 200, 0.02, 0.5, 0.1, 29472.200382598458]
[3, 7, 200, 0.02, 0.5, 1, 29472.21899079623]
[3, 7, 200, 0.02, 0.5, 10, 29471.120612157534]
[3, 7, 200, 0.02, 0.5, 100, 29454.29890839041]
[3, 7, 200, 0.02, 0.7, 0.1, 28922.174550513697]
[3, 7, 200, 0.02, 0.7, 1, 28922.19885755565]
[3, 7, 200, 0.02, 0.7, 10, 28907.299015410958]
[3, 7, 200, 0.02, 0.7, 100, 28934.396685038526]
[3, 7, 200, 0.02, 0.9, 0.1, 28501.49973244863]
[3, 7, 200, 0.02, 0.9, 1, 28491.63025738442]
[3, 7, 200, 0.02, 0.9, 10, 28464.312928082192]
[3, 7, 200, 0.02, 0.9, 100, 28663.677386558218]
[3, 7, 200, 0.05, 0.5, 0.1, 16822.443667059077]
[3, 7, 200, 0.05, 0.5, 1, 16822.441513270547]
[3, 7, 200, 0.05, 0.5, 10, 16785.50667540668]
[3, 7, 200, 0.05, 0.5, 100, 16970.171393407534]
[3, 7, 200, 0.05, 0.7, 0.1, 16282.436630458047]
[3, 7, 200, 0.05, 0.7, 1, 16282.439680543665]
[3, 7, 200, 0.05, 0.7, 10, 16593.645374036816]
[3, 7, 200, 0.05, 0.7, 100, 16802.386250535103]
[3, 7, 200, 0.05, 0.9, 0.1, 17072.76181239298]
[3, 7, 200, 0.

[3, 9, 500, 0.1, 0.9, 0.1, 16939.568158711474]
[3, 9, 500, 0.1, 0.9, 1, 16926.68622913099]
[3, 9, 500, 0.1, 0.9, 10, 17216.106191138697]
[3, 9, 500, 0.1, 0.9, 100, 17290.656209867295]
[3, 9, 1000, 0.02, 0.5, 0.1, 29650.663875214042]
[3, 9, 1000, 0.02, 0.5, 1, 29657.839362157534]
[3, 9, 1000, 0.02, 0.5, 10, 29663.08790400257]
[3, 9, 1000, 0.02, 0.5, 100, 29699.444764019692]
[3, 9, 1000, 0.02, 0.7, 0.1, 29127.094378745718]
[3, 9, 1000, 0.02, 0.7, 1, 29127.12711365582]
[3, 9, 1000, 0.02, 0.7, 10, 29131.289289918663]
[3, 9, 1000, 0.02, 0.7, 100, 29211.123969927226]
[3, 9, 1000, 0.02, 0.9, 0.1, 28845.227565817637]
[3, 9, 1000, 0.02, 0.9, 1, 28845.256902825342]
[3, 9, 1000, 0.02, 0.9, 10, 28803.212101348458]
[3, 9, 1000, 0.02, 0.9, 100, 28894.656383775684]
[3, 9, 1000, 0.05, 0.5, 0.1, 17245.43277771832]
[3, 9, 1000, 0.05, 0.5, 1, 17245.430998501713]
[3, 9, 1000, 0.05, 0.5, 10, 17215.90204944349]
[3, 9, 1000, 0.05, 0.5, 100, 17316.382264019692]
[3, 9, 1000, 0.05, 0.7, 0.1, 16668.932416523974]

[5, 7, 200, 0.1, 0.5, 1, 17024.53533015839]
[5, 7, 200, 0.1, 0.5, 10, 17018.73330479452]
[5, 7, 200, 0.1, 0.5, 100, 17205.877742401542]
[5, 7, 200, 0.1, 0.7, 0.1, 15845.20222870291]
[5, 7, 200, 0.1, 0.7, 1, 15845.980375107021]
[5, 7, 200, 0.1, 0.7, 10, 15809.89070526541]
[5, 7, 200, 0.1, 0.7, 100, 15911.21022848887]
[5, 7, 200, 0.1, 0.9, 0.1, 16663.810399721748]
[5, 7, 200, 0.1, 0.9, 1, 16616.18004869435]
[5, 7, 200, 0.1, 0.9, 10, 16713.045349957192]
[5, 7, 200, 0.1, 0.9, 100, 16852.18751337757]
[5, 7, 500, 0.02, 0.5, 0.1, 29472.200382598458]
[5, 7, 500, 0.02, 0.5, 1, 29472.21899079623]
[5, 7, 500, 0.02, 0.5, 10, 29471.120612157534]
[5, 7, 500, 0.02, 0.5, 100, 29454.29890839041]
[5, 7, 500, 0.02, 0.7, 0.1, 28922.174550513697]
[5, 7, 500, 0.02, 0.7, 1, 28922.19885755565]
[5, 7, 500, 0.02, 0.7, 10, 28907.299015410958]
[5, 7, 500, 0.02, 0.7, 100, 28934.396685038526]
[5, 7, 500, 0.02, 0.9, 0.1, 28501.49973244863]
[5, 7, 500, 0.02, 0.9, 1, 28491.63025738442]
[5, 7, 500, 0.02, 0.9, 10, 28464

[5, 9, 1000, 0.05, 0.9, 0.1, 17568.12598994007]
[5, 9, 1000, 0.05, 0.9, 1, 17609.716248394692]
[5, 9, 1000, 0.05, 0.9, 10, 17605.892685145547]
[5, 9, 1000, 0.05, 0.9, 100, 17546.030741652397]
[5, 9, 1000, 0.1, 0.5, 0.1, 17059.305864726026]
[5, 9, 1000, 0.1, 0.5, 1, 17053.117843000855]
[5, 9, 1000, 0.1, 0.5, 10, 17040.826426048803]
[5, 9, 1000, 0.1, 0.5, 100, 17224.27185894692]
[5, 9, 1000, 0.1, 0.7, 0.1, 15953.90073844178]
[5, 9, 1000, 0.1, 0.7, 1, 15953.943038313357]
[5, 9, 1000, 0.1, 0.7, 10, 15946.830104880137]
[5, 9, 1000, 0.1, 0.7, 100, 16176.89711312072]
[5, 9, 1000, 0.1, 0.9, 0.1, 16939.568158711474]
[5, 9, 1000, 0.1, 0.9, 1, 16926.68622913099]
[5, 9, 1000, 0.1, 0.9, 10, 17216.106191138697]
[5, 9, 1000, 0.1, 0.9, 100, 17290.656209867295]
[5, 11, 200, 0.02, 0.5, 0.1, 29728.04731645976]
[5, 11, 200, 0.02, 0.5, 1, 29728.079609910103]
[5, 11, 200, 0.02, 0.5, 10, 29779.350398651542]
[5, 11, 200, 0.02, 0.5, 100, 29751.50373234161]
[5, 11, 200, 0.02, 0.7, 0.1, 29293.952656785103]
[5, 1

[7, 7, 500, 0.05, 0.5, 10, 16785.50667540668]
[7, 7, 500, 0.05, 0.5, 100, 16970.171393407534]
[7, 7, 500, 0.05, 0.7, 0.1, 16282.436630458047]
[7, 7, 500, 0.05, 0.7, 1, 16282.439680543665]
[7, 7, 500, 0.05, 0.7, 10, 16593.645374036816]
[7, 7, 500, 0.05, 0.7, 100, 16802.386250535103]
[7, 7, 500, 0.05, 0.9, 0.1, 17072.76181239298]
[7, 7, 500, 0.05, 0.9, 1, 17072.768073095034]
[7, 7, 500, 0.05, 0.9, 10, 17173.903801904966]
[7, 7, 500, 0.05, 0.9, 100, 17178.31197827483]
[7, 7, 500, 0.1, 0.5, 0.1, 17024.535035851884]
[7, 7, 500, 0.1, 0.5, 1, 17024.53533015839]
[7, 7, 500, 0.1, 0.5, 10, 17018.73330479452]
[7, 7, 500, 0.1, 0.5, 100, 17205.877742401542]
[7, 7, 500, 0.1, 0.7, 0.1, 15845.20222870291]
[7, 7, 500, 0.1, 0.7, 1, 15845.980375107021]
[7, 7, 500, 0.1, 0.7, 10, 15809.89070526541]
[7, 7, 500, 0.1, 0.7, 100, 15911.21022848887]
[7, 7, 500, 0.1, 0.9, 0.1, 16663.810399721748]
[7, 7, 500, 0.1, 0.9, 1, 16616.18004869435]
[7, 7, 500, 0.1, 0.9, 10, 16713.045349957192]
[7, 7, 500, 0.1, 0.9, 100, 1

[7, 11, 200, 0.02, 0.9, 1, 29035.33700770548]
[7, 11, 200, 0.02, 0.9, 10, 29032.936991652397]
[7, 11, 200, 0.02, 0.9, 100, 28899.853448737158]
[7, 11, 200, 0.05, 0.5, 0.1, 17394.76285584332]
[7, 11, 200, 0.05, 0.5, 1, 17394.042406892124]
[7, 11, 200, 0.05, 0.5, 10, 17379.585977632705]
[7, 11, 200, 0.05, 0.5, 100, 17185.947198737158]
[7, 11, 200, 0.05, 0.7, 0.1, 16609.10283336901]
[7, 11, 200, 0.05, 0.7, 1, 16609.129922945205]
[7, 11, 200, 0.05, 0.7, 10, 16570.024668236303]
[7, 11, 200, 0.05, 0.7, 100, 16663.200502996577]
[7, 11, 200, 0.05, 0.9, 0.1, 17229.90998234161]
[7, 11, 200, 0.05, 0.9, 1, 17242.593602846748]
[7, 11, 200, 0.05, 0.9, 10, 17465.743057041953]
[7, 11, 200, 0.05, 0.9, 100, 17256.35041202911]
[7, 11, 200, 0.1, 0.5, 0.1, 16806.777276862158]
[7, 11, 200, 0.1, 0.5, 1, 16813.398317101884]
[7, 11, 200, 0.1, 0.5, 10, 16791.72850224743]
[7, 11, 200, 0.1, 0.5, 100, 17143.932296125855]
[7, 11, 200, 0.1, 0.7, 0.1, 16259.234308112158]
[7, 11, 200, 0.1, 0.7, 1, 16260.603301583904]


#### Search for best model trained with imputed data

In [33]:
check_xdgb(imputed_X_train, imputed_X_valid)

[3, 7, 200, 0.02, 0.5, 0.1, 29270.112425085616]
[3, 7, 200, 0.02, 0.5, 1, 29270.13076573202]
[3, 7, 200, 0.02, 0.5, 10, 29254.294948630137]
[3, 7, 200, 0.02, 0.5, 100, 29244.934316138697]
[3, 7, 200, 0.02, 0.7, 0.1, 28870.839348779966]
[3, 7, 200, 0.02, 0.7, 1, 28891.048761237158]
[3, 7, 200, 0.02, 0.7, 10, 28837.31025256849]
[3, 7, 200, 0.02, 0.7, 100, 28863.601415346748]
[3, 7, 200, 0.02, 0.9, 0.1, 28543.877969820205]
[3, 7, 200, 0.02, 0.9, 1, 28543.899975920376]
[3, 7, 200, 0.02, 0.9, 10, 28570.787938784248]
[3, 7, 200, 0.02, 0.9, 100, 28667.99345836901]
[3, 7, 200, 0.05, 0.5, 0.1, 16391.573188677226]
[3, 7, 200, 0.05, 0.5, 1, 16391.57459332192]
[3, 7, 200, 0.05, 0.5, 10, 16389.966141374145]
[3, 7, 200, 0.05, 0.5, 100, 16704.769277076197]
[3, 7, 200, 0.05, 0.7, 0.1, 17211.86800353168]
[3, 7, 200, 0.05, 0.7, 1, 17207.117950021406]
[3, 7, 200, 0.05, 0.7, 10, 17207.683272688355]
[3, 7, 200, 0.05, 0.7, 100, 17209.7421875]
[3, 7, 200, 0.05, 0.9, 0.1, 17247.320566673803]
[3, 7, 200, 0.05,

[3, 9, 500, 0.1, 0.9, 0.1, 17543.36622431507]
[3, 9, 500, 0.1, 0.9, 1, 17523.990180864726]
[3, 9, 500, 0.1, 0.9, 10, 17445.512909353594]
[3, 9, 500, 0.1, 0.9, 100, 17666.18653681507]
[3, 9, 1000, 0.02, 0.5, 0.1, 29567.753478167808]
[3, 9, 1000, 0.02, 0.5, 1, 29564.58833208476]
[3, 9, 1000, 0.02, 0.5, 10, 29568.25782587757]
[3, 9, 1000, 0.02, 0.5, 100, 29553.36762895976]
[3, 9, 1000, 0.02, 0.7, 0.1, 29192.162604345034]
[3, 9, 1000, 0.02, 0.7, 1, 29192.195432898116]
[3, 9, 1000, 0.02, 0.7, 10, 29201.81656678082]
[3, 9, 1000, 0.02, 0.7, 100, 29072.493832940923]
[3, 9, 1000, 0.02, 0.9, 0.1, 28857.812473244863]
[3, 9, 1000, 0.02, 0.9, 1, 28857.846251605308]
[3, 9, 1000, 0.02, 0.9, 10, 28885.942195526542]
[3, 9, 1000, 0.02, 0.9, 100, 29237.97361943493]
[3, 9, 1000, 0.05, 0.5, 0.1, 16822.725920376713]
[3, 9, 1000, 0.05, 0.5, 1, 16822.737237799658]
[3, 9, 1000, 0.05, 0.5, 10, 16897.66647046233]
[3, 9, 1000, 0.05, 0.5, 100, 16666.019852311645]
[3, 9, 1000, 0.05, 0.7, 0.1, 17298.571596746577]
[3

[5, 7, 200, 0.1, 0.5, 1, 16765.940978167808]
[5, 7, 200, 0.1, 0.5, 10, 16596.4560546875]
[5, 7, 200, 0.1, 0.5, 100, 16598.42533979024]
[5, 7, 200, 0.1, 0.7, 0.1, 17130.31393139983]
[5, 7, 200, 0.1, 0.7, 1, 17157.65341395548]
[5, 7, 200, 0.1, 0.7, 10, 17134.434998394692]
[5, 7, 200, 0.1, 0.7, 100, 17283.309610445205]
[5, 7, 200, 0.1, 0.9, 0.1, 17411.039932041953]
[5, 7, 200, 0.1, 0.9, 1, 17411.055610552226]
[5, 7, 200, 0.1, 0.9, 10, 17239.893220248287]
[5, 7, 200, 0.1, 0.9, 100, 17311.992160744863]
[5, 7, 500, 0.02, 0.5, 0.1, 29270.112425085616]
[5, 7, 500, 0.02, 0.5, 1, 29270.13076573202]
[5, 7, 500, 0.02, 0.5, 10, 29254.294948630137]
[5, 7, 500, 0.02, 0.5, 100, 29244.934316138697]
[5, 7, 500, 0.02, 0.7, 0.1, 28870.839348779966]
[5, 7, 500, 0.02, 0.7, 1, 28891.048761237158]
[5, 7, 500, 0.02, 0.7, 10, 28837.31025256849]
[5, 7, 500, 0.02, 0.7, 100, 28863.601415346748]
[5, 7, 500, 0.02, 0.9, 0.1, 28543.877969820205]
[5, 7, 500, 0.02, 0.9, 1, 28543.899975920376]
[5, 7, 500, 0.02, 0.9, 10, 

[5, 9, 1000, 0.05, 0.9, 1, 17442.470810145547]
[5, 9, 1000, 0.05, 0.9, 10, 17457.131340967466]
[5, 9, 1000, 0.05, 0.9, 100, 17442.508829195205]
[5, 9, 1000, 0.1, 0.5, 0.1, 16679.984856592466]
[5, 9, 1000, 0.1, 0.5, 1, 16629.203125]
[5, 9, 1000, 0.1, 0.5, 10, 16587.918637628423]
[5, 9, 1000, 0.1, 0.5, 100, 16652.737104023974]
[5, 9, 1000, 0.1, 0.7, 0.1, 17178.077255458047]
[5, 9, 1000, 0.1, 0.7, 1, 17154.31809182363]
[5, 9, 1000, 0.1, 0.7, 10, 17245.698349208047]
[5, 9, 1000, 0.1, 0.7, 100, 17443.216047731163]
[5, 9, 1000, 0.1, 0.9, 0.1, 17543.36622431507]
[5, 9, 1000, 0.1, 0.9, 1, 17523.990180864726]
[5, 9, 1000, 0.1, 0.9, 10, 17445.512909353594]
[5, 9, 1000, 0.1, 0.9, 100, 17666.18653681507]
[5, 11, 200, 0.02, 0.5, 0.1, 29653.69033604452]
[5, 11, 200, 0.02, 0.5, 1, 29653.7236328125]
[5, 11, 200, 0.02, 0.5, 10, 29654.308540239726]
[5, 11, 200, 0.02, 0.5, 100, 29638.668798159248]
[5, 11, 200, 0.02, 0.7, 0.1, 29089.25417380137]
[5, 11, 200, 0.02, 0.7, 1, 29089.288500642124]
[5, 11, 200, 

[7, 7, 500, 0.05, 0.5, 100, 16704.769277076197]
[7, 7, 500, 0.05, 0.7, 0.1, 17211.86800353168]
[7, 7, 500, 0.05, 0.7, 1, 17207.117950021406]
[7, 7, 500, 0.05, 0.7, 10, 17207.683272688355]
[7, 7, 500, 0.05, 0.7, 100, 17209.7421875]
[7, 7, 500, 0.05, 0.9, 0.1, 17247.320566673803]
[7, 7, 500, 0.05, 0.9, 1, 17252.477418664384]
[7, 7, 500, 0.05, 0.9, 10, 17232.63598298373]
[7, 7, 500, 0.05, 0.9, 100, 17254.58360980308]
[7, 7, 500, 0.1, 0.5, 0.1, 16765.94689105308]
[7, 7, 500, 0.1, 0.5, 1, 16765.940978167808]
[7, 7, 500, 0.1, 0.5, 10, 16596.4560546875]
[7, 7, 500, 0.1, 0.5, 100, 16598.42533979024]
[7, 7, 500, 0.1, 0.7, 0.1, 17130.31393139983]
[7, 7, 500, 0.1, 0.7, 1, 17157.65341395548]
[7, 7, 500, 0.1, 0.7, 10, 17134.434998394692]
[7, 7, 500, 0.1, 0.7, 100, 17283.309610445205]
[7, 7, 500, 0.1, 0.9, 0.1, 17411.039932041953]
[7, 7, 500, 0.1, 0.9, 1, 17411.055610552226]
[7, 7, 500, 0.1, 0.9, 10, 17239.893220248287]
[7, 7, 500, 0.1, 0.9, 100, 17311.992160744863]
[7, 7, 1000, 0.02, 0.5, 0.1, 2927

[7, 11, 200, 0.02, 0.9, 10, 29014.79732983733]
[7, 11, 200, 0.02, 0.9, 100, 29336.353408604453]
[7, 11, 200, 0.05, 0.5, 0.1, 16852.690175513697]
[7, 11, 200, 0.05, 0.5, 1, 16879.840191566782]
[7, 11, 200, 0.05, 0.5, 10, 17027.616719285103]
[7, 11, 200, 0.05, 0.5, 100, 16813.97008775685]
[7, 11, 200, 0.05, 0.7, 0.1, 17319.34375]
[7, 11, 200, 0.05, 0.7, 1, 17304.617669092466]
[7, 11, 200, 0.05, 0.7, 10, 17307.10029163099]
[7, 11, 200, 0.05, 0.7, 100, 17456.885554901542]
[7, 11, 200, 0.05, 0.9, 0.1, 17581.268782106163]
[7, 11, 200, 0.05, 0.9, 1, 17579.667299871577]
[7, 11, 200, 0.05, 0.9, 10, 17669.166764768837]
[7, 11, 200, 0.05, 0.9, 100, 17845.184530179795]
[7, 11, 200, 0.1, 0.5, 0.1, 16811.904270119863]
[7, 11, 200, 0.1, 0.5, 1, 16866.886290667808]
[7, 11, 200, 0.1, 0.5, 10, 16832.80353702911]
[7, 11, 200, 0.1, 0.5, 100, 16538.349127782534]
[7, 11, 200, 0.1, 0.7, 0.1, 17530.897955907534]
[7, 11, 200, 0.1, 0.7, 1, 17489.7900390625]
[7, 11, 200, 0.1, 0.7, 10, 17440.612612371577]
[7, 11,

#### Chose the best model 

In [29]:
best_model = XGBRegressor(early_stopping_rounds=3,
                             max_depth=7,
                             learning_rate=0.1,
                             colsample_bytree=0.7,
                             reg_alpha=10,
                             reg_lambda=1,
                             objective ='reg:squarederror',
                             random_state=1,
                             )

In [30]:
best_model.fit(label_X_train, y_train)
prediction = best_model.predict(label_X_valid)
mae = mean_absolute_error(prediction, y_valid)

In [31]:
print(mae)

15809.89070526541
