# Training the XGBoost model
Start with training a model. The problem statement and data is the [house prices](https://www.kaggle.com/c/house-prices-advanced-regression-techniques) from kaggle. 

In [65]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

In [67]:
data = pd.read_csv('data/train.csv')
data.dropna(axis=0, subset=['SalePrice'], inplace=True)
y = data.SalePrice
X = data.drop(['SalePrice'], axis=1).select_dtypes(exclude=['object'])
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.30)

In [68]:
model = XGBRegressor(learning_rate=0.1)
model.fit(train_X, train_y, early_stopping_rounds=10, eval_set=[(test_X, test_y)])

[0]	validation_0-rmse:185542.71875
Will train until validation_0-rmse hasn't improved in 10 rounds.
[1]	validation_0-rmse:168205.29688
[2]	validation_0-rmse:152588.65625
[3]	validation_0-rmse:138712.60938
[4]	validation_0-rmse:126079.64844
[5]	validation_0-rmse:114682.03125
[6]	validation_0-rmse:105044.53906
[7]	validation_0-rmse:95954.64844
[8]	validation_0-rmse:87982.34375
[9]	validation_0-rmse:80815.25000
[10]	validation_0-rmse:74429.23438
[11]	validation_0-rmse:68550.01562
[12]	validation_0-rmse:63485.14453
[13]	validation_0-rmse:58950.50391
[14]	validation_0-rmse:55040.39844
[15]	validation_0-rmse:51496.84375
[16]	validation_0-rmse:47912.87109
[17]	validation_0-rmse:44691.46875
[18]	validation_0-rmse:41872.16406
[19]	validation_0-rmse:39719.75000
[20]	validation_0-rmse:37909.73828
[21]	validation_0-rmse:36327.73438
[22]	validation_0-rmse:35078.70703
[23]	validation_0-rmse:34017.23828
[24]	validation_0-rmse:32910.70312
[25]	validation_0-rmse:32074.46484
[26]	validation_0-rmse:31103

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.1, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=0, num_parallel_tree=1,
             objective='reg:squarederror', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [69]:
predictions = model.predict(test_X)
mean_absolute_error(predictions, test_y)

17325.788955479453

# Save the model

In [70]:
model.save_model('model-housing-prices')

# Load the model

In [71]:
loaded_model = XGBRegressor()
loaded_model.load_model('model-housing-prices')

# Predict

In [72]:
predictions = loaded_model.predict(test_X)
mean_absolute_error(predictions, test_y)

17325.788955479453

In [73]:
test_X.head(1)

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
736,737,90,60.0,8544,3,4,1950,1950,0.0,0,...,400,0,0,0,0,0,0,0,7,2006


In [74]:
loaded_model.predict(test_X.head(1))

array([86808.45], dtype=float32)

In [75]:
test_y.head(1)

736    93500
Name: SalePrice, dtype: int64

# Print as JSON

In [79]:
print(test_X.head(1).to_json(orient="records", indent=4))

[
    {
        "Id":737,
        "MSSubClass":90,
        "LotFrontage":60.0,
        "LotArea":8544,
        "OverallQual":3,
        "OverallCond":4,
        "YearBuilt":1950,
        "YearRemodAdd":1950,
        "MasVnrArea":0.0,
        "BsmtFinSF1":0,
        "BsmtFinSF2":0,
        "BsmtUnfSF":0,
        "TotalBsmtSF":0,
        "1stFlrSF":1040,
        "2ndFlrSF":0,
        "LowQualFinSF":0,
        "GrLivArea":1040,
        "BsmtFullBath":0,
        "BsmtHalfBath":0,
        "FullBath":2,
        "HalfBath":0,
        "BedroomAbvGr":2,
        "KitchenAbvGr":2,
        "TotRmsAbvGrd":6,
        "Fireplaces":0,
        "GarageYrBlt":1949.0,
        "GarageCars":2,
        "GarageArea":400,
        "WoodDeckSF":0,
        "OpenPorchSF":0,
        "EnclosedPorch":0,
        "3SsnPorch":0,
        "ScreenPorch":0,
        "PoolArea":0,
        "MiscVal":0,
        "MoSold":7,
        "YrSold":2006
    }
]
