In [2]:
!pip install sklearn pandas xgboost

Collecting sklearn
  Downloading https://files.pythonhosted.org/packages/1e/7a/dbb3be0ce9bd5c8b7e3d87328e79063f8b263b2b1bfa4774cb1147bfcd3f/sklearn-0.0.tar.gz
Collecting pandas
  Downloading https://files.pythonhosted.org/packages/f9/e1/4a63ed31e1b1362d40ce845a5735c717a959bda992669468dae3420af2cd/pandas-0.24.0-cp36-cp36m-manylinux1_x86_64.whl (10.1MB)
[K    100% |████████████████████████████████| 10.1MB 60kB/s  eta 0:00:01
[?25hCollecting xgboost
  Downloading https://files.pythonhosted.org/packages/54/21/8b2ec99862903a6d3aed62ce156d21d114b8666e669c46d9e54041df9496/xgboost-0.81-py2.py3-none-manylinux1_x86_64.whl (16.6MB)
[K    100% |████████████████████████████████| 16.6MB 36kB/s  eta 0:00:01
[?25hCollecting scikit-learn (from sklearn)
  Downloading https://files.pythonhosted.org/packages/0d/3a/b92670f5c368c20329ecc4c255993fae7934564d485c3ed7ea7b8da7f741/scikit_learn-0.20.2-cp36-cp36m-manylinux1_x86_64.whl (5.4MB)
[K    100% |████████████████████████████████| 5.4MB 123kB/s eta 0:0

In [8]:
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import logging
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from xgboost import XGBRegressor
import urllib.request


# Remote location of the training CSV and the local file name it is saved as.
TRAINING_URL = "https://raw.githubusercontent.com/kubeflow/examples/master/xgboost_ames_housing/ames_dataset/train.csv"
TRAINING_FILE = "train.csv"

# Tunable settings used by the training helpers below.
ESTIMATORS = 1000            # n_estimators handed to XGBRegressor
LEARNING_RATE = 0.1          # learning_rate handed to XGBRegressor
TEST_FRACTION_SIZE = 0.25    # default test_size for read_input()
EARLY_STOPPING_ROUNDS = 50   # early_stopping_rounds handed to model.fit()

class XgBoostModel(object):
    """Entry point that runs the whole train-and-evaluate pipeline."""

    def train(self):
        """Read the data, fit the regressor and log its evaluation metrics.

        The helpers report their results through `logging.info`. The root
        logger starts at WARNING, which would silently drop those messages,
        so it is switched to INFO here before any work is done.
        """
        # basicConfig is a no-op when the root logger already has handlers,
        # so the level is set explicitly as well.
        logging.basicConfig(format="%(message)s")
        logging.getLogger().setLevel(logging.INFO)

        (train_X, train_y), (test_X, test_y) = read_input()
        model = train_model(train_X,
                            train_y,
                            test_X,
                            test_y,
                            ESTIMATORS,
                            LEARNING_RATE)

        eval_model(model, test_X, test_y)

def download(url, file_name):
    """Fetch `url` and save the response body to `file_name`.

    The body is streamed to disk in chunks instead of being read into memory
    in one piece, so memory use does not grow with the size of the download.

    Args:
      url: Location to fetch; anything `urllib.request.urlopen` accepts.
      file_name: Path of the local file to create or overwrite.

    Raises:
      urllib.error.URLError: if the URL cannot be opened.
      OSError: if `file_name` cannot be opened for writing.
    """
    import shutil  # local import: only this function needs it

    with urllib.request.urlopen(url) as response, open(file_name, "wb") as out_file:
        shutil.copyfileobj(response, out_file)

def read_input(test_size=TEST_FRACTION_SIZE):
  """Download the training CSV and turn it into imputed train/test arrays.

  Rows with no SalePrice are discarded. SalePrice is the target; every other
  column whose dtype is not `object` is a feature. The split is made without
  shuffling, so the test set is taken from the trailing rows of the file.
  Missing feature values are filled by an Imputer fitted on the training
  rows only and then applied to both sets.

  Args:
    test_size: Forwarded to `train_test_split` as its `test_size`.

  Returns:
    A pair of pairs: (train_X, train_y), (test_X, test_y).
  """
  download(TRAINING_URL, TRAINING_FILE)
  raw_frame = pd.read_csv(TRAINING_FILE)
  labelled = raw_frame.dropna(axis=0, subset=['SalePrice'])

  target = labelled['SalePrice']
  features = labelled.drop(['SalePrice'], axis=1)
  numeric_features = features.select_dtypes(exclude=['object'])

  split = train_test_split(numeric_features.values,
                           target.values,
                           test_size=test_size,
                           shuffle=False)
  train_X, test_X, train_y, test_y = split

  # Fit on the training rows only, then reuse the fitted imputer on the test rows.
  imputer = Imputer()
  train_X = imputer.fit_transform(train_X)
  test_X = imputer.transform(test_X)

  train_part = (train_X, train_y)
  test_part = (test_X, test_y)
  return train_part, test_part

def train_model(train_X,
                train_y,
                test_X,
                test_y,
                n_estimators,
                learning_rate):
  """Fit an XGBRegressor, using the test set for early stopping.

  Args:
    train_X: Training features.
    train_y: Training targets.
    test_X: Features of the evaluation set passed to `fit` as `eval_set`.
    test_y: Targets of the evaluation set passed to `fit` as `eval_set`.
    n_estimators: Forwarded to XGBRegressor as `n_estimators`.
    learning_rate: Forwarded to XGBRegressor as `learning_rate`.

  Returns:
    The fitted XGBRegressor.
  """
  regressor = XGBRegressor(n_estimators=n_estimators,
                           learning_rate=learning_rate)

  validation_sets = [(test_X, test_y)]
  regressor.fit(train_X,
                train_y,
                early_stopping_rounds=EARLY_STOPPING_ROUNDS,
                eval_set=validation_sets)

  best_rmse = regressor.best_score
  # best_iteration is reported one lower than the round count logged here.
  rounds_used = regressor.best_iteration + 1
  logging.info("Best RMSE on eval: %.2f with %d rounds", best_rmse, rounds_used)
  return regressor

def eval_model(model, test_X, test_y):
  """Evaluate the model on held-out data and log its mean absolute error.

  Args:
    model: Fitted estimator exposing `predict`.
    test_X: Features to predict on.
    test_y: True target values for `test_X`.

  Returns:
    The mean absolute error between `test_y` and the model's predictions.
  """
  predictions = model.predict(test_X)
  # scikit-learn metrics take the ground truth first: (y_true, y_pred).
  mae = mean_absolute_error(test_y, predictions)
  logging.info("mean_absolute_error=%.2f", mae)
  return mae




In [6]:
# Build the pipeline wrapper and train it in this kernel. The per-round
# validation RMSE lines printed below are emitted while train() runs.
model = XgBoostModel()
model.train()

[0]	validation_0-rmse:177514
Will train until validation_0-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:161858
[2]	validation_0-rmse:147237
[3]	validation_0-rmse:134132
[4]	validation_0-rmse:122224
[5]	validation_0-rmse:111538
[6]	validation_0-rmse:102142
[7]	validation_0-rmse:93392.2
[8]	validation_0-rmse:85824.6
[9]	validation_0-rmse:79667.6
[10]	validation_0-rmse:73463.4
[11]	validation_0-rmse:68059.4
[12]	validation_0-rmse:63350.5
[13]	validation_0-rmse:59732.1
[14]	validation_0-rmse:56260.7
[15]	validation_0-rmse:53392.6
[16]	validation_0-rmse:50770.8
[17]	validation_0-rmse:48107.8
[18]	validation_0-rmse:45923.9
[19]	validation_0-rmse:44154.2
[20]	validation_0-rmse:42488.1
[21]	validation_0-rmse:41263.3
[22]	validation_0-rmse:40212.8
[23]	validation_0-rmse:39089.1
[24]	validation_0-rmse:37691.1
[25]	validation_0-rmse:36875.2
[26]	validation_0-rmse:36276.2
[27]	validation_0-rmse:35444.1
[28]	validation_0-rmse:34831.5
[29]	validation_0-rmse:34205.4
[30]	validation_0-rmse



[66]	validation_0-rmse:27550.9
[67]	validation_0-rmse:27519.3
[68]	validation_0-rmse:27482.8
[69]	validation_0-rmse:27333.7
[70]	validation_0-rmse:27276.4
[71]	validation_0-rmse:27270.7
[72]	validation_0-rmse:27243
[73]	validation_0-rmse:27233.8
[74]	validation_0-rmse:27256.9
[75]	validation_0-rmse:27228.5
[76]	validation_0-rmse:27151.1
[77]	validation_0-rmse:27064.4
[78]	validation_0-rmse:27028.6
[79]	validation_0-rmse:27028.6
[80]	validation_0-rmse:27008.6
[81]	validation_0-rmse:27011
[82]	validation_0-rmse:26997
[83]	validation_0-rmse:26933.3
[84]	validation_0-rmse:26920.6
[85]	validation_0-rmse:26917.8
[86]	validation_0-rmse:26903.4
[87]	validation_0-rmse:26885.1
[88]	validation_0-rmse:26858.6
[89]	validation_0-rmse:26859.2
[90]	validation_0-rmse:26773.6
[91]	validation_0-rmse:26760.8
[92]	validation_0-rmse:26714.3
[93]	validation_0-rmse:26702.5
[94]	validation_0-rmse:26692.9
[95]	validation_0-rmse:26669.6
[96]	validation_0-rmse:26607.3
[97]	validation_0-rmse:26591.4
[98]	validatio

In [37]:
# Register the XgBoostModel instance with fairing and start a fairing run.
# NOTE(review): `model` is created in an earlier cell, so this cell fails on a
# fresh kernel unless that cell has been executed first.
# NOTE(review): the captured output shows the training log twice, so run()
# presumably invokes model.train() again — confirm against the fairing docs.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell so all dependencies are visible in one place.
import fairing
fairing.config.set_model(model)
fairing.config.run()



[0]	validation_0-rmse:177514
Will train until validation_0-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:161858
[2]	validation_0-rmse:147237
[3]	validation_0-rmse:134132
[4]	validation_0-rmse:122224
[5]	validation_0-rmse:111538
[6]	validation_0-rmse:102142
[7]	validation_0-rmse:93392.2
[8]	validation_0-rmse:85824.6
[9]	validation_0-rmse:79667.6
[10]	validation_0-rmse:73463.4
[11]	validation_0-rmse:68059.4
[12]	validation_0-rmse:63350.5
[13]	validation_0-rmse:59732.1
[14]	validation_0-rmse:56260.7
[15]	validation_0-rmse:53392.6
[16]	validation_0-rmse:50770.8
[17]	validation_0-rmse:48107.8
[18]	validation_0-rmse:45923.9
[19]	validation_0-rmse:44154.2
[20]	validation_0-rmse:42488.1
[21]	validation_0-rmse:41263.3
[22]	validation_0-rmse:40212.8
[23]	validation_0-rmse:39089.1
[24]	validation_0-rmse:37691.1
[25]	validation_0-rmse:36875.2
[26]	validation_0-rmse:36276.2
[27]	validation_0-rmse:35444.1
[28]	validation_0-rmse:34831.5
[29]	validation_0-rmse:34205.4
[30]	validation_0-rmse

[0]	validation_0-rmse:177514
Will train until validation_0-rmse hasn't improved in 50 rounds.
[1]	validation_0-rmse:161858
[2]	validation_0-rmse:147237
[3]	validation_0-rmse:134132
[4]	validation_0-rmse:122224
[5]	validation_0-rmse:111538
[6]	validation_0-rmse:102142
[7]	validation_0-rmse:93392.2
[8]	validation_0-rmse:85824.6
[9]	validation_0-rmse:79667.6
[10]	validation_0-rmse:73463.4
[11]	validation_0-rmse:68059.4
[12]	validation_0-rmse:63350.5
[13]	validation_0-rmse:59732.1
[14]	validation_0-rmse:56260.7
[15]	validation_0-rmse:53392.6
[16]	validation_0-rmse:50770.8
[17]	validation_0-rmse:48107.8
[18]	validation_0-rmse:45923.9
[19]	validation_0-rmse:44154.2
[20]	validation_0-rmse:42488.1
[21]	validation_0-rmse:41263.3
[22]	validation_0-rmse:40212.8
[23]	validation_0-rmse:39089.1
[24]	validation_0-rmse:37691.1
[25]	validation_0-rmse:36875.2
[26]	validation_0-rmse:36276.2
[27]	validation_0-rmse:35444.1
[28]	validation_0-rmse:34831.5
[29]	validation_0-rmse:34205.4
[30]	validation_0-rmse

