In [None]:
# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Train and deploy Xgboost (Scikit-learn) on Kubeflow from Notebooks

This notebook introduces the use of Kubeflow Fairing to train and deploy a model on Kubeflow on Google Kubernetes Engine (GKE) and on Google Cloud AI Platform Training. It demonstrates how to:
 
* Train an XGBoost model in a local notebook,
* Use Kubeflow Fairing to train an XGBoost model remotely on Kubeflow cluster,
* Use Kubeflow Fairing to train an XGBoost model remotely on AI Platform training,
* Use Kubeflow Fairing to deploy a trained model to Kubeflow, and call the deployed endpoint for predictions.

**You need Python 3.6 to use Kubeflow Fairing.**

## Setups

* Pre-conditions
    - Deployed a kubeflow cluster through https://deploy.kubeflow.cloud/
    - Have the following environment variable ready: 
        - PROJECT_ID # project that hosts the Kubeflow cluster or runs AI Platform training
        - DEPLOYMENT_NAME # Kubeflow deployment name, the same as the cluster name after deployment
        - GCP_BUCKET # google cloud storage bucket

* Create service account
```bash
export SA_NAME=[service account name]
gcloud iam service-accounts create ${SA_NAME}
gcloud projects add-iam-policy-binding ${PROJECT_ID} \
    --member serviceAccount:${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com \
    --role 'roles/editor'
gcloud iam service-accounts keys create ~/key.json \
    --iam-account ${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com
```

* Authorize Docker to push to and pull from Container Registry
```bash
gcloud auth configure-docker
```

* Update local kubeconfig (for submitting jobs to the Kubeflow cluster)
```bash
export CLUSTER_NAME=${DEPLOYMENT_NAME} # this is the deployment name or the Kubernetes cluster name
export ZONE=us-central1-c
gcloud container clusters get-credentials ${CLUSTER_NAME} --zone ${ZONE}
```

* Set the environmental variable: GOOGLE_APPLICATION_CREDENTIALS
```bash
export GOOGLE_APPLICATION_CREDENTIALS=....
```
```python
os.environ['GOOGLE_APPLICATION_CREDENTIALS']=...
```

* Install the latest version of fairing
```python
pip install git+https://github.com/kubeflow/fairing@master
```

* Upload training file
```bash
# upload the train.csv to GCS bucket that can be accessed from both CMLE and Kubeflow cluster
gsutil cp ./train.csv ${GCP_BUCKET}/train.csv
```

**Please note that the above configuration is required for a notebook service running outside the Kubeflow environment. The examples demonstrated in this notebook have also been fully tested on a notebook service outside the Kubeflow cluster.**

**The environment variables, e.g. service account, project, etc., should have been pre-configured while setting up the cluster.**

## Set up your notebook for training an XGBoost model

Import the libraries required to train this model.

In [4]:
# ! pip3 install joblib
# ! pip3 install pandas scikit-learn xgboost

In [1]:
import argparse
import logging
import joblib
import sys
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier

In [2]:
# Raise the root logger to INFO and emit bare messages so that the
# logging.info() calls in the cells below appear in the notebook output.
logging.getLogger().setLevel(logging.INFO)
logging.basicConfig(format='%(message)s')

In [9]:
import os
import fairing

# Setting up Google Container Registry (GCR) for storing output containers.
# You can use any Docker container registry instead of GCR.
# For a local notebook, GCP_PROJECT should be set explicitly.
GCP_PROJECT = fairing.cloud.gcp.guess_project_name()
# Bucket that holds the training data and the exported model. The GCP_BUCKET
# environment variable from the setup section is used when it is set and
# non-empty; otherwise the original default applies. Later cells build paths
# as GCP_Bucket + file name, so the value is normalised to end with one slash.
GCP_Bucket = (os.environ.get('GCP_BUCKET') or 'gs://kubeflow-trykube/').rstrip('/') + '/'
print(GCP_PROJECT)
# This is for a local notebook instead of one running in the Kubeflow cluster
# os.environ['GOOGLE_APPLICATION_CREDENTIALS']=

trykube-248403


## Define the model logic

### Define a function to split the input file into training and testing datasets.

In [4]:
def gcs_copy(src_path, dst_path):
    """Copy a file with ``gsutil cp`` and print what the command wrote to stdout.

    Args:
        src_path: Source path handed to ``gsutil cp``.
        dst_path: Destination path handed to ``gsutil cp``.

    Raises:
        subprocess.CalledProcessError: If ``gsutil`` exits with a non-zero
            status, so a failed transfer is no longer silently ignored.
    """
    # Imported locally, as the original cell does.
    import subprocess
    completed = subprocess.run(['gsutil', 'cp', src_path, dst_path],
                               stdout=subprocess.PIPE,
                               check=True)
    # Strip only trailing line breaks; unconditionally slicing off the last
    # byte would truncate output that does not end with a newline.
    print(completed.stdout.decode('utf-8').rstrip('\r\n'))
    
def gcs_download(src_path, file_name):
    """Download a file with ``gsutil cp`` and print the command's stdout.

    Args:
        src_path: Source path handed to ``gsutil cp``.
        file_name: Local destination handed to ``gsutil cp``.

    Raises:
        subprocess.CalledProcessError: If ``gsutil`` exits with a non-zero
            status, so callers do not go on to read a missing or stale file.
    """
    # Imported locally, as the original cell does.
    import subprocess
    completed = subprocess.run(['gsutil', 'cp', src_path, file_name],
                               stdout=subprocess.PIPE,
                               check=True)
    # Strip only trailing line breaks; unconditionally slicing off the last
    # byte would truncate output that does not end with a newline.
    print(completed.stdout.decode('utf-8').rstrip('\r\n'))

In [20]:
# Upload the local training CSV to the bucket so the training code can fetch it.
gcs_copy(src_path='train_fraud.csv', dst_path=GCP_Bucket + "train_fraud.csv")




In [5]:
def read_input(source_path, test_size=0.25, random_state=None):
    """Download a CSV file and split it into train and test arrays.

    The file is fetched with ``gcs_download`` into the working directory under
    its base name. Rows containing missing values are dropped, the ``Class``
    column is used as the label, and ``Amount``, ``Time`` and any object-typed
    columns are excluded from the features.

    Args:
        source_path: Path of the CSV file, passed to ``gcs_download``.
        test_size: Forwarded to ``train_test_split``; defaults to 0.25.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` keeps the original unseeded shuffle; pass an int to get
            the same split on every run.

    Returns:
        ``((train_X, train_y), (test_X, test_y))``.
    """
    file_name = source_path.split('/')[-1]
    gcs_download(source_path, file_name)
    # Non-mutating dropna: same rows as before, without in-place modification.
    data = pd.read_csv(file_name).dropna(axis=0)

    y = data.Class
    X = data.drop(['Class', 'Amount', 'Time'], axis=1).select_dtypes(exclude=['object'])

    train_X, test_X, train_y, test_y = train_test_split(X.values,
                                                      y.values,
                                                      test_size=test_size,
                                                      shuffle=True,
                                                      random_state=random_state)

    # The imputer is fitted on the training split only and then applied to the
    # test split.
    imputer = SimpleImputer()
    train_X = imputer.fit_transform(train_X)
    test_X = imputer.transform(test_X)

    return (train_X, train_y), (test_X, test_y)

### Define functions to train, evaluate, and save the trained model.

In [6]:
def train_model(train_X,
                train_y,
                test_X,
                test_y,
                n_estimators,
                learning_rate):
    """Train an XGBClassifier, using the test split for early stopping.

    Args:
        train_X: Training features.
        train_y: Training labels.
        test_X: Evaluation features used as the early-stopping eval set.
        test_y: Evaluation labels used as the early-stopping eval set.
        n_estimators: Passed to ``XGBClassifier``.
        learning_rate: Passed to ``XGBClassifier``.

    Returns:
        The fitted ``XGBClassifier``.
    """
    model = XGBClassifier(n_estimators=n_estimators, learning_rate=learning_rate)

    model.fit(train_X,
              train_y,
              early_stopping_rounds=40,
              eval_set=[(test_X, test_y)])

    # print() does not interpolate printf-style arguments (unlike logging),
    # so the message has to be formatted explicitly.
    print("Best loss on eval: %.2f with %d rounds" % (model.best_score,
                                                      model.best_iteration + 1))
    return model

def eval_model(model, test_X, test_y):
    """Log the ROC AUC of ``model`` on the given test split.

    The score is computed from column 1 of ``model.predict_proba(test_X)``.
    """
    positive_scores = model.predict_proba(test_X)[:, 1]
    auc = roc_auc_score(test_y, positive_scores)
    logging.info("auc=%.2f", auc)

def save_model(model, model_file):
    """Dump the model locally with joblib, then copy it to the GCS bucket.

    Args:
        model: Trained model object to serialize.
        model_file: Local file name; also appended to ``GCP_Bucket`` to form
            the upload destination.
    """
    joblib.dump(model, model_file)
    remote_path = GCP_Bucket + model_file
    gcs_copy(model_file, remote_path)
    logging.info("Model export success: %s", model_file)

### Define a class for your model, with methods for training and prediction.

In [7]:
class FraudServe(object):
    """Train an XGBoost fraud classifier and serve predictions from it.

    ``train()`` runs the read / fit / evaluate / save pipeline defined by the
    notebook's helper functions; ``predict()`` serves predictions from the
    trained model, loading it from ``model_file`` on first use if needed.
    """

    def __init__(self):
        # Training CSV location inside the configured bucket.
        self.train_input = GCP_Bucket + "train_fraud.csv"
        self.n_estimators = 50
        self.learning_rate = 0.1
        # Local file name of the serialized model.
        self.model_file = "trained_fraud_model.joblib"
        # Populated by train() or lazily by predict().
        self.model = None

    def train(self):
        """Read the data, fit the model, evaluate it and save it."""
        (train_X, train_y), (test_X, test_y) = read_input(self.train_input)
        model = train_model(train_X,
                          train_y,
                          test_X,
                          test_y,
                          self.n_estimators,
                          self.learning_rate)

        eval_model(model, test_X, test_y)
        save_model(model, self.model_file)
        # Keep the fitted model so predict() on this instance can reuse it
        # instead of reloading it from disk.
        self.model = model

    def predict(self, X, feature_names):
        """Predict using the model for given ndarray.

        Args:
            X: Feature array passed to the model's ``predict``.
            feature_names: Accepted for the serving interface; not used here.

        Returns:
            A 1x2 nested list holding the first prediction twice.
        """
        # Compare against None explicitly instead of relying on the model
        # object's truthiness.
        if self.model is None:
            self.model = joblib.load(self.model_file)
        # Do any preprocessing
        # Pass the features positionally: the keyword name of this parameter
        # differs between xgboost releases (``data`` vs ``X``).
        prediction = self.model.predict(X)
        # Do any postprocessing
        # NOTE(review): only the first prediction is returned, duplicated to
        # fill a 1x2 response. Kept as-is because the endpoint output in this
        # notebook shows shape [1, 2]; confirm whether per-row predictions
        # were intended.
        return [[prediction.item(0), prediction.item(0)]]

## Train an XGBoost model in a notebook

Call `FraudServe().train()` to train your model, and then evaluate and save your trained model.

In [8]:
# Runs the full local pipeline: download and split the data, fit the model,
# evaluate it, then save and upload the trained model.
FraudServe().train()


[0]	validation_0-error:0.034853
Will train until validation_0-error hasn't improved in 40 rounds.
[1]	validation_0-error:0.029491
[2]	validation_0-error:0.032172
[3]	validation_0-error:0.032172
[4]	validation_0-error:0.032172
[5]	validation_0-error:0.029491
[6]	validation_0-error:0.029491
[7]	validation_0-error:0.032172
[8]	validation_0-error:0.029491
[9]	validation_0-error:0.029491
[10]	validation_0-error:0.029491
[11]	validation_0-error:0.029491
[12]	validation_0-error:0.02681
[13]	validation_0-error:0.024129
[14]	validation_0-error:0.021448
[15]	validation_0-error:0.024129
[16]	validation_0-error:0.021448
[17]	validation_0-error:0.021448
[18]	validation_0-error:0.021448
[19]	validation_0-error:0.021448
[20]	validation_0-error:0.021448
[21]	validation_0-error:0.021448
[22]	validation_0-error:0.021448
[23]	validation_0-error:0.021448
[24]	validation_0-error:0.021448
[25]	validation_0-error:0.021448
[26]	validation_0-error:0.021448
[27]	validation_0-error:0.021448
[28]	validation_0-er

auc=0.99
Model export success: trained_fraud_model.joblib





## Make Use of Fairing

### Specify an image registry that will hold the image built by Fairing

In [15]:
# This demo uses gsutil, so a custom base image with the Google Cloud SDK
# installed is used.
base_image = 'gcr.io/{}/fairing-predict-example:latest'.format(GCP_PROJECT)
# Uncomment to build and push the base image:
# !docker build --build-arg PY_VERSION=3.6.4 . -t {base_image}
# !docker push {base_image}

In [16]:
# Base image that the Fairing-built images start from.
BASE_IMAGE = base_image
# Registry path under which Fairing pushes the images it builds.
DOCKER_REGISTRY = 'gcr.io/{}/fairing-job-xgboost'.format(GCP_PROJECT)

### Train an XGBoost model remotely on Kubeflow

Import the `TrainJob` and `KubeflowGKEBackend` classes. Kubeflow Fairing packages the `FraudServe` class, the training data, and the training job's software prerequisites as a Docker image. Then Kubeflow Fairing deploys and runs the training job on Kubeflow.

In [18]:
from fairing import TrainJob
from fairing.backends import KubeflowGKEBackend

# Build an image containing FraudServe on top of BASE_IMAGE and submit it as
# a training job through the Kubeflow GKE backend.
train_job = TrainJob(
    FraudServe,
    BASE_IMAGE,
    input_files=["requirements.txt"],
    docker_registry=DOCKER_REGISTRY,
    backend=KubeflowGKEBackend(),
)
train_job.submit()

Using preprocessor: <class 'fairing.preprocessors.function.FunctionPreProcessor'>
Using docker registry: gcr.io/trykube-248403/fairing-job-xgboost
Using builder: <class 'fairing.builders.cluster.cluster.ClusterBuilder'>
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
Waiting for fairing-builder-27sbv to start...
Waiting for fairing-builder-27sbv to start...
Waiting for fairing-builder-27sbv to start...
Pod started running True


[36mINFO[0m[0000] Downloading base image gcr.io/trykube-248403/fairing-predict-example:latest
[36mINFO[0m[0001] Executing 0 build triggers
[36mINFO[0m[0001] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.
[36mINFO[0m[0049] Taking snapshot of full filesystem...
[36mINFO[0m[0064] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0064] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0064] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0064] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0064] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0064] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0084] WORKDIR /app/
[36mINFO[0m[0084] cmd: workdir
[36mINFO[0m[0084] Changed working directory to /app
[36mINFO[0m[0084] Taking snapshot of ful

[36mINFO[0m[0120] Taking snapshot of full filesystem...
[36mINFO[0m[0120] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0120] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0120] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0120] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0120] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0121] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0133] Using files from context: [/kaniko/buildcontext/app]
[36mINFO[0m[0133] COPY /app/ /app/
[36mINFO[0m[0133] Taking snapshot of files...
2019/07/31 07:17:19 existing blob: sha256:087a57faf9491b1b82a83e26bc8cc90c90c30e4a4d858b57ddd5b4c2c90095f6
2019/07/31 07:17:19 existing blob: sha256:2eeb5ce9b9240a928b0a799f9f2601027e2c6b7525394ae5c371f124058489d7
2019/07/31 07:17:19 existing blob: sha256:620aea26e85367b08cdf1f6768491fb4

Training job fairing-job-8h6hj launched.
Waiting for fairing-job-8h6hj-xsk28 to start...
Waiting for fairing-job-8h6hj-xsk28 to start...
Waiting for fairing-job-8h6hj-xsk28 to start...
Pod started running True


Copying gs://kubeflow-trykube/train_fraud.csv...
/ [1 files][793.4 KiB/793.4 KiB]                                                
Operation completed over 1 objects/793.4 KiB.

[0]	validation_0-error:0.050938
Will train until validation_0-error hasn't improved in 40 rounds.
[1]	validation_0-error:0.032172
[2]	validation_0-error:0.032172
[3]	validation_0-error:0.032172
[4]	validation_0-error:0.032172
[5]	validation_0-error:0.032172
[6]	validation_0-error:0.032172
[7]	validation_0-error:0.032172
[8]	validation_0-error:0.032172
[9]	validation_0-error:0.032172
[10]	validation_0-error:0.032172
[11]	validation_0-error:0.032172
[12]	validation_0-error:0.032172
[13]	validation_0-error:0.032172
[14]	validation_0-error:0.032172
[15]	validation_0-error:0.032172
[16]	validation_0-error:0.032172
[17]	validation_0-error:0.032172
[18]	validation_0-error:0.032172
[19]	validation_0-error:0.032172
[20]	validation_0-error:0.032172
[21]	validation_0-error:0.032172
[22]	validation_0-error:0.032172
[23]	val

Cleaning up job fairing-job-8h6hj...


### Train an XGBoost model remotely on Cloud ML Engine

Import the `TrainJob` and `GCPManagedBackend` classes. Kubeflow Fairing packages the `FraudServe` class, the training data, and the training job's software prerequisites as a Docker image. Then Kubeflow Fairing deploys and runs the training job on Cloud ML Engine.

In [21]:
from fairing import TrainJob
from fairing.backends import GCPManagedBackend

# Same training job as in the Kubeflow cell, submitted through the GCP
# managed backend instead.
train_job = TrainJob(
    FraudServe,
    BASE_IMAGE,
    input_files=["requirements.txt"],
    docker_registry=DOCKER_REGISTRY,
    backend=GCPManagedBackend(),
)
train_job.submit()

Using preprocessor: <class 'fairing.preprocessors.function.FunctionPreProcessor'>
Using docker registry: gcr.io/trykube-248403/fairing-job-xgboost
Using builder: <class 'fairing.builders.cluster.cluster.ClusterBuilder'>
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
Waiting for fairing-builder-6d4kb to start...
Waiting for fairing-builder-6d4kb to start...
Pod started running True


[36mINFO[0m[0000] Downloading base image gcr.io/trykube-248403/fairing-predict-example:latest
[36mINFO[0m[0002] Executing 0 build triggers
[36mINFO[0m[0002] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.
[36mINFO[0m[0051] Taking snapshot of full filesystem...
[36mINFO[0m[0067] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0067] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0067] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0067] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0067] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0068] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0087] WORKDIR /app/
[36mINFO[0m[0087] cmd: workdir
[36mINFO[0m[0087] Changed working directory to /app
[36mINFO[0m[0087] Taking snapshot of ful

[36mINFO[0m[0122] Taking snapshot of full filesystem...
[36mINFO[0m[0122] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0122] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0122] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0122] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0122] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0123] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0135] Using files from context: [/kaniko/buildcontext/app]
[36mINFO[0m[0135] COPY /app/ /app/
[36mINFO[0m[0135] Taking snapshot of files...
2019/07/31 07:45:01 existing blob: sha256:620aea26e85367b08cdf1f6768491fb44df6a2a71f7d663f835b1692e849c3ee
2019/07/31 07:45:01 existing blob: sha256:5d71636fb824265e30ff34bf20737c9cdc4f5af28b6bce86f08215c55b89bfab
2019/07/31 07:45:01 existing blob: sha256:687ed2fb2a0d7da5503478759fd00c23

file_cache is unavailable when using oauth2client >= 4.0.0 or google-auth
Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 36, in autodetect
    from google.appengine.api import memcache
ModuleNotFoundError: No module named 'google.appengine'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.contrib.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2cli

Creating training job with the following options: {'jobId': 'fairing_job_d8c7ed8f', 'trainingInput': {'scaleTier': 'BASIC', 'masterConfig': {'imageUri': 'gcr.io/trykube-248403/fairing-job-xgboost/fairing-job:FA205E89'}, 'region': 'us-central1'}}
Job submitted successfully.
Access job logs at the following URL:
https://console.cloud.google.com/mlengine/jobs/fairing_job_d8c7ed8f?project=trykube-248403


### Deploy the trained model to Kubeflow for predictions

Import the `PredictionEndpoint` and `KubeflowGKEBackend` classes. Kubeflow Fairing packages the `FraudServe` class, the trained model, and the prediction endpoint's software prerequisites as a Docker image. Then Kubeflow Fairing deploys and runs the prediction endpoint on Kubeflow.

This part only works for fairing version >=0.5.2

In [22]:
from fairing import PredictionEndpoint
from fairing.backends import KubeflowGKEBackend

# trained_fraud_model.joblib is exported by the local training run above and
# is shipped with the endpoint image as an input file.
endpoint = PredictionEndpoint(
    FraudServe,
    BASE_IMAGE,
    input_files=['trained_fraud_model.joblib', "requirements.txt"],
    docker_registry=DOCKER_REGISTRY,
    backend=KubeflowGKEBackend(),
)
endpoint.create()

Using preprocessor: <class 'fairing.preprocessors.function.FunctionPreProcessor'>
Using docker registry: gcr.io/trykube-248403/fairing-job-xgboost
Using builder: <class 'fairing.builders.cluster.cluster.ClusterBuilder'>
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
/opt/conda/lib/python3.6/site-packages/fairing/__init__.py already exists in Fairing context, skipping...
Waiting for fairing-builder-qtxch to start...
Waiting for fairing-builder-qtxch to start...
Waiting for fairing-builder-qtxch to start...
Pod started running True


[36mINFO[0m[0000] Downloading base image gcr.io/trykube-248403/fairing-predict-example:latest
[36mINFO[0m[0002] Executing 0 build triggers
[36mINFO[0m[0002] Unpacking rootfs as cmd RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi requires it.
[36mINFO[0m[0067] Taking snapshot of full filesystem...
[36mINFO[0m[0079] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0079] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0079] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0079] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0079] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0079] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0098] WORKDIR /app/
[36mINFO[0m[0098] cmd: workdir
[36mINFO[0m[0098] Changed working directory to /app
[36mINFO[0m[0098] Taking snapshot of ful

[36mINFO[0m[0134] Taking snapshot of full filesystem...
[36mINFO[0m[0134] Skipping paths under /dev, as it is a whitelisted directory
[36mINFO[0m[0134] Skipping paths under /etc/secrets, as it is a whitelisted directory
[36mINFO[0m[0134] Skipping paths under /kaniko, as it is a whitelisted directory
[36mINFO[0m[0134] Skipping paths under /proc, as it is a whitelisted directory
[36mINFO[0m[0134] Skipping paths under /sys, as it is a whitelisted directory
[36mINFO[0m[0134] Skipping paths under /var/run, as it is a whitelisted directory
[36mINFO[0m[0147] Using files from context: [/kaniko/buildcontext/app]
[36mINFO[0m[0147] COPY /app/ /app/
[36mINFO[0m[0147] Taking snapshot of files...
2019/07/31 07:51:44 existing blob: sha256:b251d940695c89cba7645ba18409d57bc50de2b1ae3500e7c7645b3ef8b81c3a
2019/07/31 07:51:44 existing blob: sha256:0c1db95989906f161007d8ef2a6ef6e0ec64bc15bf2c993fd002edbdfc7aa7df
2019/07/31 07:51:44 existing blob: sha256:c60eba308238780085602c72a69337c6

Endpoint fairing-deployer-szbzp launched.


Waiting for prediction endpoint to come up...


Prediction endpoint: http://35.229.188.112:5000/predict


### Deploy to GCP

In [None]:
# Deploy model to gcp
# from fairing.deployers.gcp.gcpserving import GCPServingDeployer
# deployer = GCPServingDeployer()
# deployer.deploy(VERSION_DIR, MODEL_NAME, VERSION_NAME)

### Call the prediction endpoint

Create a test dataset, then call the endpoint on Kubeflow for predictions.

In [23]:
# Re-read the data to obtain a test split, then send it to the deployed endpoint.
# read_input() shuffles without a fixed seed, so this split is not the one
# used during training.
(train_X, train_y), (test_X, test_y) = read_input(GCP_Bucket + "train_fraud.csv")
endpoint.predict_nparray(test_X)

{"data":{"names":["t:0","t:1"],"tensor":{"shape":[1,2],"values":[0.0,0.0]}},"meta":{}}






### Clean up the prediction endpoint

Delete the prediction endpoint created by this notebook.

In [24]:
# Remove the prediction endpoint created earlier in this notebook; the output
# lists the deleted service and deployment.
endpoint.delete()

Deleted service: kubeflow/fairing-service-hcd5h
Deleted deployment: kubeflow/fairing-deployer-szbzp
