## A simple regression training using LightGBM through Fairing

In [6]:
import os
from time import gmtime, strftime

import fairing
import pandas as pd
from fairing.frameworks import lightgbm

# Configure the Google Container Registry (GCR) path used for storing output containers.
# Any other Docker container registry can be used instead of GCR.
GCP_PROJECT = fairing.cloud.gcp.guess_project_name()
DOCKER_REGISTRY = 'gcr.io/{project}/fairing-job'.format(project=GCP_PROJECT)

## Launch a LightGBM training task

In [7]:
# Creating a bucket for copying the trained model. 
# You can set gcs_bucket variable to an existing bucket name if that is desired.
gcs_bucket = "gs://{}-fairing".format(GCP_PROJECT)
!gsutil mb {gcs_bucket}

Creating gs://caip-dexter-bugbash-fairing/...
ServiceException: 409 Bucket caip-dexter-bugbash-fairing already exists.


In [8]:
# LightGBM configuration passed to lightgbm.execute() for the training job.
# Input data is read from GCS and the model is written under gcs_bucket.
params = {
    "task": "train",
    "boosting_type": "gbdt",
    "objective": "regression",
    "metric": "l2",
    "metric_freq": 1,
    "num_leaves": 31,
    "learning_rate": 0.05,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 5,
    "n_estimators": 10,
    "is_training_metric": "true",
    "valid_data": "gs://fairing-lightgbm/regression-example/regression.test",
    "train_data": "gs://fairing-lightgbm/regression-example/regression.train",
    "verbose": 1,
    "verbose_eval": 1,
    # The model file name carries a UTC timestamp taken when this cell runs,
    # so every execution of the cell targets a new output path.
    "model_output": "{}/lightgbm/example/model_{}.txt".format(
        gcs_bucket,
        strftime("%Y_%m_%d_%H_%M_%S", gmtime()),
    ),
    # Distributed training settings.
    "num_machines": 3,
    "tree_learner": "feature",
}

In [5]:
# Submit the training job described by `params`. The call is the last
# expression of the cell, so its return value is shown as the cell output.
lightgbm.execute(
    config=params,
    docker_registry=DOCKER_REGISTRY,
    # Allocating 2 CPU cores per worker instance
    cores_per_worker=2,
    # Allocating 0.5GB of memory per worker instance
    memory_per_worker=0.5,
    stream_log=True,
)

Ignoring machine_list_file filed in the config
Building image using Append builder...
Creating docker context: /tmp/fairing_context_3vapkvxx
Loading Docker credentials for repository 'gcr.io/kubeflow-fairing/lightgbm:latest'
Invoking 'docker-credential-gcloud' to obtain Docker credentials.
Successfully obtained Docker credentials.
Image successfully built in 2.396838453991222s.
Pushing image gcr.io/caip-dexter-bugbash/fairing-job/fairing-job:478CB1C6...
Loading Docker credentials for repository 'gcr.io/caip-dexter-bugbash/fairing-job/fairing-job:478CB1C6'
Invoking 'docker-credential-gcloud' to obtain Docker credentials.
Successfully obtained Docker credentials.
Uploading gcr.io/caip-dexter-bugbash/fairing-job/fairing-job:478CB1C6
Layer sha256:5f79c90f8d7c1e4b72ecfb1ef6b6076ab0033fbdf3c67ec648a5f389c6a9fe2c exists, skipping
Layer sha256:472a6afc63327632ba525d83814cd962d55ac3b8b3cd3e820ae311f05954cfe9 exists, skipping
Layer sha256:4250b3117dca5e14edc32ebf1366cd54e4cda91f17610b76c504a8691

Cluster setup:
fairing-tfjob-q2trr-chief-0.kubeflow.svc	10.32.22.3	2222
fairing-tfjob-q2trr-worker-0.kubeflow.svc	10.32.0.183	2222
fairing-tfjob-q2trr-worker-1.kubeflow.svc	10.32.0.184	2222
Copying gs://fairing-lightgbm/regression-example/regression.train.weight...
/ [1 files][131.6 KiB/131.6 KiB]                                                
Operation completed over 1 objects/131.6 KiB.
Copying gs://fairing-lightgbm/regression-example/regression.train...
/ [1 files][  1.2 MiB/  1.2 MiB]                                                
Operation completed over 1 objects/1.2 MiB.
Copying gs://fairing-lightgbm/regression-example/regression.test...
/ [1 files][ 85.7 KiB/ 85.7 KiB]                                                
Operation completed over 1 objects/85.7 KiB.
All files are copied!
[LightGBM] [Info] Finished loading parameters
[LightGBM] [Info] Trying to bind port 2222...
[LightGBM] [Info] Binding port 2222 succeeded
[LightGBM] [Info] Listening...
[LightGBM] [Info] Connected 

<fairing.deployers.tfjob.tfjob.TfJob at 0x122e895f8>

## Let's look at the trained model

In [None]:
url = params['model_output']
model_name = os.path.split(url)[1]
!gsutil cp {url} /tmp/{model_name}
!head /tmp/{model_name}

## Running a prediction task using the trained model

In [None]:
# Name the prediction result after the model it was produced from.
# The stem is derived directly from params['model_output'] so this cell does
# not depend on the optional model-inspection cell having been run, and the
# model's own ".txt" extension is stripped so the result does not end in
# ".txt.txt".
model_stem = os.path.splitext(os.path.basename(params['model_output']))[0]

# LightGBM configuration for the prediction job: score the test data with the
# model written by the training job and store the result under gcs_bucket.
predict_params = {
    "task": "predict",
    'metric': 'l2',
    "data": "gs://fairing-lightgbm/regression-example/regression.test",
    "input_model": params['model_output'],
    "output_result": "{}/lightgbm/example/prediction_result_{}.txt".format(gcs_bucket, model_stem)
}

In [None]:
# Submit the prediction job described by `predict_params`, using the same
# container registry as the training job.
lightgbm.execute(
    config=predict_params,
    docker_registry=DOCKER_REGISTRY,
)

In [None]:
url = predict_params['output_result']
file_name = os.path.split(url)[1]
!gsutil cp {url} /tmp/{file_name}

In [None]:
# Load the downloaded prediction file and summarise it.
# header=None: the file is read as having no header row, so pandas labels the
# columns 0, 1, ...; the summary below reports column 0.
predictions = pd.read_csv("/tmp/{}".format(file_name), header=None)
print("Prediction mean: {}, count: {}".format(predictions.mean()[0], predictions.count()[0]))