In [1]:
# nuclio: ignore
import nuclio

In [2]:
import os
import pandas as pd
from mlrun.datastore import DataItem
from mlrun.artifacts import get_model
from cloudpickle import load

from mlutils import eval_class_model, log_model

from urllib.request import urlopen

import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

In [3]:
def _load_xgb_model(models_path: DataItem, default_model="model.pkl"):
    """hack function - load a pickled xgb model

    First calls ``get_model(models_path.url, suffix=".pkl")`` and unpickles
    the file it returns.  If that attempt raises ``ValueError``, falls back
    to downloading ``<models_path>/<default_model>`` with ``urlopen`` and
    unpickling the response.

    SECURITY: unpickling executes arbitrary code embedded in the payload,
    so only point this at model locations you trust.

    :param models_path:    local, url, or artifact
    :param default_model:  default file name, currently a pickle
    :returns: the unpickled model object
    :raises Exception: "model location likely misspecified" when the first
                       attempt fails with anything other than ``ValueError``;
                       the underlying error is attached as ``__cause__``.
                       Errors raised by the fallback download propagate
                       unchanged.
    """
    try:
        model_file, _, _ = get_model(models_path.url, suffix=".pkl")
        # context manager so the handle is closed even if unpickling fails
        with open(model_file, "rb") as model_fp:
            return load(model_fp)
    except ValueError:
        # URLs always use "/" separators, so join by hand instead of relying
        # on the platform-dependent os.path.join
        model_url = f"{str(models_path).rstrip('/')}/{default_model}"
        with urlopen(model_url) as response:
            return load(response)
    except Exception as err:
        # keep the original failure reachable for debugging
        raise Exception("model location likely misspecified") from err

In [4]:
def test_model(
    context,
    models_path: DataItem,
    test_set: DataItem,
    label_column: str,
    plots_dest: str = "plots",
    results_key: str = "xgb-test-results",
    default_model: str = "model.pkl"
) -> None:
    """Evaluate a trained classifier on a held-out test set.

    The test set is read as a dataframe, the label column is split off,
    the model is loaded through `_load_xgb_model`, and the pair is handed to
    `eval_class_model`.  The resulting metrics are passed to `log_model`
    together with `plots_dest`; no model object is passed to `log_model`.

    Intended to be usable as the test step of a kubeflow pipeline, run after
    the EDA and training/validation cycles.

    :param context:         the function context
    :param models_path:     model artifact to be tested
    :param test_set:        test features and labels
    :param label_column:    column name for ground truth labels
    :param plots_dest:      destination for test plots
    :param results_key:     (xgb-test-results) logged results key; accepted
                            but not referenced in the body of this function
    :param default_model:   (model.pkl), default model artifact file name
    """
    # the dataframe holds features + labels; pop() removes the label column
    # in place, leaving only the features behind
    features = test_set.as_df()
    labels = features.pop(label_column)

    classifier = _load_xgb_model(models_path, default_model)

    metrics = eval_class_model(context, features, labels, classifier)

    log_model(context, None, metrics, plots_dest, None)

In [5]:
# nuclio: end-code

### xgboost tester setup

In [6]:
# whether gpu inference is enabled -- not implemented yet
gpus = False

In [7]:
# task definition shared by the local and remote runs: the run name plus
# the parameters forwarded to the handler
task_params = {
    "name": "tasks xgb test",
    "params": {
        "label_column": "labels",
        "plots_dest": "plots/xgb_test",
    },
}

**Note:** remember to use the held-out test set produced during the model training pipeline.

In [12]:
# base URL of the repository holding the test fixtures
TEST_REPO = "https://raw.githubusercontent.com/yjb-ds/testdata/master"
# folder (relative to TEST_REPO) containing the model, and the test set inside it
MODELS_PATH = "models/xgb_test"
DATA_PATH = f"{MODELS_PATH}/test-set.parquet"

### run locally

In [13]:
from mlrun import run_local, NewTask, mlconf

# run the test_model handler through run_local, pointing both inputs at
# the fixtures under TEST_REPO
run = run_local(
    NewTask(**task_params),
    handler=test_model,
    inputs={
        "test_set": f"{TEST_REPO}/{DATA_PATH}",
        "models_path": f"{TEST_REPO}/{MODELS_PATH}",
    },
    workdir=mlconf.artifact_path,
)

[mlrun] 2020-05-27 00:56:31,342 starting run tasks xgb test uid=9b6ef031d5b34f2e9bb7ebfed27e70bf  -> http://mlrun-api:8080
[mlrun] 2020-05-27 00:56:31,529 Traceback (most recent call last):
  File "/User/repos/mlrun/mlrun/runtimes/local.py", line 187, in exec_from_params
    val = handler(*args_list)
  File "<ipython-input-4-643db952fcc4>", line 28, in test_model
    xtest = test_set.as_df()
  File "/User/repos/mlrun/mlrun/datastore/base.py", line 206, in as_df
    df_module=df_module, format=format, **kwargs)
  File "/User/repos/mlrun/mlrun/datastore/base.py", line 123, in as_df
    self.download(self._join(key), tmp)
  File "/User/repos/mlrun/mlrun/datastore/base.py", line 92, in download
    data = self.get(key)
  File "/User/repos/mlrun/mlrun/datastore/base.py", line 279, in get
    data = http_get(self.url + self._join(key), None, self.auth)
  File "/User/repos/mlrun/mlrun/datastore/base.py", line 237, in http_get
    raise OSError('failed to read file in {}'.format(url))
OSError:

failed to read file in https://raw.githubusercontent.com/yjb-ds/testdata/master/models/xgb_test/test-set.parquet


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...d27e70bf,0,May 27 00:56:31,error,tasks xgb test,v3io_user=adminkind=handlerowner=adminhost=jupyter-f9d6597fd-ns9cj,test_setmodels_path,label_column=labelsplots_dest=plots/xgb_test,,


to track results use .show() or .logs() or in CLI: 
!mlrun get run 9b6ef031d5b34f2e9bb7ebfed27e70bf --project default , !mlrun logs 9b6ef031d5b34f2e9bb7ebfed27e70bf --project default
[mlrun] 2020-05-27 00:56:31,626 run executed, status=error


RunError: failed to read file in https://raw.githubusercontent.com/yjb-ds/testdata/master/models/xgb_test/test-set.parquet

### run remotely

In [10]:
from mlrun import mlconf
from mlutils import create_function

# mlrun's function parameters, flattened into a single json-like dict
# that is handed to create_function
fn_params = dict(
    name="xgb_test",
    project="functions",
    default_handler="test_model",
    desc="test an xgb classifier using held-out or new data",
    categories=["ml", "test"],
    labels={"author": "yjb", "framework": "xgboost"},
    kind="job",
    image="yjbds/ml-models",
    commands=None,
    tag=mlconf.images_tag,
)

# TODO: there should also be a signature which takes task_params and
# runs: `create_and_run(*)`
fn = create_function(fn_params, path=".")

[mlrun] 2020-05-26 20:40:02,833 function spec saved to path: ./function.yaml


In [11]:
from mlrun import NewTask

# submit the same task through the function object created above, with
# the same inputs as the local run
run = fn.run(
    NewTask(**task_params),
    inputs={
        "test_set": f"{TEST_REPO}/{DATA_PATH}",
        "models_path": f"{TEST_REPO}/{MODELS_PATH}",
    },
    workdir=mlconf.artifact_path,
)

[mlrun] 2020-05-26 20:40:02,848 starting run tasks xgb test uid=f04b2ee94434454780e007ba5de0f9b7  -> http://mlrun-api:8080
[mlrun] 2020-05-26 20:40:02,965 Job is running in the background, pod: tasks-xgb-test-n6jkf
[mlrun] 2020-05-26 20:40:06,400 starting local run: main.py # test_model
[mlrun] 2020-05-26 20:40:08,282 log artifact calibration curve at /User/artifacts/plots/xgb_test/calibration curve.html, size: 53776, db: Y
[mlrun] 2020-05-26 20:40:08,363 log artifact confusion-matrix-normalized at /User/artifacts/plots/xgb_test/confusion-matrix-normalized.html, size: 19626, db: Y
[mlrun] 2020-05-26 20:40:08,395 log artifact learning curve - erreur at /User/artifacts/plots/xgb_test/learning curve - erreur.html, size: 3282, db: Y
[mlrun] 2020-05-26 20:40:08,429 log artifact feature-importances at /User/artifacts/plots/xgb_test/feature-importances.html, size: 3278, db: Y
[mlrun] 2020-05-26 20:40:08,516 log artifact roc-binary at /User/artifacts/plots/xgb_test/roc-binary.html, size: 33333

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...5de0f9b7,0,May 26 20:40:07,completed,tasks xgb test,v3io_user=adminkind=jobowner=adminhost=tasks-xgb-test-n6jkf,test_setmodels_path,label_column=labelsplots_dest=plots/xgb_test,test-accuracy=0.964test-error=0.036rocauc=0.9845582259375362brier_score=0.026451627408418558f1-score=0.9552238805970149precision_score=0.964824120603015recall_score=0.9458128078817734,calibration curveconfusion-matrix-normalizedlearning curve - erreurfeature-importancesroc-binaryprecision-recall-binaryfeature-importances-tbl


to track results use .show() or .logs() or in CLI: 
!mlrun get run f04b2ee94434454780e007ba5de0f9b7  , !mlrun logs f04b2ee94434454780e007ba5de0f9b7 
[mlrun] 2020-05-26 20:40:12,154 run executed, status=completed
