# CoxPH tests

In [11]:
# nuclio: ignore
import nuclio

In [12]:
import warnings
# Silence FutureWarning for the whole session so the cell output below
# is not cluttered by deprecation notices.
warnings.simplefilter("ignore", FutureWarning)

In [13]:
import os
import pandas as pd
from mlrun.datastore import DataItem
from mlrun.artifacts import get_model
from cloudpickle import load
from mlrun.mlutils import eval_class_model

def cox_test(
    context,
    models_path: DataItem,
    test_set: DataItem,
    label_column: str,
    plots_dest: str = "plots",
    model_evaluator=None,
) -> None:
    """Test a fitted model against a held-out dataset.

    Loads the pickled model located through ``models_path``, evaluates it on
    the held-out test features with ``eval_class_model`` and logs the
    resulting plots, tables and metrics.  If the evaluation fails for any
    reason, the model's ``summary`` is logged as the "cox-test-summary"
    dataset instead (best-effort fallback).

    Can be part of a kubeflow pipeline as a test step that is run post EDA and
    training/validation cycles.

    :param context:         the function context
    :param models_path:     model artifact to be tested (its '.pkl' file is loaded)
    :param test_set:        test features and labels
    :param label_column:    column name for ground truth labels
    :param plots_dest:      dir for test plots
    :param model_evaluator: WIP: specific method to generate eval, passed in as string
                            or available in this folder; not supported yet, so when
                            it is set only the fallback summary is logged
    """
    xtest = test_set.as_df()
    ytest = xtest.pop(label_column)

    # Only the local file path is needed from get_model; the model itself is
    # (re)loaded from that file below.
    model_file, _, _ = get_model(models_path.url, suffix='.pkl')
    # NOTE: unpickling can execute arbitrary code -- only point models_path
    # at trusted model artifacts.
    with open(str(model_file), "rb") as model_fp:
        model_obj = load(model_fp)

    try:
        # there could be different eval_models, type of model (xgboost, tfv1, tfv2...)
        if model_evaluator:
            # Custom evaluators are not wired in yet; divert explicitly to the
            # fallback below instead of failing on an undefined variable.
            raise NotImplementedError("custom model_evaluator is not supported yet")

        # binary and multiclass
        eval_metrics = eval_class_model(context, xtest, ytest, model_obj)

        # just do this inside log_model?
        model_plots = eval_metrics.pop("plots")
        model_tables = eval_metrics.pop("tables")
        for plot in model_plots:
            context.log_artifact(plot, local_path=f"{plots_dest}/{plot.key}.html")
        for tbl in model_tables:
            # each table is stored under its own key (not the last plot's key)
            context.log_artifact(tbl, local_path=f"{plots_dest}/{tbl.key}.csv")

        context.log_results(eval_metrics)
    except Exception as exc:
        # Deliberate best-effort: keep the run alive, but record why the
        # evaluation could not be completed before logging the dummy summary.
        context.logger.info(f"cox model evaluation failed: {exc!r}")
        #dummy log:
        context.log_dataset("cox-test-summary", df=model_obj.summary, index=True, format="csv")
        context.logger.info("cox tester not implemented")

In [14]:
# nuclio: end-code

### save

In [15]:
from mlrun import code_to_function
from mlrun.platforms.other import auto_mount

# Set to True to build the function on the GPU-enabled image.
gpus = False

# Keyword arguments for code_to_function describing the exported job.
fn_params = dict(
    name="coxph_test",
    handler="cox_test",
    kind="job",
    image="mlrun/ml-models-gpu" if gpus else "mlrun/ml-models",
    description="test cox proportional hazards model",
    categories=["ml", "test"],
    labels=dict(author="yjb", framework="survival"),
)

fn = code_to_function(**fn_params)

# Write the function spec to disk first, then apply the auto-mount modifier.
fn.export("function.yaml")
fn.apply(auto_mount())

[mlrun] 2020-06-14 13:09:47,448 function spec saved to path: function.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f98e1d386a0>

In [16]:
# Task definition expanded into NewTask(**task_params) by the runs below:
# the task name plus the handler parameters.
task_params = dict(
    name="tasks cox test",
    params=dict(
        label_column="labels",
        plots_dest="churn/test/plots",
    ),
)

In [17]:
# Remote CSV that is passed to the runs below as the `test_set` input.
DATA_URL = (
    "https://raw.githubusercontent.com/yjb-ds/testdata/master"
    "/demos/churn/churn-tests.csv"
)

In [19]:
from mlrun import run_local, NewTask, mlconf

# Run the handler in-process first, with the same task and inputs that are
# later submitted through the exported function.
run = run_local(
    NewTask(**task_params),
    handler=cox_test,
    inputs={
        "test_set": DATA_URL,
        "models_path": "models/cox",
    },
    # Build the path with os.path.join (as the fn.run cell does) so a trailing
    # separator on mlconf.artifact_path does not yield a doubled slash.
    workdir=os.path.join(mlconf.artifact_path, "churn"),
)

[mlrun] 2020-06-14 13:09:54,707 starting run tasks cox test uid=6284f5f5e6d14a969ba81addcfaf200f  -> http://mlrun-api:8080
[mlrun] 2020-06-14 13:09:55,249 log artifact cox-test-summary at /User/artifacts/cox-test-summary.csv, size: 3395, db: Y
[mlrun] 2020-06-14 13:09:55,250 cox tester not implemented



project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...cfaf200f,0,Jun 14 13:09:54,completed,tasks cox test,v3io_user=adminkind=handlerowner=adminhost=jupyter-7b44c8d958-kklf7,test_setmodels_path,label_column=labelsplots_dest=churn/test/plots,,cox-test-summary


to track results use .show() or .logs() or in CLI: 
!mlrun get run 6284f5f5e6d14a969ba81addcfaf200f --project default , !mlrun logs 6284f5f5e6d14a969ba81addcfaf200f --project default
[mlrun] 2020-06-14 13:09:55,307 run executed, status=completed


In [20]:
# Run the exported function (kind "job") with the task defined above and the
# same test-set / model inputs.
run = fn.run(
    NewTask(**task_params),
    inputs={"test_set": DATA_URL, "models_path": "models/cox"},
    workdir=os.path.join(mlconf.artifact_path, "churn"),
)

[mlrun] 2020-06-14 13:10:09,843 starting run tasks cox test uid=d17f26d2e17941adaacae38726830ef8  -> http://mlrun-api:8080
[mlrun] 2020-06-14 13:10:09,943 Job is running in the background, pod: tasks-cox-test-wzpd8
[mlrun] 2020-06-14 13:10:13,389 starting local run: main.py # cox_test
[mlrun] 2020-06-14 13:10:14,336 log artifact cox-test-summary at /User/artifacts/cox-test-summary.csv, size: 3395, db: Y
[mlrun] 2020-06-14 13:10:14,336 cox tester not implemented

[mlrun] 2020-06-14 13:10:14,362 run executed, status=completed
final state: succeeded


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...26830ef8,0,Jun 14 13:10:14,completed,tasks cox test,v3io_user=adminkind=jobowner=adminhost=tasks-cox-test-wzpd8,test_setmodels_path,label_column=labelsplots_dest=churn/test/plots,,cox-test-summary


to track results use .show() or .logs() or in CLI: 
!mlrun get run d17f26d2e17941adaacae38726830ef8 --project default , !mlrun logs d17f26d2e17941adaacae38726830ef8 --project default
[mlrun] 2020-06-14 13:10:16,113 run executed, status=completed
