# Using MLRUN function locally, as a Kubernetes Job, and in a Workflow

In [None]:
# Install mlrun from the `development` branch of its GitHub repository in place of
# the mlrun version that is already installed (run only when that specific version is needed).
!pip uninstall -y mlrun
!pip install git+https://github.com/mlrun/mlrun.git@development

In [1]:
# nuclio: ignore
# if the nuclio-jupyter package is not installed run !pip install nuclio-jupyter
import nuclio 

## Define function and its dependencies 

In [2]:
# Build configuration for the function image: install pandas on top of the
# python:3.6-jessie base image.
%nuclio cmd -c pip install pandas
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


In [2]:
import os

def training(context, p1=1, p2=2):
    """Example training handler: logs two results, a run label, and a model artifact.

    :param context: run context supplying ``name``, ``uid``, ``logger`` and the
                    ``log_result`` / ``set_label`` / ``log_artifact`` methods
    :param p1:      parameter the logged results are derived from
    :param p2:      second parameter, only echoed in the printed summary
    """
    # echo the run identity and the parameters this run received
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={p1}, p2={p2}')
    context.logger.info('started training')

    # placeholder for real training: each scalar result is a fixed multiple of p1
    for result_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_result(result_key, p1 * factor)

    # attach a label to this run
    context.set_label('category', 'tests')

    # log a simple artifact and label the artifact itself
    model_body = b'abc is 123'
    model_labels = {'framework': 'xgboost'}
    context.log_artifact('model.txt', body=model_body, labels=model_labels)

def validation(context, model):
    """Example validation handler: prints the model input and logs an HTML artifact.

    :param context: run context supplying ``name``, ``uid``, ``logger`` and ``log_artifact``
    :param model:   input object exposing a ``url`` attribute and a ``get()`` method
    """
    print(f'Run: {context.name} (uid={context.uid})')

    # show the model's url followed by whatever its get() call returns
    model_url = model.url
    model_body = model.get()
    print('file - {}:\n{}\n'.format(model_url, model_body))

    context.logger.info('started validation')

    # log a small HTML artifact, passing 'web-app' as its viewer
    report_body = b'<b> validated </b>'
    context.log_artifact('validation.html', body=report_body, viewer='web-app')

def listfiles(context, path='/'):
    """Print the directory entries found under ``path`` and return a one-line summary.

    :param context: run context (not used by this handler)
    :param path:    directory to list, defaults to ``'/'``
    :return: string of the form ``'<path> contain <count> files'``
    """
    entries = os.listdir(path)
    print(entries)
    entry_count = len(entries)
    return '{} contain {} files'.format(path, entry_count)

## Load MLRUN and specify defaults 

In [3]:
# nuclio: end-code
# (end-code marker tells nuclio to stop parsing the notebook from this cell)

# set mlrun db path (can also be specified in run_start command)
%env MLRUN_DBPATH=/User/mlrun
#%env MLRUN_PACKAGE_PATH=git+https://github.com/mlrun/mlrun.git@development

from mlrun import new_function, code_to_function, NewTask
from mlrun.platforms import mount_v3io

env: MLRUN_DBPATH=/User/mlrun


## Test the code locally
The functions above can be tested locally; parameters, inputs, and outputs can be specified in the API or in the runspec object.<br>
We create a `function`, which defines the runtime environment (type, code, image, ..), and `run` tasks/experiments using that function.<br>
(We use the `local` runtime by default; later on we will use a `job` runtime for running containers, and other runners such as MpiJob, Spark, Dask, Nuclio, .. can be used as well.)

In each run we can specify the function, inputs, parameters/hyper-parameters, etc. (check the `RunTemplate` class for details).<br>
In Jupyter, runs print a summary table with metadata and links to data artifacts; this can be disabled with `visible=False` in `.run()`.

In [4]:
# create a function object with default settings and run the `listfiles`
# handler through it, listing the /User directory
fn = new_function()
list_run = fn.run(handler=listfiles, params={'path': '/User'})

[mlrun] 2019-10-27 23:20:29,814 starting run None uid=e613efa8e2754080b733ec72441b3103  -> /User/mlrun
['.bash_history', '.bash_profile', '.bashrc', '.config', '.gitignore', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.local', '.pythonlibs', '.viminfo', '.vimrc', '1', 'LICENSE', 'README.md', 'assets', 'demos', 'examples', 'experiment-tracking', 'getting-started', 'igz-tutorials-get.sh', 'mlrun', 'nuclio-Inference.ipynb', 'nuclio-data_preperation.ipynb', 'nuclio-generator.ipynb', 'nuclio-training.ipynb', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...1b3103,0,Oct 27 23:20:29,completed,,kind=handlerowner=iguaziohost=jupyter-dulwoc9x63-ixir3-68dccc6b7-4psd5,,path=/User,return=/User contain 30 files,


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid e613efa8e2754080b733ec72441b3103 
[mlrun] 2019-10-27 23:20:29,967 run executed, status=completed


## Running and linking multiple tasks
In the next example we run two functions, `training` and `validation`, and pass the result of the first to the second.<br>
We will see in the 'job' example that linking works even when the tasks run in different processes or containers, or in a workflow.

In [5]:
# run `training` with p1=5, then pass its 'model.txt' output
# to `validation` as the 'model' input
train_run = fn.run(handler=training, params={'p1': 5})
model_path = train_run.output('model.txt')
validation_run = fn.run(handler=validation, inputs={'model': model_path})

[mlrun] 2019-10-27 23:20:29,975 starting run None uid=95f921a7b28646ae9ed883114a48ad15  -> /User/mlrun
[mlrun] 2019-10-27 23:20:30,090 started training
Run:  (uid=95f921a7b28646ae9ed883114a48ad15)
Params: p1=5, p2=2



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...48ad15,0,Oct 27 23:20:30,completed,,kind=handlerowner=iguaziohost=jupyter-dulwoc9x63-ixir3-68dccc6b7-4psd5category=tests,,p1=5,accuracy=10loss=15,model.txt


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 95f921a7b28646ae9ed883114a48ad15 
[mlrun] 2019-10-27 23:20:30,173 run executed, status=completed
[mlrun] 2019-10-27 23:20:30,174 starting run None uid=4f5bceb3f293407e818b869ba19bcc1d  -> /User/mlrun
[mlrun] 2019-10-27 23:20:30,213 started validation
Run:  (uid=4f5bceb3f293407e818b869ba19bcc1d)
file - model.txt:
b'abc is 123'




uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...9bcc1d,0,Oct 27 23:20:30,completed,,kind=handlerowner=iguaziohost=jupyter-dulwoc9x63-ixir3-68dccc6b7-4psd5,model,,,validation.html


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 4f5bceb3f293407e818b869ba19bcc1d 
[mlrun] 2019-10-27 23:20:30,273 run executed, status=completed


## Define cluster jobs and build images 
In order to run in a cluster we need to package our code and dependencies.<br>
The `code_to_function` call will automatically form a `Function` with a list of dependencies and runtime configuration.<br>
You can apply KubeFlow modifiers to configure resources like Volumes; `mount_v3io()` adds an iguazio v3io volume (Home of current user) to the function.

The `build()` command is optional; it pre-builds all the dependencies, so the runs will be faster. Note that the code and params can be updated per run.

In [6]:
# create an ML function from the notebook using the `job` runtime and attach it
# to the iguazio data fabric (v3io)
# note: this rebinds `fn`, replacing the function object created for the local runs
fn = code_to_function(runtime='job').apply(mount_v3io())

# prepare an image from the dependencies, so we won't need to build the image on every run
fn.build(image='mlrun/nuctest:latest')

[mlrun] 2019-10-27 23:21:09,551 building image (mlrun/nuctest:latest)
FROM python:3.6-jessie
WORKDIR /run
RUN pip install pandas
RUN pip install mlrun
ENV PYTHONPATH /run
[mlrun] 2019-10-27 23:21:09,553 using in-cluster config.
[mlrun] 2019-10-27 23:21:09,571 Pod mlrun-build-2qftv created
..
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:0318d80cb241983eda20b905d77fa0bfb06e29e5aabf075c7941ea687f1c125a: no such file or directory 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Built cross stage deps: map[]                
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/s

<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f462c74d710>

### Run the function on the cluster (build or use pre-built image)
Note the listfiles call will return the same results as in the local run, since the function shares the same filesystem.<br>
`with_code()` will inject the latest code into the function, in case we made changes (it doesn't require a new build).

In [7]:
# run the same `listfiles` handler, this time through the `job` function (`fn` now refers to it)
fn.run(handler=listfiles, params={'path': '/User'})

[mlrun] 2019-10-27 23:22:32,723 starting run nuclio-jobs uid=78f697488c9c4e9cb3c37f27c288ec40  -> /User/mlrun
[mlrun] 2019-10-27 23:22:32,749 using in-cluster config.
[mlrun] 2019-10-27 23:22:32,778 Pod nuclio-jobs-j9cmr created
....
[mlrun] 2019-10-27 23:22:42,992 starting run nuclio-jobs uid=78f697488c9c4e9cb3c37f27c288ec40  -> /User/mlrun
['.bash_history', '.bash_profile', '.bashrc', '.config', '.gitignore', '.igz', '.ipynb_checkpoints', '.ipython', '.jupyter', '.local', '.pythonlibs', '.viminfo', '.vimrc', '1', 'LICENSE', 'README.md', 'assets', 'demos', 'examples', 'experiment-tracking', 'getting-started', 'igz-tutorials-get.sh', 'mlrun', 'nuclio-Inference.ipynb', 'nuclio-data_preperation.ipynb', 'nuclio-generator.ipynb', 'nuclio-training.ipynb', 'update-tutorials.ipynb', 'v3io', 'welcome.ipynb']

type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 78f697488c9c4e9cb3c37f27c288ec40 
[mlrun] 2019-10-27 23:22:43,173 run executed, status=completed


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...88ec40,0,Oct 27 23:22:43,completed,nuclio-jobs,kind=localowner=iguaziohost=nuclio-jobs-j9cmr,,path=/User,return=/User contain 30 files,


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 78f697488c9c4e9cb3c37f27c288ec40 
[mlrun] 2019-10-27 23:22:48,226 run executed, status=completed


<mlrun.model.RunObject at 0x7f4623f7ef60>

In [8]:
# define a run template shared by the runs below: set the artifacts output path
# and add a `stage=dev` label (labels can be used for search later)
run_base = NewTask(out_path='/User/mlrun/data').set_label('stage', 'dev')

In [9]:
# run our training task with hyper-params for `p1` (values 2, 6, 4),
# and select the iteration with the max accuracy
# NOTE(review): `params={'p1': 9}` is passed as well, while the iterations in the saved
# output used p1=2, 6, 4 — confirm how the two interact
run = run_base.copy().with_hyper_params({'p1': [2,6,4]}, selector='max.accuracy')
train_run = fn.with_code().run(run, handler=training, name='my-training', params={'p1': 9})
# keep the 'model.txt' output of the training run for the validation step
model_path = train_run.output('model.txt')

[mlrun] 2019-10-27 23:22:53,659 starting run my-training uid=d44b8b5bf2024d0c830e90043cdb6db2  -> /User/mlrun
[mlrun] 2019-10-27 23:22:53,711 Pod my-training-w48l4 created
..
[mlrun] 2019-10-27 23:22:59,240 starting run my-training uid=d44b8b5bf2024d0c830e90043cdb6db2  -> /User/mlrun
[mlrun] 2019-10-27 23:22:59,384 started training
Run: my-training (uid=d44b8b5bf2024d0c830e90043cdb6db2-1)
Params: p1=2, p2=2

[mlrun] 2019-10-27 23:22:59,474 started training
Run: my-training (uid=d44b8b5bf2024d0c830e90043cdb6db2-2)
Params: p1=6, p2=2

[mlrun] 2019-10-27 23:22:59,574 started training
Run: my-training (uid=d44b8b5bf2024d0c830e90043cdb6db2-3)
Params: p1=4, p2=2

type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid d44b8b5bf2024d0c830e90043cdb6db2 
[mlrun] 2019-10-27 23:22:59,741 run executed, status=completed


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...db6db2,0,Oct 27 23:22:59,completed,my-training,stage=devkind=localowner=iguazio,,p1=9,best_iteration=2accuracy=12loss=18,model.txtiteration_results


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid d44b8b5bf2024d0c830e90043cdb6db2 
[mlrun] 2019-10-27 23:23:04,789 run executed, status=completed


In [10]:
# run validation, using the best model result from the previous step as the `model` input
fn.run(run_base, handler=validation, name='my-validation', inputs={'model': model_path})

[mlrun] 2019-10-27 23:23:38,199 starting run my-validation uid=0b01c1a208cd4eb3b256c63620af6057  -> /User/mlrun
[mlrun] 2019-10-27 23:23:38,245 Pod my-validation-dksws created
..
[mlrun] 2019-10-27 23:23:43,480 starting run my-validation uid=0b01c1a208cd4eb3b256c63620af6057  -> /User/mlrun
[mlrun] 2019-10-27 23:23:43,642 started validation
Run: my-validation (uid=0b01c1a208cd4eb3b256c63620af6057)
file - /User/mlrun/data/2/model.txt:
b'abc is 123'


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 0b01c1a208cd4eb3b256c63620af6057 
[mlrun] 2019-10-27 23:23:43,698 run executed, status=completed


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...af6057,0,Oct 27 23:23:43,completed,my-validation,stage=devkind=localowner=iguaziohost=my-validation-dksws,model,,,validation.html


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 0b01c1a208cd4eb3b256c63620af6057 
[mlrun] 2019-10-27 23:23:48,745 run executed, status=completed


<mlrun.model.RunObject at 0x7f4623f2cf28>

In [11]:
# list the pods (builds, jobs, remote functions) with their state and start time
!mlrun get po 

[mlrun] 2019-10-27 23:24:22,547 using in-cluster config.
state      started          type     name
Succeeded  Oct 27 23:21:09  build    mlrun-build-2qftv
Succeeded  Oct 24 11:17:25  build    mlrun-build-9f92m
Succeeded  Oct 27 23:22:53  job      my-training-w48l4
Succeeded  Oct 27 23:23:38  job      my-validation-dksws
Running    Oct 27 23:18:56  remote   mysrv2-6bd7b786c9-jblzd
Running    Oct 24 18:49:47  remote   mysrv3-6d8bd66fb4-54drk
Succeeded  Oct 27 23:22:32  job      nuclio-jobs-j9cmr
Running    Oct 27 23:20:06  remote   xgb-train-868c479499-lfj9n


In [13]:
# check job logs
# NOTE: the pod name below belongs to one specific past run; replace it with a
# pod name printed by `mlrun get po` for the run you want to inspect
!mlrun watch my-training-gkt7l

[mlrun] 2019-09-30 23:45:09,508 using in-cluster config.

[mlrun] 2019-09-24 13:13:45,340 starting run my-training uid=ca4c0d98365047969ae1e43b8ea53cd6
[mlrun] 2019-09-24 13:13:45,459 started training
Run: my-training (uid=ca4c0d98365047969ae1e43b8ea53cd6-1)
Params: p1=2, p2=2

[mlrun] 2019-09-24 13:13:45,555 started training
Run: my-training (uid=ca4c0d98365047969ae1e43b8ea53cd6-2)
Params: p1=6, p2=2

[mlrun] 2019-09-24 13:13:45,636 started training
Run: my-training (uid=ca4c0d98365047969ae1e43b8ea53cd6-3)
Params: p1=4, p2=2

type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid ca4c0d98365047969ae1e43b8ea53cd6 
[mlrun] 2019-09-24 13:13:45,808 run executed, status=completed
Pod my-training-gkt7l last status is: succeeded


## Create a KubeFlow Pipeline

In [12]:
import kfp
from kfp import dsl

In [13]:
# output path for artifacts produced by the pipeline steps
# NOTE(review): `{{workflow.uid}}` looks like a placeholder filled in per workflow run — confirm;
# the path is hard-coded to the `admin` user directory
artifacts_path = 'v3io:///users/admin/mlrun/kfp/{{workflow.uid}}/'

In [14]:
# KubeFlow pipeline with a single step that runs the `training` handler through `fn`
@dsl.pipeline(name='job test', description='Shows how to use mlrun.')
def tr_pipeline(p1=9):
    # describe the task: handler to call, artifacts output path, and declared output
    task = NewTask(handler='training', out_path=artifacts_path, outputs=['model.txt'])
    task = task.with_params(p1=p1)

    # turn the function into a pipeline step and add the v3io volume to it
    train_step = fn.as_step(task).apply(mount_v3io())

In [15]:
# compile the pipeline function into the file 'trpipe.yaml'
kfp.compiler.Compiler().compile(tr_pipeline, 'trpipe.yaml')



In [16]:
# submit the pipeline as run 'tr 1' of experiment 'tr', passing p1=8 as the pipeline argument
client = kfp.Client(namespace='default-tenant')
arguments = {'p1': 8}
run_result = client.create_run_from_pipeline_func(tr_pipeline, arguments, run_name='tr 1', experiment_name='tr')