# Creating A Pipeline Using MLRUN

In [12]:
import kfp
from kfp import dsl
from mlrun import run_start, mlrun_op
from mlrun.iguazio import mount_v3io

<b>Test/Debug the code locally</b>

In [None]:
# Run training.py once, locally, through the mlrun command-line entry point
# before wiring it into a pipeline; `-p p1=5` and `-s file=secrets.txt` are
# handed to `mlrun run` exactly as written.
!python -m mlrun run -p p1=5 -s file=secrets.txt training.py

### Build & Run a KubeFlow Pipeline 
<b>Define steps (training, validation)</b>

In [3]:
def mlrun_train(p1, p2):
    """Build the 'training' pipeline step.

    Wraps ``/User/kubeflow/training.py`` with ``mlrun_op``. Both ``p1`` and
    ``p2`` are forwarded as regular run parameters, and the step declares
    ``model.txt`` and ``dataset.csv`` as outputs.

    Returns whatever ``mlrun_op`` returns for this step.
    """
    step_params = dict(p1=p1, p2=p2)
    step_outputs = {'model.txt': '', 'dataset.csv': ''}
    return mlrun_op(
        'training',
        command='/User/kubeflow/training.py',
        params=step_params,
        outputs=step_outputs,
        out_path='v3io:///bigdata/mlrun/{{workflow.uid}}/',
        rundb='/User',
    )
                    
# Validation step: consumes the model file produced by the training step.
def mlrun_validate(modelfile):
    """Build the 'validation' pipeline step.

    Wraps ``/User/kubeflow/validation.py`` with ``mlrun_op`` and passes
    ``modelfile`` as the ``model.txt`` input.

    Returns whatever ``mlrun_op`` returns for this step.
    """
    step_inputs = {'model.txt': modelfile}
    return mlrun_op(
        'validation',
        command='/User/kubeflow/validation.py',
        inputs=step_inputs,
        out_path='v3io:///bigdata/mlrun/{{workflow.uid}}/',
        rundb='/User',
    )

<b>Create a DSL (execution graph/DAG)</b>

In [4]:
@dsl.pipeline(
    name='My MLRUN pipeline',
    description='Shows how to use mlrun.',
)
def mlrun_pipeline(p1=5, p2='"text"'):
    # Two-step graph: training, then validation of the trained model.
    # Each step has the mount_v3io() modifier applied to it.

    # Step 1: training, driven by the pipeline parameters p1 and p2.
    training_op = mlrun_train(p1, p2)
    train_step = training_op.apply(mount_v3io())

    # Step 2: feed the first step's model output into the second step.
    model_ref = train_step.outputs['model-txt']
    validation_op = mlrun_validate(model_ref)
    validate_step = validation_op.apply(mount_v3io())

<b>Create and run experiment</b>

In [6]:
# Compile the mlrun_pipeline function into the package file 'mlrunpipe.yaml',
# which the submission cell below passes to the KFP client.
kfp.compiler.Compiler().compile(mlrun_pipeline, 'mlrunpipe.yaml')

In [7]:
# Client for Kubeflow Pipelines, using the 'default-tenant' namespace.
client = kfp.Client(namespace='default-tenant')

# Parameter override for this run: p1 is sent as 4; p2 is not overridden here.
arguments = dict(p1=4)

# Create the experiment, then submit the compiled package as a run under it.
experiment = client.create_experiment('mlrun demo')
run_result = client.run_pipeline(
    experiment.id,
    'mlrun pipe demo',
    'mlrunpipe.yaml',
    arguments,
)

# Building a Pipeline with Hyperparams

* The first step runs training in parallel (depending on the selected runtime) and generates a list of results
* 2nd step selects the best fit result (e.g. best accuracy)
* 3rd step runs validation on the selected (best fit) model

In [8]:
def mlrun_train(p1, p2):
    """Build the 'training' pipeline step for the hyper-parameter pipeline.

    Wraps ``/User/kubeflow/training.py`` with ``mlrun_op``. ``p2`` is passed
    as a regular parameter, while ``p1`` is passed under ``hyperparams``.

    Returns whatever ``mlrun_op`` returns for this step.
    """
    op_settings = {
        'command': '/User/kubeflow/training.py',
        'params': {'p2': p2},
        'hyperparams': {'p1': p1},
        'out_path': 'v3io:///bigdata/mlrun/{{workflow.uid}}/',
        'rundb': '/User',
    }
    return mlrun_op('training', **op_settings)
                    
# Selection step: picks the best-fit result out of the training iterations.
def mlrun_select(iterations):
    """Build the 'best_fit' pipeline step.

    Wraps ``/User/kubeflow/best_fit.py`` with ``mlrun_op``, passes
    ``iterations`` as the ``iterations`` parameter and declares ``model.txt``
    as the step's output.

    Returns whatever ``mlrun_op`` returns for this step.
    """
    op_settings = {
        'command': '/User/kubeflow/best_fit.py',
        'params': {'iterations': iterations},
        'outputs': {'model.txt': ''},
        'out_path': 'v3io:///bigdata/mlrun/{{workflow.uid}}/',
        'rundb': '/User',
    }
    return mlrun_op('best_fit', **op_settings)

# Validation step: consumes a model file produced by an upstream step.
def mlrun_validate(modelfile):
    """Build the 'validation' pipeline step.

    Wraps ``/User/kubeflow/validation.py`` with ``mlrun_op`` and passes
    ``modelfile`` as the ``model.txt`` input.

    Returns whatever ``mlrun_op`` returns for this step.
    """
    op_settings = {
        'command': '/User/kubeflow/validation.py',
        'inputs': {'model.txt': modelfile},
        'out_path': 'v3io:///bigdata/mlrun/{{workflow.uid}}/',
        'rundb': '/User',
    }
    return mlrun_op('validation', **op_settings)

In [9]:
@dsl.pipeline(
    name='My MLRUN pipeline',
    description='Shows how to use mlrun.'
)
def mlrun_pipeline(
   p1 = [5, 6, 2] , p2 = '"text"'
):
    # Three-step graph: hyper-parameter training -> best-fit selection ->
    # validation of the selected model. p1 is the list of values handed to
    # the training step as a hyper-parameter; p2 is a regular parameter.

    # v3io_cred is used below but is not among the notebook's top-level
    # imports, so compiling this pipeline on a fresh kernel raised NameError.
    # NOTE(review): assumed to live next to mount_v3io in mlrun.iguazio --
    # confirm against the installed mlrun version.
    from mlrun.iguazio import v3io_cred

    train = mlrun_train(p1, p2).apply(mount_v3io()).apply(v3io_cred())

    # feed the result list into a "best fit" selection step
    selector = mlrun_select(train.outputs['iterations']).apply(mount_v3io())

    # feed the best fit model into a validation step
    validate = mlrun_validate(selector.outputs['model-txt']).apply(mount_v3io())

In [10]:
# Compile mlrun_pipeline into the package file 'mlrunpipe_hyper.yaml'. When the
# cells are run in order, the name refers to the hyper-parameter pipeline,
# because that later definition replaces the earlier one of the same name.
kfp.compiler.Compiler().compile(mlrun_pipeline, 'mlrunpipe_hyper.yaml')

In [11]:
# Client for Kubeflow Pipelines, using the 'default-tenant' namespace.
client = kfp.Client(namespace='default-tenant')

# Parameter override for this run: p1 is sent as the list [5, 7, 3].
arguments = dict(p1=[5, 7, 3])

# Create the experiment, then submit the compiled package as a run under it.
experiment = client.create_experiment('mlrun demo hyper')
run_result = client.run_pipeline(
    experiment.id,
    'mlrun hyper pipe demo',
    'mlrunpipe_hyper.yaml',
    arguments,
)