# Using MLRUN with different runtimes + parallelism (hyper params)
<b>Using function, local file, Dask, remote function</b>

In [None]:
# to install run the next line
!pip install git+https://github.com/v3io/mlrun.git

In [1]:
from mlrun import get_or_create_ctx, run_start
import yaml
import pandas as pd

In [2]:
# Base run specification shared by the examples below.
# note: you need to create/specify a secrets file with credentials for remote data access (e.g. in S3 or v3io)
run_spec = dict(
    metadata=dict(labels=dict(owner='yaronh')),
    spec=dict(
        parameters=dict(p1=5),
        input_objects=[],
        log_level='info',
        secret_sources=[dict(kind='file', source='secrets.txt')],
    ),
)

# Hyper-parameter grid: one run iteration per listed value of p2.
hyper = dict(p2=['aa', 'bb', 'cc'])

### Run external code 

In [11]:
# Run the external script example1.py with the shared run_spec,
# using the current directory as the run DB, then show the resulting task as YAML.
task = run_start(run_spec, command='example1.py', rundb='./')
print(yaml.dump(task))

Run: mytask (uid=be05637542b14d14875f695a90dd6400)
Params: p1=5, p2=a-string
accesskey = 1234
file
i'm a local input file


metadata:
  annotations:
    message: aa
  iteration: 0
  labels:
    owner: yaronh
  name: mytask
  project: ''
  tag: ''
  uid: be05637542b14d14875f695a90dd6400
spec:
  data_stores: []
  default_output_path: ''
  input_objects:
  - key: infile.txt
    path: infile.txt
  log_level: info
  output_artifacts: []
  parameters:
    p1: 5
    p2: a-string
  runtime:
    command: example1.py
    kind: local
status:
  last_update: '2019-07-11 16:33:10.695421'
  output_artifacts:
  - description: ''
    key: test.txt
    src_path: ''
    target_path: test.txt
    viewer: ''
  - description: ''
    key: test.html
    src_path: ''
    target_path: test.html
    viewer: web-app
  - description: ''
    format: ''
    header:
    - A
    - B
    - C
    key: tbl.csv
    src_path: ''
    target_path: tbl.csv
    viewer: table
  - description: ''
    key: chart.html
    src_path

<b>With Hyper Params</b>

In [12]:
# Same external script, now with the `hyper` grid: one iteration per value of p2.
# `task` is reused by the results-table cell further down.
task = run_start(run_spec, command='example1.py', rundb='./', hyperparams=hyper)
print(yaml.dump(task))

Run: mytask (uid=6663130ddd824d559a587eda77ea0390-1)
Params: p1=5, p2=aa
accesskey = 1234
file
i'm a local input file


Run: mytask (uid=6663130ddd824d559a587eda77ea0390-2)
Params: p1=5, p2=bb
accesskey = 1234
file
i'm a local input file


Run: mytask (uid=6663130ddd824d559a587eda77ea0390-3)
Params: p1=5, p2=cc
accesskey = 1234
file
i'm a local input file


metadata:
  labels:
    owner: yaronh
  uid: 6663130ddd824d559a587eda77ea0390
spec:
  hyperparams:
    p2:
    - aa
    - bb
    - cc
  input_objects: []
  parameters:
    p1: 5
  runtime:
    command: example1.py
    kind: local
  secret_sources:
  - kind: file
    source: secrets.txt
status:
  iterations:
  - - iter
    - output.accuracy
    - output.latency
    - param.p1
    - param.p2
    - state
  - - 1
    - 10
    - 15
    - 5
    - aa
    - completed
  - - 2
    - 10
    - 15
    - 5
    - bb
    - completed
  - - 3
    - 10
    - 15
    - 5
    - cc
    - completed
  last_update: '2019-07-11 16:33:59.435800'
  start_time: 

View the results in a table:

In [13]:
# status.iterations is a list of rows: the first row holds the column names,
# the remaining rows hold one record per iteration.
# Named `iterations` (not `iter`) so the Python builtin iter() is not shadowed
# for the rest of the kernel session.
iterations = task['status']['iterations']
pd.DataFrame(iterations[1:], columns=iterations[0]).set_index('iter')

Unnamed: 0_level_0,output.accuracy,output.latency,param.p1,param.p2,state
iter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,10,15,5,aa,completed
2,10,15,5,bb,completed
3,10,15,5,cc,completed


<b>Using CLI</b>

In [6]:
# Same kind of run from the command line:
#   -x p1="[1,2]"  -> p1 becomes a hyper-parameter list (one iteration per value)
#   -p p2=5        -> p2 is a fixed parameter
#   --rundb ./     -> run DB location
!python -m mlrun run -x p1="[1,2]" -p p2=5 --rundb ./ example1.py

Run: mytask (uid=8389f9e7d8964865bd36200b07f3ee5b-1)
Params: p1=1, p2=5
accesskey = None
file
i'm a local input file


Run: mytask (uid=8389f9e7d8964865bd36200b07f3ee5b-2)
Params: p1=2, p2=5
accesskey = None
file
i'm a local input file


metadata:
  labels:
    owner: iguazio
  uid: 8389f9e7d8964865bd36200b07f3ee5b
spec:
  runtime:
    kind: local
    command: example1.py
  parameters:
    p2: 5
  hyperparams:
    p1:
    - 1
    - 2
status:
  start_time: '2019-07-11 15:47:11.382039'
  iterations:
  - - iter
    - output.accuracy
    - output.latency
    - param.p1
    - param.p2
    - state
  - - 1
    - 2
    - 3
    - 1
    - 5
    - completed
  - - 2
    - 4
    - 6
    - 2
    - 5
    - completed
  state: completed
  last_update: '2019-07-11 15:47:17.044881'



### Using (inline) Code

In [14]:
# define a function with spec as parameter
import time
def handler(spec):
    """Example task handler that runs from a run spec.

    Obtains a context named 'mytask' for ``spec``, reads parameters
    ``p1`` (default 1) and ``p2`` (default 'a-string'), prints the run
    identity and parameters, sleeps for one second, and logs two outputs
    derived from ``p1``: 'accuracy' (p1 * 2) and 'loss' (p1 * 3).

    :param spec: run specification passed through to get_or_create_ctx
    :return: the context serialized with ``to_json()``
    """
    context = get_or_create_ctx('mytask', spec=spec)
    first = context.get_param('p1', 1)
    second = context.get_param('p2', 'a-string')

    # report run identity and the resolved parameters
    print(f'Run: {context.name} (uid={context.uid})')
    print(f'Params: p1={first}, p2={second}')

    # stand-in for real work
    time.sleep(1)

    # log outputs in a fixed order: accuracy first, then loss
    for output_key, factor in (('accuracy', 2), ('loss', 3)):
        context.log_output(output_key, first * factor)

    return context.to_json()

<b>Run locally in the notebook</b>

In [15]:
%%timeit -n 1 -r 1
resp = run_start({}, handler=handler, hyperparams=hyper)
print(yaml.dump(resp))

Run: mytask (uid=46cebf94d56e4529afc9e2e4da89e6d5-1)
Params: p1=1, p2=aa
Run: mytask (uid=46cebf94d56e4529afc9e2e4da89e6d5-2)
Params: p1=1, p2=bb
Run: mytask (uid=46cebf94d56e4529afc9e2e4da89e6d5-3)
Params: p1=1, p2=cc
metadata:
  uid: 46cebf94d56e4529afc9e2e4da89e6d5
spec:
  hyperparams:
    p2:
    - aa
    - bb
    - cc
  parameters: {}
  runtime:
    kind: handler
status:
  iterations:
  - - iter
    - output.accuracy
    - output.loss
    - param.p1
    - param.p2
    - state
  - - 1
    - 2
    - 3
    - 1
    - aa
    - completed
  - - 2
    - 2
    - 3
    - 1
    - bb
    - completed
  - - 3
    - 2
    - 3
    - 1
    - cc
    - completed
  last_update: '2019-07-11 16:34:50.373346'
  start_time: '2019-07-11 16:34:47.211350'
  state: completed

3.16 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


<b>Run in Dask (locally or using Dask k8s)</b>

In [16]:
%%timeit -n 1 -r 1
resp = run_start({}, runtime={'kind': 'dask'}, handler=handler, hyperparams=hyper)
print(yaml.dump(resp))

metadata:
  uid: d34496644e3e454b9a4bb21bb024cb40
spec:
  hyperparams:
    p2:
    - aa
    - bb
    - cc
  parameters: {}
  runtime:
    kind: dask
status:
  iterations:
  - - iter
    - output.accuracy
    - output.loss
    - param.p1
    - param.p2
    - state
  - - 1
    - 2
    - 3
    - 1
    - aa
    - completed
  - - 2
    - 2
    - 3
    - 1
    - bb
    - completed
  - - 3
    - 2
    - 3
    - 1
    - cc
    - completed
  last_update: '2019-07-11 16:35:03.726426'
  start_time: '2019-07-11 16:35:02.625878'
  state: completed

1.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Run Using Nuclio (with parallelism)

In [3]:
# bypass Jupyter asyncio bug
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and the remote runs below need to start one as well — confirm.
import nest_asyncio
nest_asyncio.apply()

In [6]:
%%timeit -n 1 -r 1
task = run_start(run_spec, command='http://mlrun.default-tenant.svc:8080', rundb='./', hyperparams={'p1': [2,4,7,1,8,9]})
print(yaml.dump(task))

2019-07-14 01:32:35.402877  info   web  This is an unstructured log  
 ----------
2019-07-14 01:32:35.403886  info   web  This is an unstructured log  
 ----------
2019-07-14 01:32:35.403886  info   web  This is an unstructured log  
 ----------
2019-07-14 01:32:35.404047  info   web  This is an unstructured log  
 ----------
2019-07-14 01:32:35.403602  info   web  This is an unstructured log  
 ----------
2019-07-14 01:32:35.404390  info   web  This is an unstructured log  
 ----------
metadata:
  labels:
    owner: yaronh
  uid: 8f2f0ceaf5df4e39ab35e819daf5c949
spec:
  hyperparams:
    p1:
    - 2
    - 4
    - 7
    - 1
    - 8
    - 9
  input_objects: []
  log_level: info
  parameters:
    p1: 5
  runtime:
    command: http://mlrun.default-tenant.svc:8080
    kind: remote
  secret_sources:
  - kind: file
    source: secrets.txt
status:
  iterations:
  - - iter
    - output.accuracy
    - output.latency
    - param.p1
    - param.p2
    - state
  - - 1
    - 4
    - 6
    - 2
    - 

In [5]:
# CLI run against a remote function URL instead of a local script:
# -x sweeps p1 over [1, 2], -p fixes p2 to 5.
# NOTE(review): the address below is hard-coded — replace it with your own function endpoint.
!python -m mlrun run -x p1="[1,2]" -p p2=5 --rundb ./ http://52.179.174.235:31507/

2019-07-14 01:24:19.016081  info   web  This is an unstructured log  
 ----------
2019-07-14 01:24:19.015679  info   web  This is an unstructured log  
 ----------
metadata:
  labels:
    owner: iguazio
  uid: 1eb17bfe14c540309b5323cfdc66709b
spec:
  runtime:
    kind: remote
    command: http://52.179.174.235:31507/
  parameters:
    p2: 5
  hyperparams:
    p1:
    - 1
    - 2
status:
  start_time: '2019-07-14 01:24:19.005233'
  iterations:
  - - iter
    - output.accuracy
    - output.latency
    - param.p1
    - param.p2
    - state
  - - 1
    - 2
    - 3
    - 1
    - 5
    - running
  - - 2
    - 4
    - 6
    - 2
    - 5
    - running
  state: completed
  last_update: '2019-07-14 01:24:20.022776'

