# Describe Dask Function

## save

In [1]:
import mlrun
import yaml

# Read the function's metadata (name, spec, description, ...) from item.yaml.
# NOTE(review): FullLoader accepts more than plain YAML data types; if item.yaml
# only holds plain mappings/lists/scalars, yaml.safe_load would be the tighter
# choice -- confirm before switching.
with open("item.yaml") as handle:
    items = yaml.load(handle, Loader=yaml.FullLoader)

# Artifacts are written relative to the current directory.
mlrun.set_environment(artifact_path="./")

# Build the function object from the source file and settings listed in
# item.yaml (kind, handler, filename, image and requirements live under "spec").
function_name = items["name"]
spec = items["spec"]
fn = mlrun.code_to_function(
    function_name,
    kind=spec["kind"],
    handler=spec["handler"],
    filename=spec["filename"],
    image=spec["image"],
    description=items["description"],
    categories=items["categories"],
    labels=items["labels"],
    requirements=spec["requirements"],
)

# Write the function spec to describe_dask.yaml; as the last expression, the
# cell also displays the value returned by export().
fn.export("describe_dask.yaml")

> 2021-02-17 16:25:40,813 [info] function spec saved to path: describe_dask.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f5efa1d3dd0>

## examples

In [2]:
# Attach a volume mount to the function; auto_mount() chooses which mount to
# use (see the MLRun documentation for the selection rules).
mount_modifier = mlrun.platforms.auto_mount()
fn.apply(mount_modifier)

# Path the sample CSV is downloaded to and later passed as the run's
# "dataset" input.
DATA_URL = "/User/iris.csv"

In [3]:
!curl -L "https://s3.wasabisys.com/iguazio/data/iris/iris_dataset.csv" > {DATA_URL}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2776  100  2776    0     0  19687      0 --:--:-- --:--:-- --:--:-- 19549


In [4]:
# Create a Dask cluster function named "dask_tests" (the name the run later
# refers to through its "dask_function" parameter).
dask_cluster = mlrun.new_function("dask_tests", kind="dask", image="mlrun/ml-models")

# Use the same auto-selected mount that is applied to `fn`, so the Dask pods
# and the job pod resolve DATA_URL to the same file. A hard-coded
# mount_v3io() here could diverge from whatever auto_mount() picks for `fn`.
dask_cluster.apply(mlrun.platforms.auto_mount())

# NOTE(review): remote=True presumably makes the cluster run as a remote
# (in-cluster) scheduler rather than a local one -- confirm against MLRun docs.
dask_cluster.spec.remote = True
dask_cluster.with_requests(mem="2G")

# Store the function definition; as the last expression, the cell displays the
# value returned by save().
dask_cluster.save()

> 2021-02-17 16:25:41,644 [info] using in-cluster config.


'3efd9b015f9527652feff1e5b9ec18377a6952e5'

In [5]:
from describe_dask import summarize

# The dataset input is the CSV at DATA_URL.
run_inputs = {"dataset": DATA_URL}

# "dask_function" is a db:// reference; presumably it resolves to the
# "dask_tests" function stored in the "default" project -- confirm.
run_params = {
    "label_column": "label",
    "dask_function": "db://default/dask_tests",
}

# Execute the summarize handler as a run named "tasks-describe".
run = fn.run(
    name="tasks-describe",
    handler=summarize,
    inputs=run_inputs,
    params=run_params,
)

> 2021-02-17 16:25:42,905 [info] starting run tasks-describe uid=e813ec0d74b945d8914f3fc57d2afd1d DB=http://mlrun-api:8080
> 2021-02-17 16:25:43,062 [info] Job is running in the background, pod: tasks-describe-xhbkf
> 2021-02-17 16:25:48,120 [info] using in-cluster config.
> 2021-02-17 16:25:53,324 [info] to get a dashboard link, use NodePort service_type
> 2021-02-17 16:25:53,324 [info] trying dask client at: tcp://mlrun-dask-tests-ec7e95be-2.default-tenant:8786
> 2021-02-17 16:25:53,354 [info] using remote dask scheduler (mlrun-dask-tests-ec7e95be-2) at: tcp://mlrun-dask-tests-ec7e95be-2.default-tenant:8786
> 2021-02-17 16:26:01,651 [info] run executed, status=completed
final state: completed


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...7d2afd1d,0,Feb 17 16:25:48,completed,tasks-describe,v3io_user=eyalskind=jobowner=eyalshost=tasks-describe-xhbkf,dataset,label_column=labeldask_function=db://default/dask_tests,scale_pos_weight=1.00,histogramsimbalancecorrelation


to track results use .show() or .logs() or in CLI: 
!mlrun get run e813ec0d74b945d8914f3fc57d2afd1d --project default , !mlrun logs e813ec0d74b945d8914f3fc57d2afd1d --project default
> 2021-02-17 16:26:02,284 [info] run executed, status=completed
