# Exporting and Importing MLRun Functions

## Write a function

In [2]:
import os
import zipfile
from mlrun import DataItem


def open_archive(context, target_dir: str, archive_url: DataItem = None):
    """Open a file/object archive into a target directory

    :param context:      function context (provides the logger and log_artifact)
    :param target_dir:   target directory
    :param archive_url:  source archive path/url (MLRun DataItem object)

    :returns: content dir
    """
    # archive_url defaults to None only so it can be passed by keyword;
    # fail early with a clear message instead of an AttributeError on None
    if archive_url is None:
        raise ValueError("archive_url must be provided")

    # Define locations
    archive_file = archive_url.local()
    os.makedirs(target_dir, exist_ok=True)
    context.logger.info("Verified directories")

    # Extract dataset from zip; the context manager guarantees the archive
    # handle is closed even if extraction fails part-way
    context.logger.info("Extracting zip")
    with zipfile.ZipFile(archive_file, "r") as zip_ref:
        zip_ref.extractall(target_dir)

    context.logger.info(f"extracted archive to {target_dir}")
    # use target_path= to specify an absolute target path (vs artifact_path)
    # NOTE: nothing is returned from this handler; the extracted content
    # directory is exposed through the "content" artifact logged here
    context.log_artifact("content", target_path=target_dir)

In [3]:
# mlrun: end-code

## Export to a file

In [4]:
# Wrap the notebook code in an MLRun "job" function object and attach its
# documentation/metadata (description, categories, labels)
import mlrun

fn = mlrun.code_to_function(
    name="file_utils",
    kind="job",
    image="mlrun/mlrun",
    handler="open_archive",
    labels={"author": "me"},
    categories=["fileutils"],
    description="this function opens a zip archive into a local/mounted folder",
)

In [5]:
# print the function object (metadata, spec and embedded source code) as YAML
print(fn.to_yaml())

kind: job
metadata:
  name: file-utils
  tag: ''
  project: ''
  labels:
    author: me
  categories:
  - fileutils
spec:
  command: ''
  args: []
  image: mlrun/mlrun
  volumes: []
  volume_mounts: []
  env: []
  default_handler: open_archive
  entry_points:
    open_archive:
      name: open_archive
      doc: Open a file/object archive into a target directory
      parameters:
      - name: context
      - name: target_dir
        type: str
        doc: target directory
      - name: archive_url
        doc: source archive path/url (MLRun DataItem object)
      outputs:
      - doc: content dir
      lineno: 7
  description: this function opens a zip archive into a local/mounted folder
  build:
    functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG9zCmltcG9ydCB6aXBmaWxlCmltcG9ydCBtbHJ1bgoKZGVmIG9wZW5fYXJjaGl2ZShjb250ZXh0LCAKICAgICAgICAgICAgICAgICB0YXJnZXRfZGlyOiBzdHIsCiAgICAgICAgICAgICAgICAgYXJjaGl2ZV91cmw6IG1scnVuLkRhdGFJdGVtID0gTm9uZSk6CiAgI

In [6]:
# save the function spec to a file (which can then be pushed to a git repository)
fn.export("function.yaml")

[mlrun] 2020-06-08 21:50:59,815 function spec saved to path: function.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f91776d5c88>

## Import the function and run

In [7]:
# keep an already-configured MLRun DB path; otherwise fall back to
# http://mlrun-api:8080
mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or "http://mlrun-api:8080"

In [8]:
# load the function object from the local YAML file exported earlier
xfn = mlrun.import_function("./function.yaml")

# alternatively, load the function from the MLRun functions hub
# xfn = mlrun.import_function('hub://open_archive')

# print the function doc (description, default handler and entry points)
xfn.doc()

function: file-utils
this function opens a zip archive into a local/mounted folder
default handler: open_archive
entry points:
  open_archive: Open a file/object archive into a target directory
    context  - 
    target_dir(str)  - target directory
    archive_url  - source archive path/url (MLRun DataItem object)


In [9]:
from os import path
from mlrun.platforms import auto_mount

# auto_mount() chooses automatically between the Iguazio platform mount and a
# k8s PVC. For a PVC, set the env var MLRUN_PVC_MOUNT=<pvc-name>:<mount-path>,
# or use mount_pvc() instead
xfn.apply(auto_mount())

# define the task: extract the sample archive into ./images (absolute path);
# the task is only created here and is executed in the following cells
images_path = path.abspath("images")
open_archive_task = mlrun.new_task(
    name="download",
    inputs={
        "archive_url": "http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip",
    },
    params={
        "target_dir": images_path,
    },
)

### Test locally

In [10]:
# execute the task with the imported function as a local run
run = mlrun.run_local(open_archive_task, xfn)

[mlrun] 2020-06-08 21:51:03,218 artifact path is not defined or is local, artifacts will not be visible in the UI
[mlrun] 2020-06-08 21:51:03,226 starting run download uid=1a4c43546ad7437ea8b9055e601c9fad  -> http://mlrun-api:8080
[mlrun] 2020-06-08 21:51:03,259 starting local run: /tmp/tmpq7o4xvch.py # open_archive
[mlrun] 2020-06-08 21:51:03,276 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-06-08 21:51:04,365 Verified directories
[mlrun] 2020-06-08 21:51:04,365 Extracting zip
[mlrun] 2020-06-08 21:51:11,813 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-06-08 21:51:11,830 log artifact content at /User/mlrun/examples/images, size: None, db: Y



project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...601c9fad,0,Jun 08 21:51:03,completed,download,v3io_user=adminkind=owner=adminhost=jupyter-65887d7ffb-5jsn2,archive_url,target_dir=/User/mlrun/examples/images,,content


to track results use .show() or .logs() or in CLI: 
!mlrun get run 1a4c43546ad7437ea8b9055e601c9fad --project default , !mlrun logs 1a4c43546ad7437ea8b9055e601c9fad --project default
[mlrun] 2020-06-08 21:51:11,877 run executed, status=completed


### Run as a cluster job

In [None]:
from mlrun import mlconf

# keep the configured DB path if one is set; otherwise fall back to "./"
mlconf.dbpath = mlconf.dbpath or "./"
# artifact_path is passed to xfn.run() in the next cell, so this cell must be
# executed first; it uses the configured artifact path if set, otherwise the
# absolute path of the local "data" directory
artifact_path = mlconf.artifact_path or path.abspath("data")

In [11]:
# run the task through the function's own runtime (kind "job"), passing the
# artifact_path defined in the previous cell
run = xfn.run(open_archive_task, artifact_path=artifact_path)

[mlrun] 2020-06-08 21:51:11,900 starting run download uid=70ac5c42098b49a1965ff80eb3bed2e7  -> http://mlrun-api:8080
[mlrun] 2020-06-08 21:51:11,983 Job is running in the background, pod: download-fbm52
[mlrun] 2020-06-08 21:51:16,068 starting local run: main.py # open_archive
[mlrun] 2020-06-08 21:51:16,083 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-06-08 21:51:16,949 Verified directories
[mlrun] 2020-06-08 21:51:16,950 Extracting zip
[mlrun] 2020-06-08 21:51:24,350 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-06-08 21:51:24,364 log artifact content at /User/mlrun/examples/images, size: None, db: Y

[mlrun] 2020-06-08 21:51:24,375 run executed, status=completed
final state: succeeded


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...b3bed2e7,0,Jun 08 21:51:16,completed,download,v3io_user=adminkind=jobowner=adminhost=download-fbm52,archive_url,target_dir=/User/mlrun/examples/images,,content


to track results use .show() or .logs() or in CLI: 
!mlrun get run 70ac5c42098b49a1965ff80eb3bed2e7 --project default , !mlrun logs 70ac5c42098b49a1965ff80eb3bed2e7 --project default
[mlrun] 2020-06-08 21:51:27,212 run executed, status=completed


### Loading and running functions as local python modules

In [12]:
from mlrun import function_to_module, get_or_create_ctx

# load the imported function's code as a Python module, so that its handlers
# (e.g. open_archive) can be called directly
mod = function_to_module(xfn)

In [13]:
# create a context object and a DataItem object for the source archive
# you can also use existing context and data objects (e.g. from a parent function)
context = get_or_create_ctx("myfunc")
data = mlrun.run.get_dataitem(
    "http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip"
)

In [14]:
# call the handler directly as a regular Python function, using the context
# and DataItem objects created above
mod.open_archive(context, target_dir=images_path, archive_url=data)

[mlrun] 2020-06-08 21:51:27,245 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-06-08 21:51:28,099 Verified directories
[mlrun] 2020-06-08 21:51:28,100 Extracting zip
[mlrun] 2020-06-08 21:51:35,622 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-06-08 21:51:35,637 log artifact content at /User/mlrun/examples/images, size: None, db: Y


In [15]:
# print the context (run metadata, spec and status, including logged artifacts) as YAML
print(context.to_yaml())

kind: run
metadata:
  name: download
  uid: 1a4c43546ad7437ea8b9055e601c9fad
  iteration: 0
  project: default
  labels:
    v3io_user: admin
    kind: ''
    owner: admin
    host: jupyter-65887d7ffb-5jsn2
  annotations: {}
spec:
  function: /file-utils
  log_level: info
  parameters:
    target_dir: /User/mlrun/examples/images
  outputs: []
  output_path: ''
  inputs:
    archive_url: http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip
  data_stores: []
status:
  state: completed
  results: {}
  start_time: '2020-06-08T21:51:03.261376+00:00'
  last_update: '2020-06-08T21:51:11.831849+00:00'
  artifacts:
  - key: content
    kind: ''
    iter: 0
    tree: 1a4c43546ad7437ea8b9055e601c9fad
    target_path: /User/mlrun/examples/images
    db_key: download_content

