# Exporting and Importing MLRun Functions

In [1]:
# nuclio: ignore
import nuclio

## Write a function

In [2]:
import os
import zipfile

def open_archive(context, 
                 target_dir: str = 'content',
                 archive_url: str = ''):
    """Open a file/object archive into a target directory
    
    :param target_dir:   target directory
    :param archive_url:  source archive path/url
    
    :returns: content dir
    """
        
    # Define locations
    os.makedirs(target_dir, exist_ok=True)
    context.logger.info('Verified directories')
    
    # Extract dataset from zip
    context.logger.info('Extracting zip')
    zip_ref = zipfile.ZipFile(archive_url, 'r')
    zip_ref.extractall(target_dir)
    zip_ref.close()
    
    context.logger.info(f'extracted archive to {target_dir}')
    context.log_artifact('content', local_path=target_dir)


In [3]:
# nuclio: end-code

## Export to a file

In [4]:
import mlrun

In [5]:
# create job function object from notebook code
fn = mlrun.code_to_function('file_utils', runtime='job', with_doc=True,
                            handler=open_archive, image='mlrun/mlrun')

# add metadata (for templates and reuse)
fn.spec.default_handler = 'open_archive'
fn.spec.description = "this function opens a zip archive into a local/mounted folder"
fn.metadata.categories = ['fileutils']
fn.metadata.labels = {'author': 'me'}

[mlrun] 2020-05-03 18:12:13,251 "runtime=" param is deprecated, use "kind="


In [6]:
print(fn.to_yaml())

kind: job
metadata:
  name: file-utils
  tag: ''
  project: ''
  labels:
    author: me
  categories:
  - fileutils
spec:
  command: ''
  args: []
  image: mlrun/mlrun
  volumes: []
  volume_mounts: []
  env: []
  default_handler: open_archive
  entry_points:
    open_archive:
      name: open_archive
      doc: Open a file/object archive into a target directory
      parameters:
      - name: context
      - name: target_dir
        type: str
        doc: target directory
        default: content
      - name: archive_url
        type: str
        doc: source archive path/url
      outputs:
      - doc: content dir
      lineno: 6
  description: this function opens a zip archive into a local/mounted folder
  build:
    functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDIwLTA1LTAzIDE4OjEyCgppbXBvcnQgb3MKaW1wb3J0IHppcGZpbGUKCmRlZiBvcGVuX2FyY2hpdmUoY29udGV4dCwgCiAgICAgICAgICAgICAgICAgdGFyZ2V0X2Rpcjogc3RyID0gJ2NvbnRlbnQnLAogICAgICAgICAgICAgICAgIGFyY2hpdm

In [7]:
# save to a file (and can be pushed to a git)
fn.export('function.yaml')

[mlrun] 2020-05-03 18:12:16,262 function spec saved to path: function.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f43ff874780>

## Import the function and run

In [8]:
mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or 'http://mlrun-api:8080'

In [9]:
# load from local file
xfn = mlrun.import_function('./function.yaml')

# load function from MLRun functions hub 
# xfn = mlrun.import_function('hub://open_archive')

# get function doc
xfn.doc()

function: file-utils
this function opens a zip archive into a local/mounted folder
default handler: open_archive
entry points:
  open_archive: Open a file/object archive into a target directory
    context  - 
    target_dir(str)  - target directory, default=content
    archive_url(str)  - source archive path/url


In [10]:
# configute it: mount on iguazio fabric, set as interactive (return stdout)
xfn.apply(mlrun.mount_v3io())

# create and run the task
images_path = '/User/mlrun/examples/images'
open_archive_task = mlrun.NewTask('download',  
    params={'target_dir': images_path},
    inputs={'archive_url': 'http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip'})

### Test locally

In [11]:
run = mlrun.run_local(open_archive_task, xfn)

[mlrun] 2020-05-03 18:12:16,318 starting run download uid=11954f56235a43b3853ca457f4d6450c  -> http://mlrun-api:8080
[mlrun] 2020-05-03 18:12:16,356 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-05-03 18:12:17,928 Verified directories
[mlrun] 2020-05-03 18:12:17,928 Extracting zip
[mlrun] 2020-05-03 18:12:26,948 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-05-03 18:12:26,957 Traceback (most recent call last):
  File "/User/repos/mlrun/mlrun/runtimes/local.py", line 184, in exec_from_params
    val = handler(*args_list)
  File "/tmp/tmp17zfeq2x.py", line 26, in open_archive
    context.log_artifact('content', local_path=target_dir)
  File "/User/repos/mlrun/mlrun/execution.py", line 349, in log_artifact
    format=format)
  File "/User/repos/mlrun/mlrun/artifacts/manager.py", line 107, in log_artifact
    raise ValueError('local/source path must be a relative path, '
ValueError: local/source path must be a relative path,

local/source path must be a relative path, cannot be remote or absolute path, use target_path for absolute paths


project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
,...f4d6450c,0,May 03 18:12:16,error,download,host=jupyter-6c5fccf844-gxlrwkind=owner=adminv3io_user=admin,archive_url,target_dir=/User/mlrun/examples/images,,


to track results use .show() or .logs() or in CLI: 
!mlrun get run 11954f56235a43b3853ca457f4d6450c  , !mlrun logs 11954f56235a43b3853ca457f4d6450c 
[mlrun] 2020-05-03 18:12:27,066 run executed, status=error


RunError: local/source path must be a relative path, cannot be remote or absolute path, use target_path for absolute paths

### Run as a cluster job

In [None]:
run = xfn.run(open_archive_task)