# Exporting and Importing MLRun Functions

In [1]:
# nuclio: ignore
import nuclio

## Write a function

In [2]:
import os
import zipfile

def open_archive(context, 
                 target_dir: str = 'content',
                 archive_url: str = ''):
    """Open a file/object archive into a target directory
    
    :param target_dir:   target directory
    :param archive_url:  source archive path/url
    
    :returns: content dir
    """
        
    # Define locations
    os.makedirs(target_dir, exist_ok=True)
    context.logger.info('Verified directories')
    
    # Extract dataset from zip
    context.logger.info('Extracting zip')
    zip_ref = zipfile.ZipFile(archive_url, 'r')
    zip_ref.extractall(target_dir)
    zip_ref.close()
    
    context.logger.info(f'extracted archive to {target_dir}')
    context.log_artifact('content', local_path=target_dir)


In [3]:
# nuclio: end-code

## Export to a file

In [4]:
import mlrun

In [5]:
# create job function object from notebook code
fn = mlrun.code_to_function('file_utils', runtime='job', with_doc=True,
                            handler=open_archive, image='mlrun/mlrun')

# add metadata (for templates and reuse)
fn.spec.default_handler = 'open_archive'
fn.spec.description = "this function opens a zip archive into a local/mounted folder"
fn.metadata.categories = ['fileutils']
fn.metadata.labels = {'author': 'me'}

[mlrun] 2020-03-30 18:53:44,644 "runtime=" param is deprecated, use "kind="


In [6]:
print(fn.to_yaml())

kind: job
metadata:
  name: file-utils
  tag: ''
  project: ''
  labels:
    author: me
  categories:
  - fileutils
spec:
  command: ''
  args: []
  image: mlrun/mlrun
  volumes: []
  volume_mounts: []
  env: []
  default_handler: open_archive
  entry_points:
    open_archive:
      name: open_archive
      doc: Open a file/object archive into a target directory
      parameters:
      - name: context
      - name: target_dir
        type: str
        doc: target directory
        default: content
      - name: archive_url
        type: str
        doc: source archive path/url
      outputs:
      - doc: content dir
      lineno: 6
  description: this function opens a zip archive into a local/mounted folder
  build:
    functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDIwLTAzLTMwIDE4OjUzCgppbXBvcnQgb3MKaW1wb3J0IHppcGZpbGUKCmRlZiBvcGVuX2FyY2hpdmUoY29udGV4dCwgCiAgICAgICAgICAgICAgICAgdGFyZ2V0X2Rpcjogc3RyID0gJ2NvbnRlbnQnLAogICAgICAgICAgICAgICAgIGFyY2hpdm

In [7]:
# save to a file (and can be pushed to a git)
fn.export('function.yaml')

[mlrun] 2020-03-30 18:53:46,971 function spec saved to path: function.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7ff157081e10>

## Import the function and run

In [8]:
mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or 'http://mlrun-api:8080'

In [12]:
# load from local file
xfn = mlrun.import_function('./function.yaml')

# load function from MLRun functions hub 
# xfn = mlrun.import_function('hub://open_archive')

# get function doc
xfn.doc()

function: file-utils
this function opens a zip archive into a local/mounted folder
default handler: open_archive
entry points:
  open_archive: Open a file/object archive into a target directory
    {'name': 'context'}
    {'name': 'target_dir', 'type': 'str', 'doc': 'target directory', 'default': 'content'}
    {'name': 'archive_url', 'type': 'str', 'doc': 'source archive path/url'}


In [9]:
# configute it: mount on iguazio fabric, set as interactive (return stdout)
xfn.apply(mlrun.mount_v3io())

# create and run the task
images_path = '/User/mlrun/examples/images'
open_archive_task = mlrun.NewTask('download',  
    params={'target_dir': images_path},
    inputs={'archive_url': 'http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip'})

### Test locally

In [10]:
run = mlrun.run_local(open_archive_task, xfn)

[mlrun] 2020-03-30 18:54:23,142 artifact path is not defined or is local, artifacts will not be visible in the UI
[mlrun] 2020-03-30 18:54:23,149 starting run download uid=0a6483f2f6c5418cac4ef06f74904d23  -> http://10.196.88.27:80
[mlrun] 2020-03-30 18:54:23,196 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-03-30 18:54:24,819 Verified directories
[mlrun] 2020-03-30 18:54:24,819 Extracting zip
[mlrun] 2020-03-30 18:54:33,141 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-03-30 18:54:33,157 log artifact content at /User/mlrun/examples/images, size: None, db: Y



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...904d23,0,Mar 30 18:54:23,completed,download,v3io_user=adminkind=owner=adminhost=jupyter-74f9488695-6wrxj,archive_url,target_dir=/User/mlrun/examples/images,,content


to track results use .show() or .logs() or in CLI: 
!mlrun get run 0a6483f2f6c5418cac4ef06f74904d23  , !mlrun logs 0a6483f2f6c5418cac4ef06f74904d23 
[mlrun] 2020-03-30 18:54:33,207 run executed, status=completed


### Run as a cluster job

In [11]:
run = xfn.run(open_archive_task)

[mlrun] 2020-03-30 18:54:38,731 artifact path is not defined or is local, artifacts will not be visible in the UI
[mlrun] 2020-03-30 18:54:38,737 starting run download uid=983db54276e14ca988217fb7875b5b95  -> http://10.196.88.27:80
[mlrun] 2020-03-30 18:54:38,813 Job is running in the background, pod: download-jkl85
[mlrun] 2020-03-30 18:54:45,711 artifact path is not defined or is local, artifacts will not be visible in the UI
[mlrun] 2020-03-30 18:54:45,743 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-03-30 18:54:46,703 Verified directories
[mlrun] 2020-03-30 18:54:46,703 Extracting zip
[mlrun] 2020-03-30 18:54:55,181 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-03-30 18:54:55,197 log artifact content at /User/mlrun/examples/images, size: None, db: Y

[mlrun] 2020-03-30 18:54:55,211 run executed, status=completed
final state: succeeded


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...5b5b95,0,Mar 30 18:54:45,completed,download,host=download-jkl85kind=jobowner=adminv3io_user=admin,archive_url,target_dir=/User/mlrun/examples/images,,content


to track results use .show() or .logs() or in CLI: 
!mlrun get run 983db54276e14ca988217fb7875b5b95  , !mlrun logs 983db54276e14ca988217fb7875b5b95 
[mlrun] 2020-03-30 18:54:57,233 run executed, status=completed
