# Exporting and Importing MLRun Functions

In [1]:
# nuclio: ignore
import nuclio

## Write a function

In [2]:
import os
import zipfile

def open_archive(context, 
                 target_dir: str = 'content',
                 archive_url: str = ''):
    """Open a file/object archive into a target directory
    
    :param target_dir:   target directory
    :param archive_url:  source archive path/url
    
    :returns: content dir
    """
        
    # Define locations
    os.makedirs(target_dir, exist_ok=True)
    context.logger.info('Verified directories')
    
    # Extract dataset from zip
    context.logger.info('Extracting zip')
    zip_ref = zipfile.ZipFile(archive_url, 'r')
    zip_ref.extractall(target_dir)
    zip_ref.close()
    
    context.logger.info(f'extracted archive to {target_dir}')
    context.log_artifact('content', local_path=target_dir)


In [3]:
# nuclio: end-code

## Export to a file

In [4]:
import mlrun

In [5]:
# create job function object from notebook code
fn = mlrun.code_to_function('file_utils', runtime='job', with_doc=True,
                            handler=open_archive, image='mlrun/mlrun')

# add metadata (for templates and reuse)
fn.spec.default_handler = 'open_archive'
fn.spec.description = "this function opens a zip archive into a local/mounted folder"
fn.metadata.categories = ['fileutils']
fn.metadata.labels = {'author': 'me'}

[mlrun] 2020-03-15 14:22:40,718 "runtime=" param is deprecated, use "kind="


In [6]:
print(fn.to_yaml())

kind: job
metadata:
  name: file-utils
  tag: ''
  project: ''
  labels:
    author: me
  categories:
  - fileutils
spec:
  command: ''
  args: []
  image: mlrun/mlrun
  volumes: []
  volume_mounts: []
  env: []
  default_handler: open_archive
  entry_points:
    open_archive:
      name: open_archive
      doc: Open a file/object archive into a target directory
      parameters:
      - name: context
      - name: target_dir
        type: str
        doc: target directory
        default: content
      - name: archive_url
        type: str
        doc: source archive path/url
      outputs:
      - doc: content dir
      lineno: 6
  description: this function opens a zip archive into a local/mounted folder
  build:
    functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlciBvbiAyMDIwLTAzLTE1IDE0OjIyCgppbXBvcnQgb3MKaW1wb3J0IHppcGZpbGUKCmRlZiBvcGVuX2FyY2hpdmUoY29udGV4dCwgCiAgICAgICAgICAgICAgICAgdGFyZ2V0X2Rpcjogc3RyID0gJ2NvbnRlbnQnLAogICAgICAgICAgICAgICAgIGFyY2hpdm

In [7]:
# save to a file (and can be pushed to a git)
fn.export('function.yaml')

[mlrun] 2020-03-15 14:22:43,708 function spec saved to path: function.yaml


<mlrun.runtimes.kubejob.KubejobRuntime at 0x7f7a460aa0f0>

## Import the function from Github and run

In [8]:
mlrun.mlconf.dbpath = 'http://mlrun-api:8080'

In [14]:
# load from local file
xfn = mlrun.import_function('./function.yaml')

# load function from MLRun functions hub 
# xfn = mlrun.import_function('hub://open_archive')

# configute it: mount on iguazio fabric, set as interactive (return stdout)
xfn.apply(mlrun.mount_v3io())

# create and run the task
images_path = '/User/mlrun/examples/images'
open_archive_task = mlrun.NewTask('download',  
    params={'target_dir': images_path},
    inputs={'archive_url': 'http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip'})

### Test locally

In [15]:
xfn.verbose = True
run = mlrun.run_local(open_archive_task, xfn)

[mlrun] 2020-03-15 14:25:45,466 artifact path is not defined or is local, artifacts will not be visible in the UI
[mlrun] 2020-03-15 14:25:45,473 starting run download uid=f956f9d22e32421c91f4a0c2d2555862  -> http://mlrun-api:8080
[mlrun] 2020-03-15 14:25:45,508 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-03-15 14:25:46,927 Verified directories
[mlrun] 2020-03-15 14:25:46,927 Extracting zip
[mlrun] 2020-03-15 14:25:54,683 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-03-15 14:25:54,692 log artifact content at /User/mlrun/examples/images, size: None, db: Y



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...555862,0,Mar 15 14:25:45,completed,download,kind=owner=adminhost=jupyter-db8d675b8-rcpjx,archive_url,target_dir=/User/mlrun/examples/images,,content


to track results use .show() or .logs() or in CLI: 
!mlrun get run f956f9d22e32421c91f4a0c2d2555862 --project default , !mlrun logs f956f9d22e32421c91f4a0c2d2555862 --project default
[mlrun] 2020-03-15 14:25:54,723 run executed, status=completed


### Run as a cluster job

In [11]:
run = xfn.run(open_archive_task)

[mlrun] 2020-03-15 13:14:28,774 artifact path is not defined or is local,artifacts will not be visible in the UI
[mlrun] 2020-03-15 13:14:28,781 starting run download uid=a48537cda5e44b598d47a877dec79252  -> http://mlrun-api:8080
[mlrun] 2020-03-15 13:14:28,858 Job is running in the background, pod: download-s64jl
[mlrun] 2020-03-15 13:14:33,664 artifact path is not defined or is local,artifacts will not be visible in the UI
[mlrun] 2020-03-15 13:14:33,695 downloading http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip to local tmp
[mlrun] 2020-03-15 13:14:35,090 Verified directories
[mlrun] 2020-03-15 13:14:35,090 Extracting zip
[mlrun] 2020-03-15 13:14:43,390 extracted archive to /User/mlrun/examples/images
[mlrun] 2020-03-15 13:14:43,397 log artifact content at /User/mlrun/examples/images, size: None, db: Y

[mlrun] 2020-03-15 13:14:43,405 run executed, status=completed
final state: succeeded


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...c79252,0,Mar 15 13:14:33,completed,download,host=download-s64jlkind=jobowner=admin,archive_url,target_dir=/User/mlrun/examples/images,,content


to track results use .show() or .logs() or in CLI: 
!mlrun get run a48537cda5e44b598d47a877dec79252  , !mlrun logs a48537cda5e44b598d47a877dec79252 
[mlrun] 2020-03-15 13:14:47,082 run executed, status=completed
