# Running the Faces Pipeline

In [72]:
#!pip uninstall -y mlrun

!pip install mlrun
!pip show mlrun

Collecting mlrun
  Using cached https://files.pythonhosted.org/packages/39/b2/8660822b82bbf6a6112694495cee90cd03c51180499ae46060cae30dd89b/mlrun-0.5.2-py3-none-any.whl
Collecting ipython<7.17,>=5.5 (from mlrun)
  Using cached https://files.pythonhosted.org/packages/23/6a/210816c943c9aeeb29e4e18a298f14bf0e118fe222a23e13bfcc2d41b0a4/ipython-7.16.1-py3-none-any.whl
Collecting azure-storage-blob (from mlrun)
  Using cached https://files.pythonhosted.org/packages/b8/84/7e51b3e1156bcb89a20b9ec641d4fced4800aa79daac3a403898c32046be/azure_storage_blob-12.5.0-py2.py3-none-any.whl
Collecting aiohttp>=3.5.0 (from mlrun)
  Using cached https://files.pythonhosted.org/packages/12/a2/ca3ba17c50ebeb3e7473330d8d1ce08fb83506a9bc985bcc0716354d2018/aiohttp-3.6.3-cp37-cp37m-manylinux1_x86_64.whl
Collecting pydantic~=1.5 (from mlrun)
  Using cached https://files.pythonhosted.org/packages/25/69/4be0f2caa2ae3424cd34c0f934b460a02f01aa6897b1d58fc056664b15b7/pydantic-1.6.1-py36.py37.py38-none-any.whl
Collecting n

### Create the MLRun project

In [53]:
from os import path, getenv
from mlrun import new_project, mlconf

#project_name = '-'.join(filter(None, ['getting-started-iris', getenv('V3IO_USERNAME', None)]))
# The project uses a fixed name and lives in the notebook's working directory.
project_name = "faces"
project_path = path.abspath(path.curdir)

# Create the project object in that directory and save it right away.
project = new_project(name=project_name, context=project_path)
project.save()

print(f'Project path: {project_path}\nProject name: {project_name}')

Project path: /User/mlrun/demos/faces/notebooks
Project name: faces


In [54]:
# Root location for run artifacts: the artifact path from the MLRun configuration when it is set,
# otherwise a local ./data directory resolved to an absolute path.
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
# NOTE(review): artifact_path is not referenced by the later cells shown in this notebook
# (the pipeline run cell builds its own path) - confirm whether it is still needed.
artifact_path = path.join(out, '{{run.uid}}')
# Set the PYTHONPATH environment variable to the current directory via the %env magic.
%env PYTHONPATH=./

env: PYTHONPATH=./


### Set the project functions

In [55]:
from mlrun import mount_v3io
#project.set_function("functions/clean_parquez.py", 'clean', kind='job', image='aviaigz/parquez')
# Source file for each job function of the project, keyed by function name.
job_sources = {
    'encode_images': "../functions/encode_images.py",
    'train': "../functions/train.py",
}

# Register every function as a job that runs on the GPU image.
for job_name, job_source in job_sources.items():
    project.set_function(job_source, job_name, kind='job', image='ml-models/ml-models-gpu')

# Give every registered function the same runtime settings: v3io mount,
# PYTHONPATH pointing at the project directory, and the artifact path.
# NOTE(review): 'User/artifacts' is a relative path, while the project path printed
# earlier in this notebook is under /User - confirm whether '/User/artifacts' was intended.
for job_name in job_sources:
    job = project.func(job_name)
    job.apply(mount_v3io())
    job.set_env('PYTHONPATH', project_path)
    job.spec.artifact_path = 'User/artifacts'

<a id="gs-step-create-n-run-ml-pipeline"></a>
## Create and Run a Fully Automated ML Pipeline

You're now ready to create a full ML pipeline.
This is done by using [Kubeflow Pipelines](https://www.kubeflow.org/docs/pipelines/overview/pipelines-overview/), which is integrated into the Iguazio Data Science Platform.
Kubeflow Pipelines is an open-source framework for building and deploying portable, scalable machine-learning workflows based on Docker containers.
MLRun leverages this framework to take your existing code and deploy it as steps in the pipeline.

In [66]:
%%writefile {path.join(project_path, 'workflow.py')}

from kfp import dsl
from mlrun import mount_v3io
from os import path

# Registry of project functions, looked up by name inside kfpipeline().
# It is empty here; presumably MLRun fills it in when it loads the workflow - TODO confirm.
funcs = {}
# Absolute path of the working directory at the time this module is loaded.
project_path = path.abspath('./')
# Parameters handed to both pipeline steps; empty by default.
faces_params = {}

# Configure function resources and local settings
def init_functions(functions: dict, project=None, secrets=None):
    """Apply the shared runtime settings to every function in ``functions``.

    Each function object gets the v3io mount applied, its ``PYTHONPATH``
    environment variable set to the absolute current working directory, and
    its artifact path set to ``'User/artifacts'``.

    :param functions: mapping whose values are the function objects to configure
    :param project:   accepted but not used by this implementation
    :param secrets:   accepted but not used by this implementation
    """
    working_dir = path.abspath('./')
    for fn in functions.values():
        fn.apply(mount_v3io())
        fn.set_env('PYTHONPATH', working_dir)
        fn.spec.artifact_path = 'User/artifacts'
        #fn.spec.service_account='mlrun-api'
        
        
# Create a Kubeflow Pipelines pipeline
@dsl.pipeline(
    name = "faces-pipeline",
    description = "faces demo pipeline"
)
def kfpipeline():
    # Two-step pipeline: encode the images, then train on the encoding.
    # Both steps receive faces_params as their parameters.

    # encode images; the step declares a single output named 'encode'
    encode = funcs['encode_images'].as_step(
        name="encode_images",
        params=faces_params,
        outputs=['encode']
    )

    # train the model based on the images
    # The 'table' input is bound to the one 'encode' output of the previous step.
    # Passing the whole `encode.outputs` mapping would hand the step a collection
    # of outputs instead of a single artifact reference.
    train = funcs['train'].as_step(
        name="train",
        params=faces_params,
        inputs={'table': encode.outputs['encode']},
        outputs=['training']
    )

Overwriting /User/mlrun/demos/faces/notebooks/workflow.py


<a id="gs-register-workflow"></a>
#### Register the Workflow

Use the `set_workflow` MLRun project method to register your workflow with MLRun.
The following code sets the `name` parameter to the selected workflow name ("main") and the `code` parameter to the name of the workflow file that is found in your project directory (**workflow.py**).

In [67]:
# Register the workflow file as "main"
# ('workflow.py' is the file name written into project_path by the %%writefile cell)
project.set_workflow('main', 'workflow.py')

In [68]:
# Save the project again now that the 'main' workflow has been registered.
project.save()

In [69]:
# Launch the registered 'main' workflow with no arguments. Artifacts go under an
# absolute ./pipeline/{{workflow.uid}} directory; the {{workflow.uid}} placeholder
# is passed through literally here.
# NOTE(review): dirty=True presumably allows running with uncommitted changes - confirm.
run_id = project.run(
    'main',
    arguments={},
    artifact_path=path.abspath(path.join('pipeline', '{{workflow.uid}}')),
    dirty=True,
)



> 2020-10-15 07:22:01,628 [info] Pipeline run id=3ddc0d1f-d42f-414a-96e1-5d86f23573c0, check UI or DB for progress
