# Running Faces pipeline

In [69]:
# Optional: uncomment the next two commands to uninstall and reinstall the mlrun package.
#!pip uninstall -y mlrun

#!pip install mlrun
# Print pip's metadata for the mlrun package.
!pip show mlrun

### Create the MLRun project

In [70]:
from os import path, getenv
from mlrun import new_project, mlconf

# Disabled alternative: build a per-user project name from the V3IO_USERNAME environment variable.
#project_name = '-'.join(filter(None, ['getting-started-iris', getenv('V3IO_USERNAME', None)]))
project_name = "faces"
# The project directory is the absolute path of the current working directory.
project_path = path.abspath('./')
# Create the project object for this name/directory, then call save() on it.
project = new_project(project_name, project_path)
project.save()
# Echo the resolved settings so they are visible in the cell output.
print(f'Project path: {project_path}\nProject name: {project_name}')

Project path: /User/mlrun/demos/faces/notebooks
Project name: faces


In [71]:
# Base output location: the configured MLRun artifact path when it is set, otherwise ./data (absolute).
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to a different directory per run
# NOTE(review): artifact_path is not referenced by the later cells visible in this notebook.
artifact_path = path.join(out, '{{run.uid}}')
%env PYTHONPATH=./

env: PYTHONPATH=./


In [72]:
from mlrun import mount_v3io, code_to_function
# Wrap functions/encode_images.py as an MLRun function of kind 'job'.
encode_images_func = code_to_function(
    name='encode_images',
    filename='functions/encode_images.py',
    kind='job',
)
# Image build settings: base image plus the pip command to run on top of it.
encode_images_func.spec.build.base_image = 'mlrun/ml-models'
encode_images_func.spec.build.commands = [
    'pip install face_recognition imutils opencv-python-headless v3io_frames',
]
# Start the image build; the value returned by deploy() is shown as the cell output.
encode_images_func.deploy()

> 2020-10-20 20:42:07,619 [info] starting remote build, image: .mlrun/func-default-encode-images-latest
[36mINFO[0m[0000] Resolved base name mlrun/ml-models:0.5.1 to mlrun/ml-models:0.5.1 
[36mINFO[0m[0000] Resolved base name mlrun/ml-models:0.5.1 to mlrun/ml-models:0.5.1 
[36mINFO[0m[0000] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0002] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0004] Built cross stage deps: map[]                
[36mINFO[0m[0004] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0005] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0006] Unpacking rootfs as cmd RUN pip install face_recognition imutils opencv-python-headless v3io_frames requires it. 
[36mINFO[0m[0173] Taking snapshot of full filesystem...        
[36mINFO[0m[0185] Resolving paths                              
[36mINFO[0m[0209] RUN pip install face_recognition imutils opencv-python-headless v3io_frames 
[36mINFO[0m[020

True

In [73]:
from mlrun import mount_v3io, code_to_function
# Wrap functions/train.py as an MLRun function of kind 'job'.
train_func = code_to_function(
    name='train',
    filename='functions/train.py',
    kind='job',
)
# Image build settings: base image plus the pip command to run on top of it.
train_func.spec.build.base_image = 'mlrun/ml-models'
train_func.spec.build.commands = [
    'pip install v3io_frames',
]
# Start the image build; the value returned by deploy() is shown as the cell output.
train_func.deploy()

> 2020-10-20 20:49:01,546 [info] starting remote build, image: .mlrun/func-default-train-latest
[36mINFO[0m[0000] Resolved base name mlrun/ml-models:0.5.1 to mlrun/ml-models:0.5.1 
[36mINFO[0m[0000] Resolved base name mlrun/ml-models:0.5.1 to mlrun/ml-models:0.5.1 
[36mINFO[0m[0000] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0002] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0004] Built cross stage deps: map[]                
[36mINFO[0m[0004] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0005] Retrieving image manifest mlrun/ml-models:0.5.1 
[36mINFO[0m[0006] Unpacking rootfs as cmd RUN pip install v3io_frames requires it. 
[36mINFO[0m[0154] Taking snapshot of full filesystem...        
[36mINFO[0m[0175] Resolving paths                              
[36mINFO[0m[0199] RUN pip install v3io_frames                  
[36mINFO[0m[0199] cmd: /bin/sh                                 
[36mINFO[0m[0199] args: [-c pip inst

True

### Set the project functions

In [75]:
from mlrun import mount_v3io, code_to_function
ARTIFACTS_PATH ='/User/faces/artifacts/'

# Register each built function in the project under its name, then give it the
# same runtime setup: v3io mount, PYTHONPATH pointing at the project directory,
# and the shared artifact path.
for fn_name, fn_obj in (('encode_images', encode_images_func), ('train', train_func)):
    project.set_function(fn_obj, name=fn_name)
    registered_fn = project.func(fn_name)
    registered_fn.apply(mount_v3io())
    registered_fn.set_env('PYTHONPATH', project_path)
    registered_fn.spec.artifact_path = ARTIFACTS_PATH



<a id="gs-step-create-n-run-ml-pipeline"></a>
## Create and Run a Fully Automated ML Pipeline

You're now ready to create a full ML pipeline.
This is done by using [Kubeflow Pipelines](https://www.kubeflow.org/docs/pipelines/overview/pipelines-overview/), which is integrated into the Iguazio Data Science Platform.
Kubeflow Pipelines is an open-source framework for building and deploying portable, scalable machine-learning workflows based on Docker containers.
MLRun leverages this framework to take your existing code and deploy it as steps in the pipeline.

In [76]:
%%writefile {path.join(project_path, 'workflow.py')}

from kfp import dsl
from mlrun import mount_v3io
from os import getenv, path

# Locations and endpoints handed to the pipeline steps as parameters.
DATA_PATH = '/User/faces/dataset/'
ARTIFACTS_PATH = '/User/faces/artifacts/'
MODELS_PATH = '/User/mlrun/demos/faces/notebooks/functions/models.py'
FRAMES_URL = 'framesd:8081'
# Both values come from the environment and are None when the variable is unset.
V3IO_ACCESS_KEY = getenv('V3IO_ACCESS_KEY')
USER_NAME = getenv('V3IO_USERNAME')
# Build "<user>/faces/encodings". A missing or empty user name is skipped, so an
# unset V3IO_USERNAME no longer makes str.join fail with a TypeError when the
# workflow file is loaded; the path is then just "faces/encodings".
ENCODINGS_PATH = '/'.join(filter(None, [USER_NAME, 'faces', 'encodings']))
# Not referenced anywhere else in this workflow file.
WEB_API = "http://v3io-webapi:8081"


# Function objects keyed by name; kfpipeline() looks its steps up here, so this
# must be populated before the pipeline is built (presumably by MLRun when it
# loads the workflow - confirm).
funcs = {}
project_path = path.abspath('./')
# Parameters passed unchanged to every pipeline step.
# NOTE(review): 'token' places the V3IO access key in the step parameters;
# confirm that exposing it there is acceptable.
faces_params = {'data_path': DATA_PATH,
                'artifacts_path': ARTIFACTS_PATH,
                'models_path': MODELS_PATH,
                'frames_url': FRAMES_URL,
                'token': V3IO_ACCESS_KEY,
                'encodings_path': ENCODINGS_PATH}

def init_functions(functions: dict, project=None, secrets=None):
    """Apply the shared runtime settings to every function in ``functions``.

    Each function gets the v3io mount applied, its PYTHONPATH environment
    variable set to the absolute path of the current working directory, and
    its artifact path set to ARTIFACTS_PATH.

    :param functions: mapping whose values are the function objects to configure
    :param project:   accepted but not used in this body
    :param secrets:   accepted but not used in this body
    """
    python_path = path.abspath('./')
    for fn in functions.values():
        fn.apply(mount_v3io())
        fn.set_env('PYTHONPATH', python_path)
        fn.spec.artifact_path = ARTIFACTS_PATH
        
        
        
# Create a Kubeflow Pipelines pipeline
# Two steps are defined: "encode_images" and then "train"; both receive faces_params.
@dsl.pipeline(
    name = "faces-pipeline",
    description = "faces demo pipeline"
)
def kfpipeline():

    # encode images
    # The step is built from the 'encode_images' entry in funcs and declares one output, 'encode'.
    encode = funcs['encode_images'].as_step(
        name="encode_images",
        params=faces_params,
        outputs=['encode']
    )
    
    # train the model based on the images
    # NOTE(review): the 'table' input is given the whole encode.outputs object rather than
    # a single named output such as encode.outputs['encode'] - confirm this is what the
    # train function expects.
    train = funcs['train'].as_step(
        name="train",
        params = faces_params,
        inputs={'table': encode.outputs},                       
        outputs=['training']
    )

Overwriting /User/mlrun/demos/faces/notebooks/workflow.py


<a id="gs-register-workflow"></a>
#### Register the Workflow

Use the `set_workflow` MLRun project method to register your workflow with MLRun.
The following code sets the `name` parameter to the selected workflow name ("main") and the `code` parameter to the name of the workflow file that is found in your project directory (**workflow.py**).

In [77]:
# Register the workflow.py file under the workflow name "main"
project.set_workflow('main', 'workflow.py')

In [78]:
# Save the project again now that its functions and workflow have been set.
project.save()

In [79]:
# Run the "main" workflow with no arguments. The artifact path is the absolute
# form of pipeline/{{workflow.uid}} under the current working directory.
run_id = project.run(
    'main',
    arguments={},
    artifact_path=path.abspath(path.join('pipeline', '{{workflow.uid}}')),
    dirty=True,
)

> 2020-10-20 20:54:33,469 [info] Pipeline run id=115b5ede-b480-4d40-a862-317c431bdb79, check UI or DB for progress
