# Running Faces pipeline

In [72]:
import sys
import subprocess
import pkg_resources
import IPython

# Packages this notebook cannot run without.
required = {'mlrun'}

# Keys of every distribution visible to the running kernel.
installed = set(dist.key for dist in pkg_resources.working_set)

missing = required.difference(installed)
previously_installed = required & installed

if missing:
    print(f'Installing {",".join(missing)}')
    # Install with the kernel's own interpreter; pip's stdout is discarded.
    pip_command = [sys.executable, '-m', 'pip', 'install']
    pip_command.extend(missing)
    subprocess.check_call(pip_command, stdout=subprocess.DEVNULL)
    print('Restarting kernel')
    # Passing True asks for a restart, so the freshly installed package can be imported.
    IPython.Application.instance().kernel.do_shutdown(True)
if previously_installed:
    print(f'Already installed: {",".join(previously_installed)}')

Already installed: mlrun


### Create the MLRun project

In [73]:
from os import path, getenv
from mlrun import new_project, mlconf

# The demo uses a fixed project name. A per-user name could instead be built by
# joining a base name with getenv('V3IO_USERNAME').
project_name = "faces"

# The notebook's own directory serves as the project context.
project_path = path.abspath('./')

project = new_project(project_name, context=project_path)
project.save()

print(f'Project path: {project_path}\nProject name: {project_name}')

Project path: /User/mlrun/demos/faces/notebooks
Project name: faces


In [74]:
# Base output location: the artifact path from the MLRun configuration when one
# is set, otherwise a local ./data directory.
out = mlconf.artifact_path
if not out:
    out = path.abspath('./data')

# {{run.uid}} will be substituted with the run id, so each run writes its
# output to a different directory.
artifact_path = path.join(out, '{{run.uid}}')

# Disabled notebook magic, kept for reference:
#%env PYTHONPATH=./

In [75]:
from mlrun import mount_v3io, code_to_function
# Wrap functions/encode_images.py as an MLRun job.
encode_images_func = code_to_function(
    name='encode_images',
    kind='job',
    filename='functions/encode_images.py',
)

# The job image is built on top of a prebuilt base image. The disabled line
# below is the alternative of installing the packages during the build.
#encode_images_func.spec.build.commands = ['pip install face_recognition imutils opencv-python-headless v3io_frames==0.7.37']
encode_images_func.spec.build.base_image = 'aviaigz/ml-models:0.5.4'

# Start the image build; the call's return value is shown as the cell output.
encode_images_func.deploy()

> 2020-11-17 09:41:45,006 [info] running build to add mlrun package, set with_mlrun=False to skip if its already in the image
> 2020-11-17 09:41:45,018 [info] starting remote build, image: .mlrun/func-default-encode-images-latest
[36mINFO[0m[0000] Resolved base name aviaigz/ml-models:0.5.4 to aviaigz/ml-models:0.5.4 
[36mINFO[0m[0000] Resolved base name aviaigz/ml-models:0.5.4 to aviaigz/ml-models:0.5.4 
[36mINFO[0m[0000] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0002] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0003] Built cross stage deps: map[]                
[36mINFO[0m[0003] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0004] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0006] Unpacking rootfs as cmd RUN pip install mlrun requires it. 
[36mINFO[0m[0198] Taking snapshot of full filesystem...        
[36mINFO[0m[0223] Resolving paths                              
[36mINFO[0m[0249] RUN pi

True

In [76]:

from mlrun import mount_v3io, code_to_function
# Wrap functions/train.py as an MLRun job, built on the same base image as the
# encoding job.
train_func = code_to_function(
    name='train',
    kind='job',
    filename='functions/train.py',
)
train_func.spec.build.base_image = 'aviaigz/ml-models:0.5.4'

# Start the image build; the call's return value is shown as the cell output.
train_func.deploy()

> 2020-11-17 09:46:25,608 [info] running build to add mlrun package, set with_mlrun=False to skip if its already in the image
> 2020-11-17 09:46:25,615 [info] starting remote build, image: .mlrun/func-default-train-latest
[36mINFO[0m[0000] Resolved base name aviaigz/ml-models:0.5.4 to aviaigz/ml-models:0.5.4 
[36mINFO[0m[0000] Resolved base name aviaigz/ml-models:0.5.4 to aviaigz/ml-models:0.5.4 
[36mINFO[0m[0000] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0002] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0003] Built cross stage deps: map[]                
[36mINFO[0m[0003] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0004] Retrieving image manifest aviaigz/ml-models:0.5.4 
[36mINFO[0m[0006] Unpacking rootfs as cmd RUN pip install mlrun requires it. 
[36mINFO[0m[0167] Taking snapshot of full filesystem...        
[36mINFO[0m[0193] Resolving paths                              
[36mINFO[0m[0218] RUN pip instal

True

In [77]:
import nuclio
import os
from mlrun import mount_v3io, code_to_function
# Build a nuclio function from the face-prediction notebook.
nuclio_face_prediction_func = code_to_function(
    name='nuclio_face_prediction',
    kind='nuclio',
    filename='nuclio-face-prediction.ipynb',
)

# Set the API/trigger (HTTP, two workers) and attach the home dir to the function.
(
    nuclio_face_prediction_func
    .with_http(workers=2)
    .apply(mount_v3io())
)

# Fixed file locations passed to the function through its environment.
static_env = {
    'MODELS_PATH': '/User/mlrun/demos/faces/notebooks/functions/models.py',
    'MODEL_PATH': '/User/faces/artifacts/model.bst',
    'CLASSES_MAP': '/User/faces/artifacts/idx2name.csv',
}
for env_name, env_value in static_env.items():
    nuclio_face_prediction_func.set_env(env_name, env_value)

# The access key is copied from the notebook's own environment.
nuclio_face_prediction_func.set_env('V3IO_ACCESS_KEY', os.environ['V3IO_ACCESS_KEY'])

nuclio_face_prediction_func.spec.build.base_image = 'mlrun/ml-models'

# Not deployed here; the workflow deploys this function through deploy_step.
#nuclio_face_prediction_func.deploy()

In [78]:
import nuclio
import os
from mlrun import mount_v3io, code_to_function
# Build a nuclio function from the API-serving notebook.
nuclio_api_serving_func = code_to_function(
    name='nuclio_api_serving',
    kind='nuclio',
    filename='nuclio-api-serving.ipynb',
)

# Set the API/trigger (HTTP, two workers) and attach the home dir to the function.
(
    nuclio_api_serving_func
    .with_http(workers=2)
    .apply(mount_v3io())
)

# Environment: the dataset location, and the access key copied from the
# notebook's own environment.
nuclio_api_serving_func.set_env('DATA_PATH', '/User/faces/dataset/')
nuclio_api_serving_func.set_env('V3IO_ACCESS_KEY', os.environ['V3IO_ACCESS_KEY'])

nuclio_api_serving_func.spec.build.base_image = 'mlrun/ml-models'

# Not deployed here; the workflow deploys this function through deploy_step.
#nuclio_api_serving_func.deploy()

### Set the project functions

In [80]:
from mlrun import mount_v3io, code_to_function
# Directory the project functions are configured to use as their artifact path.
ARTIFACTS_PATH ='/User/faces/artifacts/'

# Project function name -> function object built in the cells above.
# Each name must map to its own function object: the workflow looks functions
# up by these names, so 'nuclio_api_serving' has to point at
# nuclio_api_serving_func and not at the face-prediction function.
project_functions = {
    'encode_images': encode_images_func,
    'train': train_func,
    'nuclio_face_prediction': nuclio_face_prediction_func,
    'nuclio_api_serving': nuclio_api_serving_func,
}

# Register every function with the project under its pipeline name.
for function_name, function_object in project_functions.items():
    project.set_function(function_object, name=function_name)

# Give every registered function the same local settings: the v3io mount,
# a PYTHONPATH pointing at the project directory, and the shared artifact path.
for function_name in project_functions:
    registered_function = project.func(function_name)
    registered_function.apply(mount_v3io())
    registered_function.set_env('PYTHONPATH', project_path)
    registered_function.spec.artifact_path = ARTIFACTS_PATH





<a id="gs-step-create-n-run-ml-pipeline"></a>
## Create and Run a Fully Automated ML Pipeline

You're now ready to create a full ML pipeline.
This is done by using [Kubeflow Pipelines](https://www.kubeflow.org/docs/pipelines/overview/pipelines-overview/), which is integrated into the Iguazio Data Science Platform.
Kubeflow Pipelines is an open-source framework for building and deploying portable, scalable machine-learning workflows based on Docker containers.
MLRun leverages this framework to take your existing code and deploy it as steps in the pipeline.

In [81]:
%%writefile {path.join(project_path, 'workflow.py')}

from kfp import dsl
from mlrun import mount_v3io
from os import getenv, path

# Fixed file-system locations used by the pipeline steps.
DATA_PATH = '/User/faces/dataset/'
ARTIFACTS_PATH = '/User/faces/artifacts/'
MODELS_PATH = '/User/mlrun/demos/faces/notebooks/functions/models.py'

# Service endpoints. WEB_API is not referenced by the workflow code visible here.
FRAMES_URL = 'framesd:8081'
WEB_API = "http://v3io-webapi:8081"

# Credentials and user identity come from the environment.
V3IO_ACCESS_KEY = getenv('V3IO_ACCESS_KEY')
USER_NAME = getenv('V3IO_USERNAME')

# "<user>/faces/encodings". The join raises TypeError when V3IO_USERNAME is
# unset, because getenv then returns None.
ENCODINGS_PATH = '/'.join((USER_NAME, 'faces', 'encodings'))

# Function registry read by kfpipeline.
# NOTE(review): presumably populated by MLRun before the pipeline runs - confirm.
funcs = {}
project_path = path.abspath('./')

# Parameters handed to both the encode and the train step.
faces_params = dict(
    data_path=DATA_PATH,
    artifacts_path=ARTIFACTS_PATH,
    models_path=MODELS_PATH,
    frames_url=FRAMES_URL,
    token=V3IO_ACCESS_KEY,
    encodings_path=ENCODINGS_PATH,
)

# Apply the shared runtime settings to every function of the workflow
def init_functions(functions: dict, project=None, secrets=None):
    """Give each function in ``functions`` the same local configuration.

    Every value of the mapping gets the v3io mount applied, a ``PYTHONPATH``
    environment variable set to the absolute path of the current working
    directory, and ``ARTIFACTS_PATH`` as its artifact path.

    Args:
        functions: mapping whose values are the function objects to configure.
        project: accepted but not used by this function.
        secrets: accepted but not used by this function.
    """
    working_dir = path.abspath('./')
    for fn in functions.values():
        fn.apply(mount_v3io())
        fn.set_env('PYTHONPATH', working_dir)
        fn.spec.artifact_path = ARTIFACTS_PATH
        
        
        
# Kubeflow Pipelines definition: encode -> train -> deploy both nuclio functions
@dsl.pipeline(name="faces-pipeline", description="faces demo pipeline")
def kfpipeline():
    # Step 1: encode the images.
    encode_step = funcs['encode_images'].as_step(
        name="encode_images",
        params=faces_params,
        outputs=['encode'],
    )

    # Step 2: train the model on the encoded images.
    # NOTE(review): the whole outputs mapping of the encode step is passed as
    # the 'table' input, not a single named output - confirm this is intended.
    train_step = funcs['train'].as_step(
        name="train",
        params=faces_params,
        inputs={'table': encode_step.outputs},
        outputs=['training'],
    )

    # Step 3: deploy the prediction function, handing it the 'training' output.
    prediction_deploy = funcs['nuclio_face_prediction'].deploy_step(
        models={"nuclio_face_prediction": train_step.outputs['training']}
    )

    # Step 4: deploy the API-serving function once the prediction deploy step is done.
    serving_deploy = funcs['nuclio_api_serving'].deploy_step()
    serving_deploy.after(prediction_deploy)
    
    

Overwriting /User/mlrun/demos/faces/notebooks/workflow.py


<a id="gs-register-workflow"></a>
#### Register the Workflow

Use the `set_workflow` MLRun project method to register your workflow with MLRun.
The following code sets the `name` parameter to the selected workflow name ("main") and the `code` parameter to the name of the workflow file that is found in your project directory (**workflow.py**).

In [82]:
# Register workflow.py (found in the project directory) under the workflow name "main"
workflow_name = 'main'
workflow_file = 'workflow.py'
project.set_workflow(workflow_name, workflow_file)

In [83]:
# Save the project again now that its functions and the "main" workflow have been registered.
project.save()

In [84]:
# Output directory for this pipeline run, under ./pipeline.
# NOTE(review): '{{workflow.uid}}' is presumably substituted with the workflow
# run id at execution time - confirm.
pipeline_artifact_path = path.abspath(path.join('pipeline', '{{workflow.uid}}'))

# Launch the registered "main" workflow with no extra arguments.
# NOTE(review): dirty=True presumably allows running with uncommitted changes - confirm.
run_id = project.run(
    'main',
    arguments={},
    artifact_path=pipeline_artifact_path,
    dirty=True,
)

> 2020-11-17 09:54:22,064 [info] Pipeline run id=9d6457b1-88c6-4139-b3b4-e1b503064414, check UI or DB for progress
