# Face Recognition Using mlrun with OpenCV and scikit-learn
A complete pipeline of data processing, model training, and serving function deployment.

### Install mlrun and kubeflow pipelines

In [None]:
# Install the mlrun and Kubeflow Pipelines (kfp) packages with pip.
!pip install mlrun
!pip install kfp

### Install dependencies for the code and set config 

In [1]:
# nuclio: ignore
import nuclio

In [None]:
%%nuclio cmd 
pip install cmake
pip install dlib
pip install face_recognition
pip install opencv-contrib-python
pip install imutils
pip install scikit-learn
pip install pandas
pip install joblib
pip install v3io_frames

In [2]:
# Set spec.build.baseImage, the base image used for the nuclio function build.
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Declare global variables and perform necessary imports 

In [3]:
# Root folder of the dataset; encode_images reads labeled images from its 'unprocessed' subfolder.
DATA_PATH = '/User/demos/face-recognition/dataset/'
# Passed as out_path to the mlrun tasks and pipeline steps defined later in this notebook.
ARTIFACTS_PATH = '/User/demos/face-recognition/artifacts/'

In [4]:
import os
import zipfile
from urllib.request import urlopen
from io import BytesIO
import cv2
import face_recognition
from imutils import paths
import joblib
from sklearn.linear_model import LogisticRegression
from mlrun.artifacts import TableArtifact
import pandas as pd
import numpy as np
import random
import string
import v3io_frames as v3f

### Import and define mlrun functions for the pipeline 

In [5]:
# nuclio: ignore
from mlrun import new_function, code_to_function, NewTask, mount_v3io, new_model_server 
import kfp
from kfp import dsl

In [6]:
def encode_images(context):
    """Compute face encodings for the labeled dataset and store them in a KV table.

    Images are read from ``DATA_PATH + 'unprocessed'``, where each sub-folder
    name is treated as the label (person's name) of the images it contains.
    If that folder does not exist, a small sample dataset of movie actresses
    is downloaded and extracted into it.

    Logged artifacts:
        idx2name        -- table mapping each numeric label value to a name.
        encodings_path  -- text file holding the path of the KV table that
                           the encodings were written to.

    Args:
        context: mlrun context, used here to log the artifacts.
    """
    client = v3f.Client("framesd:8081", container="users")
    unprocessed_dir = DATA_PATH + 'unprocessed'
    encodings_table = 'iguazio/demos/face-recognition/artifacts/encodings'

    # If no train images exist in the predefined path we will train the model
    # on a small dataset of movie actresses.
    if not os.path.exists(unprocessed_dir):
        archive_url = 'https://iguazio-public.s3.amazonaws.com/roy-actresses/Actresses.zip'
        # Download and open the archive *before* creating the directory, so a
        # failed download does not leave an empty folder that would make every
        # later run skip the download.
        with urlopen(archive_url) as resp, \
                zipfile.ZipFile(BytesIO(resp.read()), 'r') as zip_ref:
            os.makedirs(unprocessed_dir)
            zip_ref.extractall(unprocessed_dir)

    # Creates a mapping of classes (person's names) to target value.
    # os.listdir order is arbitrary; sorting keeps the mapping stable across runs.
    classes_names = sorted(f for f in os.listdir(unprocessed_dir) if '.ipynb' not in f)
    idx2name = dict(enumerate(classes_names))
    name2idx = {name: idx for idx, name in idx2name.items()}
    value2name_dict = {'value': list(idx2name.keys()), 'name': list(idx2name.values())}

    # Log name to index mapping into mlrun context.
    mapping_df = pd.DataFrame(value2name_dict)
    mapping_df['value'] = mapping_df['value'].astype('str')
    context.log_artifact(TableArtifact('idx2name', df=mapping_df), src_path='idx2name.csv')

    # Generates a list of paths to labeled images.
    imagePaths = [f for f in paths.list_images(unprocessed_dir) if '.ipynb' not in f]
    knownEncodings = []
    knownLabels = []
    fileNames = []
    for (i, imagePath) in enumerate(imagePaths):
        print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
        # Extracts label (person's name) of the image: the parent folder name.
        name = imagePath.split(os.path.sep)[-2]

        # cv2.imread returns None instead of raising when a file cannot be
        # read or decoded; skip such files rather than crash in cvtColor.
        image = cv2.imread(imagePath)
        if image is None:
            print("[WARN] could not read image {}, skipping".format(imagePath))
            continue

        # Converts image format to RGB for compatibility with the
        # face_recognition library (OpenCV loads images as BGR).
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Detects coordinates of faces bounding boxes using the 'hog' model.
        boxes = face_recognition.face_locations(rgb, model='hog')

        # Computes embeddings for detected faces.
        encodings = face_recognition.face_encodings(rgb, boxes)

        # This code assumes that a person's folder in the dataset does not
        # contain an image with a face other than their own.
        for enc in encodings:
            # Row key: the label without underscores plus a random 5-character suffix.
            suffix = ''.join(random.choices(string.ascii_uppercase + string.digits, k=5))
            file_name = name.replace('_', '') + suffix
            knownEncodings.append(enc)
            knownLabels.append([name2idx[name]])
            fileNames.append(file_name)

    # Saves computed encodings to avoid repeating computations.
    encoding_columns = ['c' + str(i).zfill(3) for i in range(128)]
    df_x = pd.DataFrame(knownEncodings, columns=encoding_columns).reset_index(drop=True)
    df_y = pd.DataFrame(knownLabels, columns=['label']).reset_index(drop=True)
    # Detail columns are written as NaN placeholders for every row.
    df_details = pd.DataFrame([[np.nan] * 3] * len(df_x), columns=['imgUrl', 'camera', 'time'])
    data_df = pd.concat([df_x, df_y, df_details], axis=1)
    data_df['fileName'] = fileNames
    client.write(backend='kv', table=encodings_table, dfs=data_df, index_cols=['fileName'])

    # The table path itself is logged as a small text artifact so that the
    # training step can locate the encodings.
    encodings_path_file = 'encodings_path.txt'
    with open(encodings_path_file, 'w+') as f:
        f.write(encodings_table)
    context.log_artifact('encodings_path', src_path=encodings_path_file)

In [7]:
def train(context, processed_data, model_name='model.bst'):
    """Fit a logistic-regression classifier on the stored face encodings.

    Args:
        context: mlrun context, used to log the trained model artifact.
        processed_data: input whose ``url`` points to a text file containing
            the path of the KV table that holds the encodings.
        model_name: file name the fitted model is dumped to with joblib.
    """
    frames_client = v3f.Client("framesd:8081", container="users")

    # The input file holds nothing but the name of the encodings table.
    with open(processed_data.url, 'r') as path_file:
        encodings_table = path_file.read()
    encodings_df = frames_client.read(backend="kv", table=encodings_table, reset_index=False)

    # Feature columns are named c000 .. c127; the target is the 'label' column.
    feature_columns = ['c' + str(col).zfill(3) for col in range(128)]
    features = encodings_df[feature_columns].values
    targets = encodings_df['label'].values

    classifier = LogisticRegression(multi_class='ovr', solver='lbfgs')
    classifier.fit(features, targets)

    # Persist the fitted classifier and register it in the mlrun context.
    joblib.dump(classifier, model_name)
    context.log_artifact('model', src_path=model_name, labels={'framework': 'sklearn_classifier'})

In [8]:
# nuclio: end-code

In [9]:
# serving function
serving_function = code_to_function(name='recognize-faces', 
                                      filename='./nuclio_face_prediction.ipynb',
                                      runtime='nuclio')
serving_function.with_http(workers=2).add_volume('User','~/')

<mlrun.runtimes.function.RemoteRuntime at 0x7f0fc28d1a20>

### Test pipeline functions locally

In [10]:
# Run encode_images locally as an mlrun task, with ARTIFACTS_PATH as its output path.
task = NewTask(handler=encode_images, out_path=ARTIFACTS_PATH)
# `run` is kept because the training cell reads run.outputs['encodings_path'].
run = new_function().run(task)

[mlrun] 2019-11-18 17:49:06,263 starting run mlrun-f5a224 uid=5387647056a84da59c3a697352eea22e  -> 
[INFO] processing image 1/99
[INFO] processing image 2/99
[INFO] processing image 3/99
[INFO] processing image 4/99
[INFO] processing image 5/99
[INFO] processing image 6/99
[INFO] processing image 7/99
[INFO] processing image 8/99
[INFO] processing image 9/99
[INFO] processing image 10/99
[INFO] processing image 11/99
[INFO] processing image 12/99
[INFO] processing image 13/99
[INFO] processing image 14/99
[INFO] processing image 15/99
[INFO] processing image 16/99
[INFO] processing image 17/99
[INFO] processing image 18/99
[INFO] processing image 19/99
[INFO] processing image 20/99
[INFO] processing image 21/99
[INFO] processing image 22/99
[INFO] processing image 23/99
[INFO] processing image 24/99
[INFO] processing image 25/99
[INFO] processing image 26/99
[INFO] processing image 27/99
[INFO] processing image 28/99
[INFO] processing image 29/99
[INFO] processing image 30/99
[INFO] pr

uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...eea22e,0,Nov 18 17:49:06,completed,mlrun-f5a224,v3io_user=iguaziokind=handlerowner=iguaziohost=jupyter-e9rk4i3j33-rwnkt-7646c7c98c-5mrq5,,,,idx2nameencodings_path


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 5387647056a84da59c3a697352eea22e 
[mlrun] 2019-11-18 17:52:50,620 run executed, status=completed


In [11]:
# Train locally on the encodings produced by the encode_images run.
# The result is bound to `train_run` (not `train`) so the `train` handler
# function is not overwritten and this cell can be re-run safely.
task2 = NewTask(handler=train, inputs={'processed_data': run.outputs['encodings_path']}, out_path=ARTIFACTS_PATH)
train_run = new_function().run(task2)

[mlrun] 2019-11-18 17:53:33,035 starting run mlrun-291262 uid=d32a7c9616f9461595e1a2c55538b50b  -> 


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...38b50b,0,Nov 18 17:53:33,completed,mlrun-291262,v3io_user=iguaziokind=handlerowner=iguaziohost=jupyter-e9rk4i3j33-rwnkt-7646c7c98c-5mrq5,processed_data,,,model


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid d32a7c9616f9461595e1a2c55538b50b 
[mlrun] 2019-11-18 17:53:33,293 run executed, status=completed


### Create a function from notebook and build image
Building the image is expected to take a few minutes.

In [12]:
# Create an mlrun function named 'face-recognition'. No filename is passed;
# per the section title, the function is created from this notebook.
fn = code_to_function('face-recognition')

In [None]:
# Build the function's image under the tag 'mlrun/face_recognition:latest'.
fn.build(image='mlrun/face_recognition:latest')

### Create pipeline

In [14]:
@dsl.pipeline(
    name='face recognition pipeline',
    description='Creates and deploys a face recognition model'
)
def face_recognition_pipeline():
    # Pipeline stages: encode images -> train classifier -> deploy serving function.
    fn.with_code()

    # Stage 1: compute the face encodings; exposes the name mapping and the
    # encodings table path as step outputs.
    encode_step = fn.as_step(
        name='encode-images',
        handler='encode_images',
        out_path=ARTIFACTS_PATH,
        outputs=['idx2name', 'encodings_path'],
    ).apply(mount_v3io())

    # Stage 2: train on the encodings path produced by stage 1.
    train_step = fn.as_step(
        name='train',
        handler='train',
        out_path=ARTIFACTS_PATH,
        outputs=['model'],
        inputs={'processed_data': encode_step.outputs['encodings_path']},
    ).apply(mount_v3io())

    # Stage 3: deploy the serving function with the trained model; the
    # returned step is not referenced afterwards, so it is not bound to a name.
    serving_function.deploy_step(
        project='default',
        models={'face_rec_v1': train_step.outputs['model']},
    )
    

In [15]:
# Kubeflow Pipelines client, created with namespace 'default-tenant'.
client = kfp.Client(namespace='default-tenant')

In [16]:
#For debug purposes compile pipeline code
kfp.compiler.Compiler().compile(face_recognition_pipeline, 'face_rec.yaml')

### Run pipeline

In [17]:
# face_recognition_pipeline takes no parameters, so no arguments are passed.
arguments = {}
# Submit the pipeline as run 'face_rec_1' under the 'face_rec' experiment.
run_result = client.create_run_from_pipeline_func(face_recognition_pipeline, arguments=arguments, run_name='face_rec_1', experiment_name='face_rec')