# Face Recognition Using mlrun with OpenCV And scikit-learn
 A complete pipeline of data processing, model training and serving function deployment

### Install mlrun and kubeflow pipelines

In [None]:
# One-time setup of the notebook environment: mlrun and the Kubeflow Pipelines SDK (kfp).
# NOTE(review): versions are unpinned, so a fresh run may pull newer, incompatible releases — consider pinning.
!pip install mlrun
!pip install kfp

### Install dependencies for the code and set config 

In [1]:
# nuclio: ignore
import nuclio

In [None]:
%%nuclio cmd
pip install cmake
pip install dlib
pip install face_recognition
pip install opencv-contrib-python
pip install imutils
pip install scikit-learn
pip install pandas
pip install joblib
pip install v3io_frames

In [2]:
# Base Docker image for the function build (it appears as the FROM line of the generated image).
# NOTE(review): python:3.6-jessie is an old image — confirm it is still available and supported.
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Declare global variables and perform necessary imports 

In [3]:
# Root folder of the face dataset; encode_images reads (and, if missing, creates) its 'input' subfolder.
DATA_PATH = '/User/demos/face-recognition/dataset/'
# Output location passed as out_path to the local tasks and the pipeline steps.
ARTIFACTS_PATH = '/User/demos/face-recognition/artifacts/'

In [20]:
import os
import zipfile
from urllib.request import urlopen
from io import BytesIO
import cv2
import face_recognition
from imutils import paths
import joblib
from sklearn.linear_model import LogisticRegression
from mlrun.artifacts import TableArtifact
import pandas as pd
import numpy as np
import datetime
import random
import string
import v3io_frames as v3f

### Import and define mlrun functions for the pipeline 

In [5]:
# nuclio: ignore
from mlrun import new_function, code_to_function, NewTask, mount_v3io, new_model_server 
import kfp
from kfp import dsl

In [21]:
def encode_images(context):
    """Compute face encodings for the labeled images and store them in a KV table.

    Steps:
      1. If DATA_PATH + 'input' does not exist, download and extract a small
         sample dataset into it.
      2. Build a class-index <-> folder-name mapping from the entries of the
         input folder and log it as the 'idx2name' table artifact.
      3. For every image, detect faces (HOG model), compute their 128-value
         encodings and label each one with the index of the image's parent folder.
      4. Write the encodings, labels and detail columns to the
         'iguazio/demos/face-recognition/artifacts/encodings' KV table and log
         the table path as the 'encodings_path' artifact.

    :param context: mlrun run context; only ``log_artifact`` is used here.
    """
    client = v3f.Client("framesd:8081", container="users")
    input_dir = DATA_PATH + 'input'
    encodings_table = 'iguazio/demos/face-recognition/artifacts/encodings'

    # If no train images exist in the predefined path we will train the model on a small dataset of movie actresses
    if not os.path.exists(input_dir):
        # Download first and create the folder afterwards: if the download fails, no empty
        # 'input' folder is left behind that would make later runs skip the download.
        with urlopen('https://iguazio-public.s3.amazonaws.com/roy-actresses/Actresses.zip') as resp:
            archive_bytes = resp.read()
        os.makedirs(input_dir)
        # The context manager closes the archive even if extraction raises.
        with zipfile.ZipFile(BytesIO(archive_bytes), 'r') as zip_ref:
            zip_ref.extractall(input_dir)

    # creates a mapping of classes (person's names) to target value
    classes_names = [f for f in os.listdir(input_dir) if '.ipynb' not in f]

    name2idx = {}
    idx2name = {}

    for (i, name) in enumerate(classes_names):
        idx2name[i] = name
        name2idx[name] = i
    value2name_dict = {'value': list(idx2name.keys()), 'name': list(idx2name.values())}

    # log name to index mapping into mlrun context
    mapping_df = pd.DataFrame(value2name_dict)
    mapping_df['value'] = mapping_df['value'].astype('str')
    context.log_artifact(TableArtifact('idx2name', df=mapping_df), src_path='idx2name.csv')

    # generates a list of paths to labeled images
    imagePaths = [f for f in paths.list_images(input_dir) if '.ipynb' not in f]
    knownEncodings = []
    knownLabels = []
    fileNames = []
    for (i, imagePath) in enumerate(imagePaths):
        print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
        # extracts label (person's name) of the image: the name of its parent folder
        name = imagePath.split(os.path.sep)[-2]

        # cv2.imread returns None (it does not raise) for a missing or corrupt file;
        # skip such files instead of crashing in cvtColor.
        image = cv2.imread(imagePath)
        if image is None:
            print("[WARN] could not read image {}, skipping".format(imagePath))
            continue

        # converts image format to RGB for compatibility with face_recognition library
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # detects coordinates of faces bounding boxes
        boxes = face_recognition.face_locations(rgb, model='hog')

        # computes embeddings for detected faces
        encodings = face_recognition.face_encodings(rgb, boxes)

        # this code assumes that a person's folder in the dataset does not contain an image with a face other than their own
        for enc in encodings:
            # random 5-character suffix gives each encoding its own row key
            file_name = name + '_' + ''.join(random.choices(string.ascii_uppercase + string.digits, k=5))
            knownEncodings.append(enc)
            knownLabels.append([name2idx[name]])
            fileNames.append(file_name)

    # saves computed encodings to avoid repeating computations
    df_x = pd.DataFrame(knownEncodings, columns=['c' + str(i).zfill(3) for i in range(128)]).reset_index(drop=True)
    df_y = pd.DataFrame(knownLabels, columns=['label']).reset_index(drop=True)
    df_details = pd.DataFrame([[np.nan]*3]*len(df_x), columns=['imgUrl', 'camera', 'time'])
    df_details['time'] = [datetime.datetime.utcnow()]*len(df_x)
    data_df = pd.concat([df_x, df_y, df_details], axis=1)
    data_df['fileName'] = fileNames
    client.write(backend='kv', table=encodings_table, dfs=data_df, index_cols=['fileName'])

    # The artifact is a small text file holding the table path; train() reads it back.
    with open('encodings_path.txt', 'w+') as f:
        f.write(encodings_table)
    context.log_artifact('encodings_path', src_path=f.name)

In [22]:
def train(context, processed_data, model_name='model.bst'):
    """Fit a one-vs-rest logistic regression on the stored face encodings.

    :param context:        mlrun run context, used to log the trained model artifact.
    :param processed_data: input whose ``url`` points to a text file containing
                           the path of the KV table with the encodings.
    :param model_name:     file name the fitted model is dumped to with joblib.
    """
    frames_client = v3f.Client("framesd:8081", container="users")

    # The input artifact is a text file holding the encodings table path.
    with open(processed_data.url, 'r') as path_file:
        table_path = path_file.read()
    encodings_df = frames_client.read(backend="kv", table=table_path, reset_index=False)

    # Feature columns are named c000 .. c127; the target is the 'label' column.
    feature_columns = ['c' + str(i).zfill(3) for i in range(128)]
    features = encodings_df[feature_columns].values
    targets = encodings_df['label'].values

    classifier = LogisticRegression(multi_class='ovr', solver='lbfgs')
    classifier.fit(features, targets)

    # Persist the model and register it in the mlrun context.
    joblib.dump(classifier, model_name)
    context.log_artifact('model', src_path=model_name, labels={'framework': 'sklearn_classifier'})

In [17]:
# nuclio: end-code

In [8]:
# Serving function: a nuclio runtime created from the separate prediction notebook.
serving_function = code_to_function(
    name='recognize-faces',
    filename='./nuclio_face_prediction.ipynb',
    runtime='nuclio',
)
# Expose it over HTTP with two workers and attach the 'User' volume.
serving_function.with_http(workers=2).add_volume('User', '~/')

<mlrun.runtimes.function.RemoteRuntime at 0x7f13d1f657f0>

### Test pipeline functions locally

In [23]:
# Run the encoding handler locally; its artifacts are written under ARTIFACTS_PATH.
task = NewTask(
    handler=encode_images,
    out_path=ARTIFACTS_PATH,
)
run = new_function().run(task)

[mlrun] 2019-11-20 15:30:02,752 starting run encode_images uid=4b68b0a791594ca18f56dfe3bad80059  -> 
[INFO] processing image 1/26
[INFO] processing image 2/26
[INFO] processing image 3/26
[INFO] processing image 4/26
[INFO] processing image 5/26
[INFO] processing image 6/26
[INFO] processing image 7/26
[INFO] processing image 8/26
[INFO] processing image 9/26
[INFO] processing image 10/26
[INFO] processing image 11/26
[INFO] processing image 12/26
[INFO] processing image 13/26
[INFO] processing image 14/26
[INFO] processing image 15/26
[INFO] processing image 16/26
[INFO] processing image 17/26
[INFO] processing image 18/26
[INFO] processing image 19/26
[INFO] processing image 20/26
[INFO] processing image 21/26
[INFO] processing image 22/26
[INFO] processing image 23/26
[INFO] processing image 24/26
[INFO] processing image 25/26
[INFO] processing image 26/26



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...d80059,0,Nov 20 15:30:02,completed,encode_images,kind=handlerowner=iguaziohost=jupyter-vkitg3cn1q-4mhnd-5d667f7858-cm7dd,,,,idx2nameencodings_path


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid 4b68b0a791594ca18f56dfe3bad80059 
[mlrun] 2019-11-20 15:30:32,566 run executed, status=completed


In [24]:
# Train locally on the encodings table produced by the previous local run.
# The result is bound to `train_run` rather than `train`, so the `train` handler
# function is not overwritten and this cell can be re-run safely.
task2 = NewTask(handler=train, inputs={'processed_data': run.outputs['encodings_path']}, out_path=ARTIFACTS_PATH)
train_run = new_function().run(task2)

[mlrun] 2019-11-20 15:30:59,349 starting run train uid=b4922fb6016b4bde92903fe166f4833e  -> 


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...f4833e,0,Nov 20 15:30:59,completed,train,kind=handlerowner=iguaziohost=jupyter-vkitg3cn1q-4mhnd-5d667f7858-cm7dd,processed_data,,,model


type result.show() to see detailed results/progress or use CLI:
!mlrun get run --uid b4922fb6016b4bde92903fe166f4833e 
[mlrun] 2019-11-20 15:31:00,401 run executed, status=completed


### Create a function from notebook and build image
This step is expected to take a few minutes.

In [9]:
# Create an mlrun function named 'face-recognition'; no filename is passed,
# so presumably the code of the current notebook is used — TODO confirm.
fn = code_to_function('face-recognition')

In [10]:
# Build the container image for the function; the build log is printed as the cell output.
fn.build(image='mlrun/face_recognition:latest')

[mlrun] 2019-11-20 14:09:05,608 building image (mlrun/face_recognition:latest)
FROM python:3.6-jessie
WORKDIR /run
RUN pip install cmake
RUN pip install dlib
RUN pip install face_recognition
RUN pip install opencv-contrib-python
RUN pip install imutils
RUN pip install sklearn
RUN pip install pandas
RUN pip install joblib
RUN pip install v3io_frames
RUN pip install mlrun
ENV PYTHONPATH /run
[mlrun] 2019-11-20 14:09:05,612 using in-cluster config.
[mlrun] 2019-11-20 14:09:05,642 Pod mlrun-build-p68qv created
..
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Resolved base name python:3.6-jessie to python:3.6-jessie 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:0318d80cb241983eda20b905d77fa0bfb06e29e5aabf075c7941ea687f1c125a: no such file or directory 
[36mINFO[0m[0000] Downloading base image python:3.6-jessie     
[3

<mlrun.runtimes.local.LocalRuntime at 0x7f13d1f65b00>

### Create pipeline

In [None]:
# Pipeline: encode images -> train classifier -> deploy the serving function.
@dsl.pipeline(
    name='face recognition pipeline',
    description='Creates and deploys a face recognition model'
)
def face_recognition_pipeline():
    fn.with_code()

    # Step 1: compute the face encodings and the index-to-name mapping.
    encode_step = fn.as_step(
        name='encode-images',
        handler='encode_images',
        out_path=ARTIFACTS_PATH,
        outputs=['idx2name', 'encodings_path'],
    ).apply(mount_v3io())

    # Step 2: train on the encodings table path produced by step 1.
    train_step = fn.as_step(
        name='train',
        handler='train',
        out_path=ARTIFACTS_PATH,
        outputs=['model'],
        inputs={'processed_data': encode_step.outputs['encodings_path']},
    ).apply(mount_v3io())

    # Step 3: deploy the serving function with the trained model registered as 'face_rec_v1'.
    serving_function.deploy_step(project='default', models={'face_rec_v1': train_step.outputs['model']})
    

In [14]:
# Kubeflow Pipelines client for the 'default-tenant' namespace; used to submit the pipeline run.
client = kfp.Client(namespace='default-tenant')

In [15]:
# For debugging purposes, compile the pipeline to 'face_rec.yaml' so the generated workflow can be inspected.
kfp.compiler.Compiler().compile(face_recognition_pipeline, 'face_rec.yaml')

### Run pipeline

In [16]:
# The pipeline function takes no parameters, so it is submitted with an empty argument dict.
arguments = {}
run_result = client.create_run_from_pipeline_func(
    face_recognition_pipeline,
    arguments=arguments,
    run_name='face_rec_1',
    experiment_name='face_rec',
)