# Named Entity Recognition pipeline

In [36]:
# Name of the Kubeflow Pipelines experiment that this notebook looks up
# (or creates) and submits its run to.
EXPERIMENT_NAME = "named-entity-recognition1"

# GCP-specific settings, currently disabled. BUCKET is only referenced by
# the commented-out deploy step of the pipeline.
# BUCKET = "kubeflow-sharkheads"
# GCP_SECRET = "kfp.gcp.use_gcp_secret('user-gcp-sa')"

## Imports

In [4]:
import kfp
from kfp import compiler
import kfp.components as comp
import kfp.dsl as dsl
from kfp import gcp

## Load components

In [5]:
# Directory holding one sub-folder per pipeline step; each sub-folder
# contains the step's component.yaml definition.
COMPONENTS_DIR = '/bd-fs-mnt/TenantShare/repo/code/kubeflow-NER/components'


def _load_component(step):
    """Load the component defined in COMPONENTS_DIR/<step>/component.yaml.

    Args:
        step: Name of the sub-folder under COMPONENTS_DIR, e.g. 'train'.

    Returns:
        Whatever `comp.load_component_from_file` returns for that file; it
        is called like a function inside the pipeline definition.
    """
    return comp.load_component_from_file(
        '{}/{}/component.yaml'.format(COMPONENTS_DIR, step))


# help() prints each component's generated signature so the expected
# argument names are visible before the pipeline is wired together.
preprocess_operation = _load_component('preprocess')
help(preprocess_operation)

train_operation = _load_component('train')
help(train_operation)

ai_platform_deploy_operation = _load_component('deploy')
help(ai_platform_deploy_operation)

Help on function preprocess:

preprocess(input_1_uri: 'GCSPath', output_x_uri_template: 'GCSPath', output_y_uri_template: 'GCSPath', output_preprocessing_state_uri_template: 'GCSPath')
    preprocess
    Performs the IOB preprocessing.

Help on function train:

train(input_x_uri: 'GCSPath', input_y_uri: 'GCSPath', input_job_dir_uri: 'GCSPath', input_tags: int, input_words: int, input_dropout, output_model_uri_template: 'GCSPath')
    train
    Trains the NER Bi-LSTM.

Help on function deploy:

deploy(model_path: 'GCSPath', model_name: str, model_region: str, model_version: str, model_runtime_version: str, model_prediction_class: str, model_python_version: str, model_package_uris: str)
    deploy
    Deploy the model with custom prediction route



## Build the Pipeline 

In [48]:
@dsl.pipeline(
    name='Named Entity Recognition Pipeline',
    description='Performs preprocessing, training and deployment.',
)
def pipeline():
    """Chain the preprocessing component into the training component.

    The function takes no pipeline parameters: every path is a fixed
    location under /bd-fs-mnt/TenantShare/repo. The deploy step is
    currently commented out, so only two steps are created.
    """
    # Step 1: preprocess the raw CSV.
    preprocess_step = preprocess_operation(
        input_1_uri='/bd-fs-mnt/TenantShare/repo/data/raw/ner.csv',
        output_x_uri_template="/bd-fs-mnt/TenantShare/repo/data/preprocess/x/data",
        output_y_uri_template="/bd-fs-mnt/TenantShare/repo/data/preprocess/y/data",
        output_preprocessing_state_uri_template="/bd-fs-mnt/TenantShare/repo/model/ner",
    )
    preprocess_outputs = preprocess_step.outputs

    # Step 2: train on the preprocessing outputs. Consuming those outputs is
    # what links this step to the previous one.
    training_step = train_operation(
        input_x_uri=preprocess_outputs['output_x_uri'],
        input_y_uri=preprocess_outputs['output_y_uri'],
        input_job_dir_uri="/bd-fs-mnt/TenantShare/repo/misc/job",
        input_tags=preprocess_outputs['output_tags'],
        input_words=preprocess_outputs['output_words'],
        input_dropout=0.1,
        output_model_uri_template="/bd-fs-mnt/TenantShare/repo/model/ner",
    )

    # Step 3 (disabled): deploy the trained model. Re-enabling it also
    # requires BUCKET, which is commented out in the configuration cell.
    # deploy_task = ai_platform_deploy_operation(
    #     model_path=training_step.output,
    #     model_name="named_entity_recognition_kubeflow",
    #     model_region="us-central1",
    #     model_version="version1",
    #     model_runtime_version="1.13",
    #     model_prediction_class="model_prediction.CustomModelPrediction",
    #     model_python_version="3.5",
    #     model_package_uris="gs://{}/routine/custom_prediction_routine-0.2.tar.gz".format(BUCKET)
    # )

## Compile the Pipeline

In [49]:
# The compiled package is named after the pipeline function, so it ends up
# as 'pipeline.pipeline.zip' in the current working directory.
pipeline_func = pipeline
pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'

# `compiler` comes from the imports cell (`from kfp import compiler`), so it
# is not imported a second time here.
# type_check=False turns off the compiler's type check of component inputs.
# NOTE(review): the recorded output still reports String-vs-GCSPath/Integer
# mismatches — confirm whether they should be fixed rather than ignored.
compiler.Compiler().compile(pipeline_func, pipeline_filename, type_check=False)

type name String is different from expected: GCSPath
type name String is different from expected: Integer
type name String is different from expected: Integer


## Create a Kubeflow Experiment

In [43]:
# Endpoint of the Kubeflow Pipelines deployment this notebook submits to.
KFP_HOST = 'https://68553fbb7778caf8-dot-us-central2.pipelines.googleusercontent.com'
client = kfp.Client(host=KFP_HOST)

# Reuse the experiment if it already exists, otherwise create it.
try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except ValueError:
    # The KFP SDK reports "no experiment with this name" as a ValueError.
    # Catching only that lets authentication and connection failures
    # surface instead of being masked by a second failing call.
    # NOTE(review): confirm the exception type for the installed kfp version.
    experiment = client.create_experiment(EXPERIMENT_NAME)

print(experiment)

{'created_at': datetime.datetime(2021, 2, 21, 10, 46, 20, tzinfo=tzlocal()),
 'description': None,
 'id': '879c7655-e5ec-4a50-b9c9-fbc16f5438b9',
 'name': 'named-entity-recognition1',
 'resource_references': None,
 'storage_state': 'STORAGESTATE_AVAILABLE'}


## Run the Pipeline

In [44]:
# The pipeline function declares no parameters, so nothing is overridden.
arguments = dict()

# Submit the compiled package as a new run inside the experiment.
run_name = '{} run'.format(pipeline_func.__name__)
run_result = client.run_pipeline(
    experiment.id, run_name, pipeline_filename, arguments)

# Echo what was submitted, one value per line.
print(experiment.id, run_name, pipeline_filename, arguments, sep='\n')

879c7655-e5ec-4a50-b9c9-fbc16f5438b9
pipeline run
pipeline.pipeline.zip
{}
