# FfDL Kubeflow Pipeline Notebook demo


### Define the necessary environment variables and install the KubeFlow Pipeline SDK
We assume this notebook kernel runs Python 3 and has access to Python's site-packages.

**Please fill in the environment variables below with your own settings.**

- **EXPERIMENT_NAME**: A unique experiment name that will be created for this notebook demo.
- **KFP_PACKAGE**: The latest release of kubeflow pipeline platform library.
- **KUBEFLOW_PIPELINE_LINK**: The link to access the KubeFlow pipeline API.

In [None]:
# Name of the experiment that pipeline runs from this notebook are filed under.
EXPERIMENT_NAME = "jupyter-demo"
# Location of the Kubeflow Pipelines SDK package that gets pip-installed below.
KFP_PACKAGE = "https://storage.googleapis.com/ml-pipeline/release/0.1.6/kfp.tar.gz"
# Link to the Kubeflow Pipelines API; left empty here and must be filled in.
KUBEFLOW_PIPELINE_LINK = ""

#### Then, define the GitHub credentials manually or load them from the credentials folder
- **config_file_url**: GitHub raw content link to the pipeline credentials file
- **github_token**: GitHub Token that can access your private repository

In [None]:
import configparser

# Load the GitHub credentials from an INI file with a [CREDENTIALS] section
# holding the `config_file_url` and `github_token` keys. The path is relative
# to the notebook's working directory.
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that result so a missing file is reported
# clearly instead of surfacing later as an opaque KeyError: 'CREDENTIALS'.
if not config.read('credentials/github-creds.ini'):
    raise FileNotFoundError(
        "Could not read 'credentials/github-creds.ini'. Create it with a "
        "[CREDENTIALS] section, or assign config_file_url and github_token "
        "manually instead of running this cell."
    )

config_file_url = config['CREDENTIALS']['config_file_url']
github_token = config['CREDENTIALS']['github_token']

### Install the necessary python packages

Note: Please change pip3 to the package manager that's used for this Notebook Kernel.

In [None]:
# Install (or upgrade) the ai_pipeline_params package and the Kubeflow
# Pipelines SDK. The leading `!` runs the line as a shell command from the
# notebook, and IPython substitutes $KFP_PACKAGE with the value of the Python
# variable of that name before the command runs.
!pip3 install ai_pipeline_params --upgrade
!pip3 install $KFP_PACKAGE --upgrade

### Import the KubeFlow Pipeline library and define the client and experiment 

In [None]:
import kfp
from kfp import compiler
import kfp.dsl as dsl
import kfp.notebook
import kfp.gcp as gcp

# Create the Kubeflow Pipelines client from the link configured above.
client = kfp.Client(KUBEFLOW_PIPELINE_LINK)

# Uncomment the line below if the experiment still needs to be created;
# otherwise we assume an experiment named EXPERIMENT_NAME already exists.

# exp = client.create_experiment(name=EXPERIMENT_NAME)

# Look up the experiment by name; later cells use `exp.id` to submit runs.
exp = client.get_experiment(experiment_name=EXPERIMENT_NAME)

### 2. Define pipeline tasks using the kfp library. 

In [None]:
import kfp.dsl as dsl
import ai_pipeline_params as params

# Fixed default name shared by the pipeline steps: the config step receives it
# via --name, and the train/serve steps pass it to use_ai_pipeline_params().
secret_name = "kfp-creds"


# create pipeline
@dsl.pipeline(
    name='FfDL pipeline',
    description='A pipeline for machine learning workflow using Fabric for Deep Learning and Seldon.'
)
def ffdlPipeline(
    GITHUB_TOKEN=dsl.PipelineParam(name='github-token'),
    CONFIG_FILE_URL=dsl.PipelineParam(name='config-file-url'),
    model_def_file_path=dsl.PipelineParam(
        name='model-def-file-path', value='gender-classification.zip'),
    manifest_file_path=dsl.PipelineParam(
        name='manifest-file-path', value='manifest.yml'),
    model_deployment_name=dsl.PipelineParam(
        name='model-deployment-name', value='gender-classifier'),
    model_class_name=dsl.PipelineParam(
        name='model-class-name', value='ThreeLayerCNN'),
    model_class_file=dsl.PipelineParam(
        name='model-class-file', value='gender_classification.py')
):
    """Declare the three container steps of the workflow: config, train, serve.

    Args:
        GITHUB_TOKEN: Passed to the config step as ``--token``.
        CONFIG_FILE_URL: Passed to the config step as ``--url``.
        model_def_file_path: Passed to ``train.py`` as ``--model_def_file_path``.
        manifest_file_path: Passed to ``train.py`` as ``--manifest_file_path``.
        model_deployment_name: Passed to ``serve.py`` as ``--deployment_name``.
        model_class_name: Passed to ``serve.py`` as ``--model_class_name``.
        model_class_file: Passed to ``serve.py`` as ``--model_class_file``.

    The module-level ``secret_name`` is handed to the config step via ``--name``
    and to ``params.use_ai_pipeline_params`` for the train and serve steps.
    """
    # Step 1: run /app/config.py; its output is read from /tmp/<secret_name>.
    secret_output_file = '/tmp/' + secret_name
    config_step = dsl.ContainerOp(
        name="config",
        image="aipipeline/wml-config",
        command=['python3'],
        arguments=[
            '/app/config.py',
            '--token', GITHUB_TOKEN,
            '--url', CONFIG_FILE_URL,
            '--name', secret_name,
        ],
        file_outputs={'secret-name': secret_output_file},
    )

    # Step 2: training. The shell command first echoes the config step's output
    # into /tmp/logs.txt (presumably this reference is also what orders train
    # after config -- confirm against the kfp DSL), then runs train.py.
    train_template = ('echo %s > /tmp/logs.txt; '
                      'python -u train.py --model_def_file_path %s --manifest_file_path %s;')
    train_command = train_template % (
        config_step.output, model_def_file_path, manifest_file_path)
    train_step = dsl.ContainerOp(
        name='train',
        image='aipipeline/ffdl-train:0.6',
        command=['sh', '-c'],
        arguments=[train_command],
        file_outputs={'output': '/tmp/training_id.txt'},
    ).apply(params.use_ai_pipeline_params(secret_name))

    # Step 3: serving. Consumes the training step's output as --model_id.
    serve_template = ('python -u serve.py --model_id %s --deployment_name %s '
                      '--model_class_name %s --model_class_file %s;')
    serve_command = serve_template % (
        train_step.output, model_deployment_name, model_class_name, model_class_file)
    serve_step = dsl.ContainerOp(
        name='serve',
        image='aipipeline/ffdl-serve:0.11',
        command=['sh', '-c'],
        arguments=[serve_command],
        file_outputs={'output': '/tmp/deployment_result.txt'},
    ).apply(params.use_ai_pipeline_params(secret_name))

In [None]:
# Default arguments for the pipeline defined above; the credentials come from
# the earlier cell, and any value can be customized for an individual run.
parameters = {
    'config-file-url': config_file_url,
    'github-token': github_token,
    'model-def-file-path': 'gender-classification.zip',
    'manifest-file-path': 'manifest.yml',
    'model-deployment-name': 'gender-classifier',
    'model-class-name': 'ThreeLayerCNN',
    'model-class-file': 'gender_classification.py',
}

# Compile the pipeline function into a package file, then submit that package
# as a run named 'ffdl-pipeline' under the experiment looked up earlier.
pipeline_package = 'ffdl-pipeline.tar.gz'
compiler.Compiler().compile(ffdlPipeline, pipeline_package)
run = client.run_pipeline(
    exp.id,
    'ffdl-pipeline',
    pipeline_package,
    params=parameters,
)

# Print the run's address built from the configured pipeline link.
run_link = KUBEFLOW_PIPELINE_LINK + '/#/runs/details/' + run.id
print('The above run link is assuming you ran this cell on JupyterHub that is deployed on the same cluster. ' +
      'The actual run link is ' + run_link)