### Create the template file for creating the pipeline

In [16]:
%%writefile ./pipeline/sensor_training_pipeline.py
import os
from func_components import load_raw_data
from func_components import split_data
from jinja2 import Template
import kfp
from kfp.components import func_to_container_op
from kfp.dsl.types import Dict
from kfp.dsl.types import GCPProjectID
from kfp.dsl.types import GCPRegion
from kfp.dsl.types import GCSPath
from kfp.dsl.types import String
from kfp.gcp import use_gcp_secret

# Settings supplied through environment variables. Any variable that is not
# set comes back as None, and every value that is set arrives as a string
# (so USE_KFP_SA is the text 'False'/'True', not a bool).
BASE_IMAGE = os.environ.get('BASE_IMAGE')
TRAINER_IMAGE = os.environ.get('TRAINER_IMAGE')
RUNTIME_VERSION = os.environ.get('RUNTIME_VERSION')
PYTHON_VERSION = os.environ.get('PYTHON_VERSION')
COMPONENT_URL_SEARCH_PREFIX = os.environ.get('COMPONENT_URL_SEARCH_PREFIX')
USE_KFP_SA = os.environ.get('USE_KFP_SA')

# Component store that looks components up under the configured URL prefix
# only (no local search paths).
component_store = kfp.components.ComponentStore(
    local_search_paths=None,
    url_search_prefixes=[COMPONENT_URL_SEARCH_PREFIX],
)

# Op factories used by the pipeline below: one component loaded from the
# store, and two built from local Python functions on top of BASE_IMAGE.
caip_train_op = component_store.load_component('ml_engine/train')
retrieve_raw_data_op = func_to_container_op(load_raw_data, base_image=BASE_IMAGE)
split_preprocess_data_op = func_to_container_op(split_data, base_image=BASE_IMAGE)


# Define the pipeline
@kfp.dsl.pipeline(
    name='Bearing Sensor Data Training',
    description='The pipeline for training and deploying an anomaly detector based on an autoencoder')
def pipeline_run(project_id,
                 region,
                 source_bucket_name,
                 prefix,
                 dest_bucket_name,
                 dest_file_name,
                 dataset_location='US'):
    """Load the raw sensor data, split it by time, and train on AI Platform.

    The pipeline chains three steps: the raw-data loading op, the
    split/preprocess op, and the AI Platform training op.

    Args:
        project_id: Passed as the first argument of the training op.
        region: Passed to the training op as ``region``.
        source_bucket_name: Forwarded to the raw-data loading op.
        prefix: Forwarded to the raw-data loading op.
        dest_bucket_name: Forwarded to the raw-data loading op.
        dest_file_name: Forwarded to the raw-data loading op.
        dataset_location: Not used by the pipeline body; kept so the
            pipeline's parameter list stays unchanged.
    """
    # Read in the raw sensor data from the public dataset and load it into
    # the project bucket.
    load_task = retrieve_raw_data_op(source_bucket_name,
                                     prefix,
                                     dest_bucket_name,
                                     dest_file_name)

    # Preprocess and split the raw data by time.
    # NOTE(review): the timestamp is presumably the train/test split point and
    # the flag a preprocessing switch -- confirm against split_data's signature.
    split_task = split_preprocess_data_op(load_task.outputs['dest_bucket_name'],
                                          load_task.outputs['dest_file_name'],
                                          '2004-02-15 12:52:39',
                                          True)

    # Arguments for the trainer. Literal values are written as strings
    # because the list is handed to the training job as command-line
    # arguments.
    train_args = ["--bucket", split_task.outputs['bucket_name'],
                  "--train_file", split_task.outputs['train_dest_file'],
                  "--test_file", split_task.outputs['test_dest_file'],
                  "--epochs", "100",
                  "--batch_size", "10"]

    # Train the model on AI Platform using the custom trainer image.
    train_task = caip_train_op(project_id,
                               region=region,
                               master_image_uri=TRAINER_IMAGE,
                               args=train_args)


Overwriting ./pipeline/sensor_training_pipeline.py


### Set up the environment

In [17]:
# Deployment settings for this notebook session
ARTIFACT_STORE_URI = 'gs://rrusson-kubeflow-test'
ENDPOINT = '542a643864f1d5b3-dot-us-central2.pipelines.googleusercontent.com'
REGION = 'us-central1'

# `!` captures the command output as a list of lines; keep only the first one
PROJECT_ID = !(gcloud config get-value core/project)
PROJECT_ID = PROJECT_ID[0]

### Create the base image and load it into gcr.io

In [18]:
# Fully qualified gcr.io name of the base image: gcr.io/<project>/<name>:<tag>
IMAGE_NAME = 'rrusson_kubeflow_base'
TAG = 'v1'
BASE_IMAGE = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}:{TAG}'

In [None]:
# DON'T RUN THIS IF THE IMAGE EXISTS!
# Builds the ./base_image directory with Cloud Build and tags the result as BASE_IMAGE.
!gcloud builds submit --timeout 15m --tag {BASE_IMAGE} base_image

### Create the training image from the base image and push it to gcr.io (TODO: consider using a single image for both)

In [19]:
# Fully qualified gcr.io name of the trainer image: gcr.io/<project>/<name>:<tag>
IMAGE_NAME = 'rrusson_kubeflow_tf2_trainer'
TAG = 'v1'
TRAINER_IMAGE = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}:{TAG}'

In [None]:
# DON'T RUN THIS IF THE IMAGE EXISTS!
# Builds the ./train_image directory with Cloud Build and tags the result as TRAINER_IMAGE.
!gcloud builds submit --timeout 15m --tag {TRAINER_IMAGE} train_image

### Compile the Pipeline

In [20]:
# Settings read by pipeline/sensor_training_pipeline.py through os.getenv
COMPONENT_URL_SEARCH_PREFIX = 'https://raw.githubusercontent.com/kubeflow/pipelines/0.2.5/components/gcp/'
RUNTIME_VERSION = '1.15'
PYTHON_VERSION = '3.7'
USE_KFP_SA = False

# Export each setting as an environment variable. Values are exported as
# text, so USE_KFP_SA becomes the string 'False'.
%env USE_KFP_SA=$USE_KFP_SA
%env BASE_IMAGE=$BASE_IMAGE
%env TRAINER_IMAGE=$TRAINER_IMAGE
%env COMPONENT_URL_SEARCH_PREFIX=$COMPONENT_URL_SEARCH_PREFIX
%env RUNTIME_VERSION=$RUNTIME_VERSION
%env PYTHON_VERSION=$PYTHON_VERSION

env: USE_KFP_SA=False
env: BASE_IMAGE=gcr.io/mwpmltr/rrusson_kubeflow_base:v1
env: TRAINER_IMAGE=gcr.io/mwpmltr/rrusson_kubeflow_tf2_trainer:v1
env: COMPONENT_URL_SEARCH_PREFIX=https://raw.githubusercontent.com/kubeflow/pipelines/0.2.5/components/gcp/
env: RUNTIME_VERSION=1.15
env: PYTHON_VERSION=3.7


In [21]:
# Compile the pipeline definition file into sensor_training_pipeline.yaml;
# the module reads the environment variables exported in the previous cell.
!dsl-compile --py pipeline/sensor_training_pipeline.py --output sensor_training_pipeline.yaml

### Upload and list the pipeline in AI Platform Pipelines

In [22]:
PIPELINE_NAME = 'bearing_sensor_anomaly'

# Upload the compiled package to the KFP endpoint under PIPELINE_NAME
!kfp --endpoint {ENDPOINT} pipeline upload -p {PIPELINE_NAME} sensor_training_pipeline.yaml

Pipeline 1176a828-f9b2-4fac-af76-4dd7c67d8415 has been submitted

Pipeline Details
------------------
ID           1176a828-f9b2-4fac-af76-4dd7c67d8415
Name         bearing_sensor_anomaly
Description
Uploaded at  2020-12-03T02:10:36+00:00
+--------------------+-----------------+
| Parameter Name     | Default Value   |
| project_id         |                 |
+--------------------+-----------------+
| region             |                 |
+--------------------+-----------------+
| source_bucket_name |                 |
+--------------------+-----------------+
| prefix             |                 |
+--------------------+-----------------+
| dest_bucket_name   |                 |
+--------------------+-----------------+
| dest_file_name     |                 |
+--------------------+-----------------+
| dataset_location   | US              |
+--------------------+-----------------+


In [23]:
# Show every pipeline registered at the endpoint
!kfp --endpoint {ENDPOINT} pipeline list

+--------------------------------------+-------------------------------------------------+---------------------------+
| Pipeline ID                          | Name                                            | Uploaded at               |
| 1176a828-f9b2-4fac-af76-4dd7c67d8415 | bearing_sensor_anomaly                          | 2020-12-03T02:10:36+00:00 |
+--------------------------------------+-------------------------------------------------+---------------------------+
| 2b75dd48-dce3-4eee-ba58-9935aed42077 | [Tutorial] DSL - Control structures             | 2020-12-03T02:02:19+00:00 |
+--------------------------------------+-------------------------------------------------+---------------------------+
| 2c9947e6-24c0-492e-99ec-9ae4353b3378 | [Tutorial] Data passing in python components    | 2020-12-03T02:02:18+00:00 |
+--------------------------------------+-------------------------------------------------+---------------------------+
| a5f9bc01-dbf9-443e-8290-7ec50fb21019 | [Demo] 

### Submit a Run

In [24]:
# ID copied by hand from the "pipeline upload" output above; it must be
# updated whenever the pipeline is uploaded again.
PIPELINE_ID='1176a828-f9b2-4fac-af76-4dd7c67d8415'

In [9]:
# Experiment and run labels
EXPERIMENT_NAME = 'Load_Raw_Data'
RUN_ID = 'Run_001'

# Source location of the raw sensor data
SOURCE_BUCKET_NAME = 'amazing-public-data'
PREFIX = 'bearing_sensor_data/bearing_sensor_data/'

# Destination for the loaded raw data
DEST_BUCKET_NAME = 'rrusson-kubeflow-test'
DEST_FILE_NAME = 'raw_data_v3.csv'

# Staging folder under the artifact store
GCS_STAGING_PATH = f'{ARTIFACT_STORE_URI}/staging'

In [10]:
# Submit a run of the uploaded pipeline. Each name=value pair must match a
# pipeline parameter name exactly (they are lowercase); dataset_location is
# omitted so it keeps its default.
!kfp --endpoint $ENDPOINT run submit \
-e $EXPERIMENT_NAME \
-r $RUN_ID \
-p $PIPELINE_ID \
project_id=$PROJECT_ID \
region=$REGION \
source_bucket_name=$SOURCE_BUCKET_NAME \
prefix=$PREFIX \
dest_bucket_name=$DEST_BUCKET_NAME \
dest_file_name=$DEST_FILE_NAME

Creating experiment Load_Raw_Data.
(400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Content-Length': '1451', 'Content-Type': 'text/html; charset=utf-8', 'Date': 'Wed, 02 Dec 2020 20:32:44 GMT', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Frame-Options': 'SAMEORIGIN', 'X-Powered-By': 'Express', 'X-Xss-Protection': '0', 'Set-Cookie': 'S=cloud_datalab_tunnel=AqcMy91jG0arcx5ykiYV9xfrGNdeG2HH39HsK7cZiag; Path=/; Max-Age=3600'})
HTTP response body: 
<!DOCTYPE html>
<html lang=en>
  <meta charset=utf-8>
  <meta name=viewport content="initial-scale=1, minimum-scale=1, width=device-width">
  <title>Error 400 (Bad Request)!!1</title>
  <style>
    *{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{background:#fff;color:#222;padding:15px}body{margin:7% auto 0;max-width:390px;min-height:180px;padding:30px 0 15px}* > body{background:url(//www.google.com/images/errors/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflow:hidde

In [27]:
PROJECT_ID

'mwpmltr'