### Create the template file that defines the pipeline

In [16]:
%%writefile ./pipeline/sensor_training_pipeline.py
import os
from func_components import load_raw_data
from jinja2 import Template
import kfp
from kfp.components import func_to_container_op
from kfp.dsl.types import Dict
from kfp.dsl.types import GCPProjectID
from kfp.dsl.types import GCPRegion
from kfp.dsl.types import GCSPath
from kfp.dsl.types import String
from kfp.gcp import use_gcp_secret

# Build-time settings, read from the environment of the process that imports
# this module. Each value is None when its variable is not set.
BASE_IMAGE = os.environ.get('BASE_IMAGE')
TRAINER_IMAGE = os.environ.get('TRAINER_IMAGE')
RUNTIME_VERSION = os.environ.get('RUNTIME_VERSION')
PYTHON_VERSION = os.environ.get('PYTHON_VERSION')
COMPONENT_URL_SEARCH_PREFIX = os.environ.get('COMPONENT_URL_SEARCH_PREFIX')
USE_KFP_SA = os.environ.get('USE_KFP_SA')

# Component store configured with COMPONENT_URL_SEARCH_PREFIX as its only
# URL search prefix and no local search paths.
component_store = kfp.components.ComponentStore(
    url_search_prefixes=[COMPONENT_URL_SEARCH_PREFIX],
    local_search_paths=None,
)

# Op factory built from the load_raw_data function, using BASE_IMAGE as the
# container's base image.
retrieve_raw_data_op = func_to_container_op(
    load_raw_data,
    base_image=BASE_IMAGE,
)


# Pipeline definition.
@kfp.dsl.pipeline(
    name='Bearing Sensor Data Training',
    description='The pipeline for training and deploying an anomaly detector based on an autoencoder')
def pipeline_run(project_id,
                 source_bucket_name,
                 prefix,
                 dest_bucket_name,
                 dest_file_name,
                 dataset_location='US'):
    # The pipeline currently has a single step: the op built from
    # load_raw_data, which receives the first five pipeline parameters
    # positionally, in declaration order.
    # dataset_location is declared as a pipeline parameter but is not
    # consumed by any step here.
    load_raw_data_task = retrieve_raw_data_op(
        project_id,
        source_bucket_name,
        prefix,
        dest_bucket_name,
        dest_file_name,
    )

Writing ./pipeline/sensor_training_pipeline.py


### Set up the environment

In [25]:
REGION = 'us-central1'
ENDPOINT = '5b730578aa1d5a16-dot-us-central2.pipelines.googleusercontent.com'
ARTIFACT_STORE_URI = 'gs://rrusson-kubeflow-test'
PROJECT_ID = !(gcloud config get-value core/project)
PROJECT_ID = PROJECT_ID[0]

### Create the base image and load it into gcr.io

In [7]:
# Name and tag of the base image, combined with the project ID into a
# gcr.io image reference.
IMAGE_NAME = 'rrusson_kubeflow_base'
TAG = 'v1'

BASE_IMAGE = 'gcr.io/{}/{}:{}'.format(PROJECT_ID, IMAGE_NAME, TAG)

In [8]:
!gcloud builds submit --timeout 15m --tag $BASE_IMAGE base_image

Creating temporary tarball archive of 2 file(s) totalling 121 bytes before compression.
Uploading tarball of [base_image] to [gs://mwpmltr_cloudbuild/source/1606867971.745546-dae1685d2b174e8da5904b9ec0020d3c.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/mwpmltr/builds/83131216-08e1-41e5-9cb5-79e2d9e81404].
Logs are available at [https://console.cloud.google.com/cloud-build/builds/83131216-08e1-41e5-9cb5-79e2d9e81404?project=55590906972].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "83131216-08e1-41e5-9cb5-79e2d9e81404"

FETCHSOURCE
Fetching storage object: gs://mwpmltr_cloudbuild/source/1606867971.745546-dae1685d2b174e8da5904b9ec0020d3c.tgz#1606867972431600
Copying gs://mwpmltr_cloudbuild/source/1606867971.745546-dae1685d2b174e8da5904b9ec0020d3c.tgz#1606867972431600...
/ [1 files][  291.0 B/  291.0 B]                                                
Operation completed over 1 objects/291.0 B.                              

### Compile the Pipeline

In [14]:
USE_KFP_SA = False

COMPONENT_URL_SEARCH_PREFIX = 'https://raw.githubusercontent.com/kubeflow/pipelines/0.2.5/components/gcp/'
RUNTIME_VERSION = '1.15'
PYTHON_VERSION = '3.7'

%env USE_KFP_SA={USE_KFP_SA}
%env BASE_IMAGE={BASE_IMAGE}
%env TRAINER_IMAGE={TRAINER_IMAGE}
%env COMPONENT_URL_SEARCH_PREFIX={COMPONENT_URL_SEARCH_PREFIX}
%env RUNTIME_VERSION={RUNTIME_VERSION}
%env PYTHON_VERSION={PYTHON_VERSION}

env: USE_KFP_SA=False
env: BASE_IMAGE=gcr.io/mwpmltr/rrusson_kubeflow_base:v1
env: TRAINER_IMAGE={TRAINER_IMAGE}
env: COMPONENT_URL_SEARCH_PREFIX=https://raw.githubusercontent.com/kubeflow/pipelines/0.2.5/components/gcp/
env: RUNTIME_VERSION=1.15
env: PYTHON_VERSION=3.7


In [17]:
!dsl-compile --py pipeline/sensor_training_pipeline.py --output sensor_training_pipeline.yaml

### Upload and list the pipeline in AI Platform Pipelines

In [19]:
PIPELINE_NAME='bearing_sensor_anomaly'

!kfp --endpoint $ENDPOINT pipeline upload \
-p $PIPELINE_NAME \
sensor_training_pipeline.yaml

Pipeline 11fe0739-404d-4480-ba7a-2c2279e82cd2 has been submitted

Pipeline Details
------------------
ID           11fe0739-404d-4480-ba7a-2c2279e82cd2
Name         bearing_sensor_anomaly
Description
Uploaded at  2020-12-02T03:46:32+00:00
+--------------------+-----------------+
| Parameter Name     | Default Value   |
| project_id         |                 |
+--------------------+-----------------+
| source_bucket_name |                 |
+--------------------+-----------------+
| prefix             |                 |
+--------------------+-----------------+
| dest_bucket_name   |                 |
+--------------------+-----------------+
| dest_file_name     |                 |
+--------------------+-----------------+
| dataset_location   | US              |
+--------------------+-----------------+


In [20]:
# List the pipelines known to the KFP endpoint to confirm the upload.
!kfp --endpoint $ENDPOINT pipeline list

+--------------------------------------+-------------------------------------------------+---------------------------+
| Pipeline ID                          | Name                                            | Uploaded at               |
| 11fe0739-404d-4480-ba7a-2c2279e82cd2 | bearing_sensor_anomaly                          | 2020-12-02T03:46:32+00:00 |
+--------------------------------------+-------------------------------------------------+---------------------------+
| 931aacca-52b7-4e04-af2a-c67bb90a63a5 | [Tutorial] DSL - Control structures             | 2020-12-01T15:21:34+00:00 |
+--------------------------------------+-------------------------------------------------+---------------------------+
| 42d56ad8-c69e-4c5c-8fc8-0a9c6d53f7c4 | [Tutorial] Data passing in python components    | 2020-12-01T15:21:33+00:00 |
+--------------------------------------+-------------------------------------------------+---------------------------+
| d6b4b57a-ed39-4035-9af7-db952f00af36 | [Demo] 

### Submit a Run

In [21]:
# Pipeline ID copied by hand from the `kfp pipeline upload` output above;
# it must be updated whenever the pipeline is uploaded again.
PIPELINE_ID='11fe0739-404d-4480-ba7a-2c2279e82cd2'

In [22]:
# Experiment and run labels for the submission.
EXPERIMENT_NAME = 'Load_Raw_Data'
RUN_ID = 'Run_001'

# Where the raw sensor data is read from and where the combined file is written.
SOURCE_BUCKET_NAME = 'amazing-public-data'
PREFIX = 'bearing_sensor_data/bearing_sensor_data/'
DEST_BUCKET_NAME = 'rrusson-kubeflow-test'
DEST_FILE_NAME = 'raw_data_v2.csv'

# Staging folder under the artifact store bucket.
GCS_STAGING_PATH = '{}/staging'.format(ARTIFACT_STORE_URI)

In [26]:
!kfp --endpoint $ENDPOINT run submit \
-e $EXPERIMENT_NAME \
-r $RUN_ID \
-p $PIPELINE_ID \
project_id=$PROJECT_ID \
gcs_root=$GCS_STAGING_PATH \
region=$REGION \
source_bucket_name=$SOURCE_BUCKET_NAME \
prefix=$PREFIX \
dest_bucket_name=$DEST_BUCKET_NAME \
DEST_FILE_NAME=$DEST_FILE_NAME

(400)
Reason: Bad Request
HTTP response headers: HTTPHeaderDict({'Content-Length': '1451', 'Content-Type': 'text/html; charset=utf-8', 'Date': 'Wed, 02 Dec 2020 03:55:53 GMT', 'Vary': 'Origin', 'X-Content-Type-Options': 'nosniff', 'X-Frame-Options': 'SAMEORIGIN', 'X-Powered-By': 'Express', 'X-Xss-Protection': '0', 'Set-Cookie': 'S=cloud_datalab_tunnel=ErymSmdAP_WyTOw9z8OP7jMbVNww7RYGRBgpWJsRkHE; Path=/; Max-Age=3600'})
HTTP response body: 
<!DOCTYPE html>
<html lang=en>
  <meta charset=utf-8>
  <meta name=viewport content="initial-scale=1, minimum-scale=1, width=device-width">
  <title>Error 400 (Bad Request)!!1</title>
  <style>
    *{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{background:#fff;color:#222;padding:15px}body{margin:7% auto 0;max-width:390px;min-height:180px;padding:30px 0 15px}* > body{background:url(//www.google.com/images/errors/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflow:hidden}ins{color:#777;text-decoration:no

In [27]:
# Display the project ID resolved from gcloud, as a sanity check.
PROJECT_ID

'mwpmltr'