# 06 - Test and Deploy Pipeline to AI Platform Managed Pipelines

The purpose of this notebook is to test, compile, and run the TFX pipeline on AI Platform Managed Pipelines. The notebook covers the following tasks:
1. Test the pipeline locally using the local runner.
2. Set the pipeline deployment configuration.
3. Build the container image.
4. Compile the TFX pipeline.
5. Submit a pipeline job to AI Platform Pipelines (Managed).

## Setup

In [None]:
# Load IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to project modules are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import kfp
import tfx
from tfx.orchestration.local.local_dag_runner import LocalDagRunner
import tensorflow as tf
import ml_metadata as mlmd
from ml_metadata.proto import metadata_store_pb2
import logging

# Surface INFO-level records from the root logger.
logging.getLogger().setLevel(logging.INFO)

# Report each library under its own name: tfx.__version__ is the TFX release,
# not the TensorFlow one, so TensorFlow is printed separately from `tf`.
print("TensorFlow Version:", tf.__version__)
print("TFX Version:", tfx.__version__)
print("KFP Version:", kfp.__version__)

In [None]:
!rm -r src/raw_schema/.ipynb_checkpoints/

In [None]:
PROJECT = 'ksalama-cloudml' # Change to your project Id.
REGION = 'us-central1'
BUCKET = 'ksalama-cloudml-us' # Change to your bucket.

# Never store the API key in the notebook: read it from the environment.
# Export API_KEY before starting Jupyter. Any key that was previously
# committed should be treated as compromised and rotated.
API_KEY = os.environ.get("API_KEY", "")
if not API_KEY:
    # Only the AI Platform Managed Pipelines submission needs the key, so warn
    # here instead of failing and blocking the local run.
    print("WARNING: API_KEY is not set; set the API_KEY environment variable "
          "before submitting to AI Platform Managed Pipelines.")

# Image used as the Cloud Build runtime for the CI/CD steps.
CICD_IMAGE_NAME = 'cicd:latest'
CICD_IMAGE_URI = f"gcr.io/{PROJECT}/{CICD_IMAGE_NAME}"

## Build CI/CD Container Image for Cloud Build

This is the runtime environment where the steps of testing and deploying the model will be executed.

In [None]:
!echo $CICD_IMAGE_URI

In [None]:
!gcloud builds submit --tag $CICD_IMAGE_URI build/. --timeout=15m

## 1. Run the Pipeline CICD steps locally

### Set pipeline configurations for the local run

In [None]:
# Settings for the local run, exported as environment variables
# (presumably consumed by src.pipelines.config — confirm).
os.environ["DATASET_DISPLAY_NAME"] = 'chicago_taxi_tips'
os.environ["MODEL_DISPLAY_NAME"]  =  'chicago_taxi_tips_classifier_v1'
# Reuse the notebook-level constants so that changing PROJECT / REGION in the
# configuration cell also takes effect for the local run.
os.environ["PROJECT"] = PROJECT
os.environ["REGION"] = REGION
os.environ["GCS_LOCATION"] = f"gs://{BUCKET}/ucaip_demo/chicago_taxi/pipelines_local_runner"
# Small sample limits keep the local run fast.
os.environ["TRAIN_LIMIT"] = "8500"
os.environ["TEST_LIMIT"] = "1500"
# Run Beam and training on the local machine.
os.environ["BEAM_RUNNER"] = "DirectRunner"
os.environ["TRAINING_RUNNER"] = "local"

In [None]:
from src.pipelines import config

# List every upper-case (constant-style) attribute of the config module.
for name, setting in vars(config).items():
    if not name.isupper():
        continue
    print(f'{name}: {setting}')

### Configure local metadata store

In [None]:
# File name of the local SQLite-backed ML Metadata store.
MLMD_SQLLITE = 'mlmd.sqllite'

gcs_location = os.environ["GCS_LOCATION"]
print(f"artifacts location: {gcs_location}")

# Start from a clean slate: drop artifacts left by a previous run ...
if tf.io.gfile.exists(gcs_location):
    print("Removing previous artifacts...")
    tf.io.gfile.rmtree(gcs_location)

# ... and the previous local metadata database.
if tf.io.gfile.exists(MLMD_SQLLITE):
    print("Removing local mlmd SQLite...")
    tf.io.gfile.remove(MLMD_SQLLITE)

metadata_connection_config = metadata_store_pb2.ConnectionConfig()
metadata_connection_config.sqlite.filename_uri = MLMD_SQLLITE
# Named enum instead of the bare value 3: open read/write and create the
# database file if it does not exist.
metadata_connection_config.sqlite.connection_mode = (
    metadata_store_pb2.SqliteMetadataSourceConfig.READWRITE_OPENCREATE
)
print("ML metadata store is ready.")

### Run the pipeline locally

In [None]:
from src.pipelines import training_pipeline

In [None]:
# Root location for pipeline outputs: <artifact store>/<pipeline name>.
pipeline_root = os.path.join(
    config.ARTIFACT_STORE_URI,
    config.PIPELINE_NAME,
)

# Named `local_runner` rather than `runner` so it does not collide with the
# `src.pipelines.runner` module imported later in this notebook.
local_runner = LocalDagRunner()

# Build the training pipeline against the local metadata store with the
# hyperparameters for this local run.
pipeline = training_pipeline.create_pipeline(
    metadata_connection_config=metadata_connection_config,
    pipeline_root=pipeline_root,
    num_epochs=50,
    batch_size=512,
    learning_rate=0.0003,
    hidden_units="256,128",
)

local_runner.run(pipeline)

print("Pipeline finished execution.")

### Set the pipeline configurations for the AI Platform run

In [None]:
# Settings for the AI Platform Managed Pipelines run, exported in one call.
managed_run_settings = {
    "API_KEY": API_KEY,
    "DATASET_DISPLAY_NAME": 'chicago_taxi_tips',
    "PROJECT": PROJECT,
    "REGION": REGION,
    "GCS_LOCATION": f"gs://{BUCKET}/ucaip_demo/chicago_taxi/pipelines_managed_runner",
    "TRAIN_LIMIT": "85000",
    "TEST_LIMIT": "15000",
    "BEAM_RUNNER": "DataflowRunner",
    "TRAINING_RUNNER": "caip",
}
os.environ.update(managed_run_settings)

In [None]:
import importlib

from src.pipelines import config

# A repeated `import` just returns the cached module, so reload it to
# re-evaluate its module-level values against the environment variables set
# for the AI Platform run (assumes config reads os.environ at import time —
# TODO confirm).
config = importlib.reload(config)

# List every upper-case (constant-style) attribute of the config module.
for key, value in config.__dict__.items():
    if key.isupper(): print(f'{key}: {value}')

### Build container image

This is the TFX runtime environment for the training pipeline steps.

In [None]:
# Display the URI of the TFX pipeline image.
# NOTE(review): IMAGE_URI is not assigned in any cell of this notebook (only
# CICD_IMAGE_URI is). Presumably it should come from src.pipelines.config or
# the shell environment — confirm, otherwise this prints an empty line.
!echo $IMAGE_URI

In [None]:
# Build the current directory with Cloud Build and tag it as the pipeline
# image (15 minute build timeout).
# NOTE(review): IMAGE_URI is not assigned in any cell of this notebook;
# presumably it should come from src.pipelines.config or the shell
# environment — confirm, otherwise --tag receives no value.
!gcloud builds submit --tag $IMAGE_URI . --timeout=15m

### Compile pipeline

In [None]:
# NOTE: this import rebinds the notebook-level name `runner`, which the local
# run cell used for a LocalDagRunner instance, to the project's runner module.
from src.pipelines import runner

# Compile the pipeline; the returned value is passed as `job_spec_path` when
# the run is submitted.
pipeline_definition_file = runner.compile_pipeline()

### Submit run to AI Platform Managed Pipelines

In [None]:
from aiplatform.pipelines import client

# Hyperparameter overrides for this managed run.
run_parameters = {
    'learning_rate': 0.003,
    'batch_size': 512,
    'hidden_units': '128,128',
    'num_epochs': 30,
}

# Client bound to the target project and region, authenticated by API key.
pipeline_client = client.Client(project_id=PROJECT, region=REGION, api_key=API_KEY)

# Submit the compiled pipeline definition as a new run.
pipeline_client.create_run_from_job_spec(
    job_spec_path=pipeline_definition_file,
    parameter_values=run_parameters,
)

![Pipeline execution](imgs/managed-pipeline.png)

## 2. Execute the Model Deployment CI/CD routine in Cloud Build