In [1]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Guide to Building End-to-End Reinforcement Learning Application Pipelines using Vertex AI

<table align="left">

  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/tree/master/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/mlops_pipeline_tf_agents_bandits_movie_recommendation/mlops_pipeline_tf_agents_bandits_movie_recommendation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/tree/master/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/mlops_pipeline_tf_agents_bandits_movie_recommendation/mlops_pipeline_tf_agents_bandits_movie_recommendation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>

## Overview

This demo showcases the use of [TF-Agents](https://www.tensorflow.org/agents), [Kubeflow Pipelines (KFP)](https://www.kubeflow.org/docs/components/pipelines/overview/pipelines-overview/) and [Vertex AI](https://cloud.google.com/vertex-ai), particularly [Vertex Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines), in building an end-to-end reinforcement learning (RL) pipeline of a movie recommendation system. The demo is intended for developers who want to create RL applications using TensorFlow, TF-Agents and Vertex AI services, and those who want to build end-to-end production pipelines using KFP and Vertex Pipelines. It is recommended for developers to have familiarity with RL and the contextual bandits formulation, and the TF-Agents interface.

### Dataset

This demo uses the [MovieLens 100K](https://www.kaggle.com/prajitdatta/movielens-100k-dataset) dataset to simulate an environment with users and their respective preferences. It is available at `gs://cloud-samples-data/vertex-ai/community-content/tf_agents_bandits_movie_recommendation_with_kfp_and_vertex_sdk/u.data`.


### Objective

In this notebook, you will learn how to build an end-to-end RL pipeline for a TF-Agents (particularly the bandits module) based movie recommendation system, using [KFP](https://www.kubeflow.org/docs/components/pipelines/overview/pipelines-overview/), [Vertex AI](https://cloud.google.com/vertex-ai) and particularly [Vertex Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines) which is fully managed and highly scalable.

This Vertex Pipeline includes the following components:
1. *Generator* to generate MovieLens simulation data
2. *Ingester* to ingest data
3. *Trainer* to train the RL policy
4. *Deployer* to deploy the trained policy to a Vertex AI endpoint

After pipeline construction, you (1) create the *Simulator* (which utilizes Cloud Functions, Cloud Scheduler and Pub/Sub) to send simulated MovieLens prediction requests, (2) create the *Logger* to asynchronously log prediction inputs and results (which utilizes Cloud Functions, Pub/Sub and a hook in the prediction code), and (3) create the *Trigger* to trigger recurrent re-training.

A more general ML pipeline is demonstrated in [MLOps on Vertex AI](https://github.com/ksalama/ucaip-labs).

## Load notebook config

* use the prefix defined in `00-env-setup`

In [2]:
# Both values must match the ones chosen in `00-env-setup`; the prefix is used
# below to derive the bucket that holds the shared notebook configuration.
VERSION = "v2"
PREFIX = f"rec-bandits-{VERSION}"

print(f"PREFIX: {PREFIX}")

PREFIX: rec-bandits-v2


In [3]:
# staging GCS
# `!cmd` capture yields the command output as a list of lines; the first line
# is taken as the active gcloud project ID.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Download the shared configuration file from the bucket, echo it, and execute
# it in the notebook namespace. The executed file re-assigns PROJECT_ID, PREFIX,
# BUCKET_NAME and BUCKET_URI (see the printed output), so the values computed
# above only serve to locate the file.
# SECURITY NOTE(review): exec() runs whatever Python text is stored at this GCS
# path with the notebook's credentials. Only use a bucket you control, and
# inspect the printed contents before trusting them. If `gsutil cat` fails, the
# captured text is presumably the error message and exec() will raise — confirm.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "rec-bandits-v2"
VERSION                  = "v2"

BUCKET_NAME              = "rec-bandits-v2-hybrid-vertex-bucket"
BUCKET_URI               = "gs://rec-bandits-v2-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://rec-bandits-v2-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"
DATA_PATH_KFP_DEMO       = "gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_NAME    = "rec_bandits_v2_bucket"
BIGQUERY_TABLE_NA

In [4]:
# Print the installed google_cloud_pipeline_components version. This runs the
# `python3` found on PATH in a subprocess, which may differ from the kernel's
# own interpreter.
! python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

google_cloud_pipeline_components version: 2.4.1


### Import libraries and define constants

In [6]:
import os

# Set before anything in this notebook imports TensorFlow so its native
# (C++) logging is quietened: level "2" filters out INFO and WARNING messages.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [7]:
import sys
import time 

from google.cloud import aiplatform as vertex_ai
from google.cloud import bigquery

### pipelines
import kfp
from kfp import compiler, dsl
from kfp.dsl import Artifact, Dataset, Input, Metrics, Model, Output, component

from google_cloud_pipeline_components.v1.model import ModelUploadOp
from google_cloud_pipeline_components.v1.endpoint import ModelDeployOp, EndpointCreateOp
from google_cloud_pipeline_components.types import artifact_types
# from kfp.components import importer_node

# Noise control for the rest of the notebook session.
import logging
import warnings

# Drop log records at WARNING severity and below.
logging.disable(logging.WARNING)

# Hide every Python warning raised from here on.
warnings.filterwarnings("ignore")

# Report the SDK versions this notebook is running against.
print(f'kfp version: {kfp.__version__}')
print(f'bigquery SDK version: {bigquery.__version__}')
print(f'vertex_ai SDK version: {vertex_ai.__version__}')

kfp version: 2.3.0
bigquery SDK version: 3.11.4
vertex_ai SDK version: 1.33.1


## Set Vertex AI Experiment

In [8]:
# Experiment identity: one Vertex AI experiment per PREFIX.
EXPERIMENT_NAME = f"mab-pipe-{PREFIX}"

# Each execution of this cell gets its own timestamped run name.
invoke_time = time.strftime("%Y%m%d-%H%M%S")
RUN_NAME = f"run-{invoke_time}"

# All outputs of this run live under a single GCS prefix.
BASE_OUTPUT_DIR = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}"
LOG_DIR = f"{BASE_OUTPUT_DIR}/logs"
# Root directory for writing logs/summaries/checkpoints.
ROOT_DIR = f"{BASE_OUTPUT_DIR}/root"
# Where the trained model will be saved and restored.
ARTIFACTS_DIR = f"{BASE_OUTPUT_DIR}/artifacts"

# Point the Vertex AI SDK at the project, region and experiment.
vertex_ai.init(project=PROJECT_ID, location=REGION, experiment=EXPERIMENT_NAME)

# BigQuery client bound to the same project and the configured BigQuery location.
bigquery_client = bigquery.Client(project=PROJECT_ID, location=BQ_LOCATION)

# Echo the resolved settings so the run can be traced from the notebook output.
print(f"EXPERIMENT_NAME    : {EXPERIMENT_NAME}")
print(f"RUN_NAME           : {RUN_NAME}\n")
print(f"BASE_OUTPUT_DIR    : {BASE_OUTPUT_DIR}")
print(f"LOG_DIR            : {LOG_DIR}")
print(f"ROOT_DIR           : {ROOT_DIR}")
print(f"ARTIFACTS_DIR      : {ARTIFACTS_DIR}")
print(f"DATA_PATH_KFP_DEMO : {DATA_PATH_KFP_DEMO}")

EXPERIMENT_NAME    : mab-pipe-rec-bandits-v2
RUN_NAME           : run-20231019-184119

BASE_OUTPUT_DIR    : gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119
LOG_DIR            : gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/logs
ROOT_DIR           : gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/root
ARTIFACTS_DIR      : gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/artifacts
DATA_PATH_KFP_DEMO : gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data


# Create the RL pipeline components

This section consists of the following steps:
1. Create the *Generator* to generate MovieLens simulation data
2. Create the *Ingester* to ingest data
3. Create the *Trainer* to train the RL policy
4. Create the *Deployer* to deploy the trained policy to a Vertex AI endpoint

After pipeline construction, create the *Simulator* to send simulated MovieLens prediction requests, create the *Logger* to asynchronously log prediction inputs and results, and create the *Trigger* to trigger re-training.

Here's the entire workflow:
1. The startup pipeline has the following components: Generator --> Ingester --> Trainer --> Deployer. This pipeline only runs once.
2. Then, the Simulator generates prediction requests (e.g. every 5 mins), and the Logger gets invoked immediately at each prediction request and logs each prediction request asynchronously into BigQuery. The Trigger runs the re-training pipeline (e.g. every 30 mins) with the following components: Ingester --> Trainer --> Deployer.

You can find the KFP SDK documentation [here](https://www.kubeflow.org/docs/components/pipelines/sdk/sdk-overview/).

In [9]:
# Render the notebook-level settings as Python source so that code under `src/`
# can import them from `src/utils/data_config.py`.
data_config = f'''
PROJECT_ID          = "{PROJECT_ID}"
PREFIX              = "{PREFIX}"
BUCKET_NAME         = "{BUCKET_NAME}"
EXPERIMENT_NAME     = "{EXPERIMENT_NAME}"
RUN_NAME            = "{RUN_NAME}"
DATA_PATH           = "{DATA_PATH}"
BASE_OUTPUT_DIR     = "{BASE_OUTPUT_DIR}"
LOG_DIR             = "{LOG_DIR}"
ROOT_DIR            = "{ROOT_DIR}"
ARTIFACTS_DIR       = "{ARTIFACTS_DIR}"
DATA_PATH_KFP_DEMO  = "{DATA_PATH_KFP_DEMO}"
BQ_LOCATION         = "{BQ_LOCATION}"
BIGQUERY_DATASET_NAME = "{BIGQUERY_DATASET_NAME}"
BIGQUERY_TABLE_NAME   = "{BIGQUERY_TABLE_NAME}"
'''

# Overwrite the module on every run so it always reflects the current RUN_NAME.
with open('src/utils/data_config.py', 'w') as config_file:
    config_file.write(data_config)

Leave the variables in the next cell as they are.

In [12]:
# Dataset parameters
BIGQUERY_MAX_ROWS = 5                                      # Maximum number of rows of data in BigQuery to ingest.
BIGQUERY_TMP_FILE = ('tmp.json')

## (1) Generator

> Create the *Generator* to generate MovieLens simulation data

Create the Generator component to generate the initial set of training data using a MovieLens simulation environment and a random data-collecting policy. Store the generated data in BigQuery.

The Generator source code is [`src/generator/generator_component.py`](src/generator/generator_component.py).

### Run unit tests on the Generator component

Tests for [`src/generator/test_generator_component.py`](src/generator/test_generator_component.py)
* To avoid printing long test logs in-notebook, run the below commands in a notebook terminal

In [10]:
# ! python3 -m unittest src.generator.test_generator_component

In [None]:
from src.generator.generator_component import generate_movielens_dataset_for_bigquery

# generate_movielens_dataset_for_bigquery(
#     project_id=PROJECT_ID,
#     raw_data_path=DATA_PATH_KFP_DEMO,
#     batch_size=8,
#     rank_k=20,
#     num_actions=20,
#     driver_steps=3,
#     bigquery_tmp_file=BIGQUERY_TMP_FILE,
#     bigquery_dataset_name=BIGQUERY_DATASET_NAME,
#     bigquery_location=BQ_LOCATION,
#     bigquery_table_name=BIGQUERY_TABLE_NAME,
# )

# ==============================================================================

# # generate_movielens_dataset_for_bigquery().bigquery_dataset_name
# ==============================================================================
# """The Generator component for generating MovieLens simulation data."""
# ==============================================================================

In [19]:
from typing import NamedTuple
# NOTE(review): `output_component_file` asks KFP to also write the component
# spec to the given path; the KFP v2 SDK documents this option as deprecated in
# favour of `compiler.Compiler().compile(...)` — confirm before upgrading KFP.
@component(
    base_image="tensorflow/tensorflow:2.13.0",
    output_component_file='./src/trainer2/generate_op.yaml',
    packages_to_install=[
      "google-cloud-bigquery",
      "tensorflow==2.13.0",
      "tf-agents==0.17.0",
    ],
)
def generate_movielens_dataset_for_bigquery(
    project_id: str,
    raw_data_path: str,
    batch_size: int,
    rank_k: int,
    num_actions: int,
    driver_steps: int,
    bigquery_tmp_file: str,
    bigquery_dataset_name: str,
    bigquery_location: str,
    bigquery_table_name: str
) -> NamedTuple("Outputs", [
    ("bigquery_dataset_name", str),
    ("bigquery_location", str),
    ("bigquery_table_name", str),
]):
  """Generates BigQuery training data using a MovieLens simulation environment.

  Serves as the Generator pipeline component:
  1. Generates `trajectories.Trajectory` data by applying a random policy on
    MovieLens simulation environment.
  2. Converts `trajectories.Trajectory` data to JSON format and writes it to
    `bigquery_tmp_file`, one JSON document per line.
  3. Loads the JSON-formatted file into BigQuery, creating the dataset first if
    it does not exist yet.

  This function is to be built into a Kubeflow Pipelines (KFP) component. As a
  result, this function must be entirely self-contained. This means that the
  import statements and helper functions must reside within itself.

  Args:
    project_id: GCP project ID. This is required because otherwise the BigQuery
      client will use the ID of the tenant GCP project created as a result of
      KFP, which doesn't have proper access to BigQuery.
    raw_data_path: Path to MovieLens 100K's "u.data" file.
    batch_size: Batch size of environment generated quantities eg. rewards.
    rank_k: Rank for matrix factorization in the MovieLens environment; also
      the observation dimension.
    num_actions: Number of actions (movie items) to choose from.
    driver_steps: Number of steps to run per batch.
    bigquery_tmp_file: Path of the newline-delimited JSON file this component
      writes the generated data to before loading it into BigQuery.
    bigquery_dataset_name: Name of the BigQuery dataset, without the project
      prefix; the component builds the full ID as
      "{project_id}.{bigquery_dataset_name}".
    bigquery_location: A string of the BigQuery dataset location.
    bigquery_table_name: Name of the BigQuery table, without project or dataset
      prefix; the component builds the full ID as
      "{project_id}.{bigquery_dataset_name}.{bigquery_table_name}".

  Returns:
    A NamedTuple of (`bigquery_dataset_name`, `bigquery_location`,
    `bigquery_table_name`), echoing the corresponding inputs unchanged.
  """
  # Imports live inside the function because KFP ships only this function body
  # to the component container.
  # pylint: disable=g-import-not-at-top
  import collections
  import json
  from typing import Any, Dict

  from google.cloud import bigquery

  from tf_agents import replay_buffers
  from tf_agents import trajectories
  from tf_agents.bandits.agents.examples.v2 import trainer
  from tf_agents.bandits.environments import movielens_py_environment
  from tf_agents.drivers import dynamic_step_driver
  from tf_agents.environments import tf_py_environment
  from tf_agents.policies import random_tf_policy

  def generate_simulation_data(
      raw_data_path: str,
      batch_size: int,
      rank_k: int,
      num_actions: int,
      driver_steps: int) -> replay_buffers.TFUniformReplayBuffer:
    """Generates `trajectories.Trajectory` data from the simulation environment.

    Constructs a MovieLens simulation environment, and generates a set of
    `trajectories.Trajectory` data using a random policy.

    Args:
      raw_data_path: Path to MovieLens 100K's "u.data" file.
      batch_size: Batch size of environment generated quantities eg. rewards.
      rank_k: Rank for matrix factorization in the MovieLens environment; also
        the observation dimension.
      num_actions: Number of actions (movie items) to choose from.
      driver_steps: Number of steps to run per batch.

    Returns:
      A replay buffer holding randomly generated `trajectories.Trajectory`
      data.
    """
    # Create MovieLens simulation environment.
    env = movielens_py_environment.MovieLensPyEnvironment(
        raw_data_path,
        rank_k,
        batch_size,
        num_movies=num_actions,
        csv_delimiter="\t")
    environment = tf_py_environment.TFPyEnvironment(env)

    # Define random policy for collecting data.
    random_policy = random_tf_policy.RandomTFPolicy(
        action_spec=environment.action_spec(),
        time_step_spec=environment.time_step_spec())

    # Use replay buffer and observers to keep track of Trajectory data.
    # NOTE(review): `_get_replay_buffer` is a private helper of the TF-Agents
    # example trainer and is called positionally; the final argument (1) is
    # presumably `async_steps_per_loop` — confirm against the pinned tf-agents
    # version before changing that pin.
    data_spec = random_policy.trajectory_spec
    replay_buffer = trainer._get_replay_buffer(
        data_spec
        , environment.batch_size
        , driver_steps
        , 1
    )
    observers = [replay_buffer.add_batch]

    # Run driver to apply the random policy in the simulation environment.
    # The step budget is scaled by the environment batch size.
    driver = dynamic_step_driver.DynamicStepDriver(
        env=environment,
        policy=random_policy,
        num_steps=driver_steps * environment.batch_size,
        observers=observers)
    driver.run()

    return replay_buffer

  def build_dict_from_trajectory(
      trajectory: trajectories.Trajectory) -> Dict[str, Any]:
    """Builds a JSON-serializable dict from `trajectory` data.

    Tensor fields are converted to nested Python lists. Each entry of the
    observation list is wrapped as `{"observation_batch": ...}`, which is the
    shape the BigQuery schema in `load_dataset_into_bigquery` declares.
    `policy_info` is passed through as-is.

    Args:
      trajectory: A `trajectories.Trajectory` object.

    Returns:
      A dict holding the same data as `trajectory`.
    """
    trajectory_dict = {
        "step_type": trajectory.step_type.numpy().tolist(),
        "observation": [{
            "observation_batch": batch
        } for batch in trajectory.observation.numpy().tolist()],
        "action": trajectory.action.numpy().tolist(),
        # Not converted with .numpy(); presumably an empty structure for the
        # random policy — TODO confirm.
        "policy_info": trajectory.policy_info,
        "next_step_type": trajectory.next_step_type.numpy().tolist(),
        "reward": trajectory.reward.numpy().tolist(),
        "discount": trajectory.discount.numpy().tolist(),
    }
    return trajectory_dict

  def write_replay_buffer_to_file(
      replay_buffer: replay_buffers.TFUniformReplayBuffer,
      batch_size: int,
      dataset_file: str) -> None:
    """Writes replay buffer data to a file, each JSON in one line.

    Reads the buffer through `as_dataset(sample_batch_size=batch_size)` and
    writes one JSON document per dataset element, so `dataset_file` ends up as
    a newline-delimited JSON file. The number of elements written equals the
    buffer's `num_frames()`.

    Args:
      replay_buffer: A `replay_buffers.TFUniformReplayBuffer` holding
        `trajectories.Trajectory` objects.
      batch_size: Batch size of environment generated quantities eg. rewards.
      dataset_file: File path. Will be overwritten if already exists.
    """
    dataset = replay_buffer.as_dataset(sample_batch_size=batch_size)
    dataset_size = replay_buffer.num_frames().numpy()

    with open(dataset_file, "w") as f:
      for example in dataset.take(count=dataset_size):
        # Only the first item of each dataset element (the trajectory) is used.
        traj_dict = build_dict_from_trajectory(example[0])
        f.write(json.dumps(traj_dict) + "\n")

  def load_dataset_into_bigquery(
      project_id: str,
      dataset_file: str,
      bigquery_dataset_name: str,
      bigquery_location: str,
      bigquery_table_name: str) -> None:
    """Loads training dataset into BigQuery table.

    Loads training dataset of `trajectories.Trajectory` in newline delimited
    JSON into a BigQuery dataset and table, using a BigQuery client. The
    dataset is created if it does not exist yet.

    Args:
      project_id: GCP project ID. This is required because otherwise the
        BigQuery client will use the ID of the tenant GCP project created as a
        result of KFP, which doesn't have proper access to BigQuery.
      dataset_file: Path to a JSON file containing the training dataset.
      bigquery_dataset_name: Name of the BigQuery dataset, without the project
        prefix ("dataset_name").
      bigquery_location: A string of the BigQuery dataset location.
      bigquery_table_name: Name of the BigQuery table, without project or
        dataset prefix ("table_name").
    """

    # Fully qualified dataset ID: "project.dataset".
    _bq_dataset_ref = f"{project_id}.{bigquery_dataset_name}"

    # Construct a BigQuery client object.
    client = bigquery.Client(project=project_id)

    # Construct a full Dataset object to send to the API.
    dataset = bigquery.Dataset(_bq_dataset_ref)

    # Specify the geographic location where the dataset should reside.
    dataset.location = bigquery_location

    # Create the dataset, or get the dataset if it exists.
    dataset = client.create_dataset(dataset = dataset, exists_ok=True, timeout=30)

    # Schema mirrors the keys produced by `build_dict_from_trajectory`.
    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("step_type", "INT64", mode="REPEATED"),
            bigquery.SchemaField(
                "observation",
                "RECORD",
                mode="REPEATED",
                fields=[
                    bigquery.SchemaField("observation_batch", "FLOAT64",
                                         "REPEATED")
                ]),
            bigquery.SchemaField("action", "INT64", mode="REPEATED"),
            bigquery.SchemaField("policy_info", "FLOAT64", mode="REPEATED"),
            bigquery.SchemaField("next_step_type", "INT64", mode="REPEATED"),
            bigquery.SchemaField("reward", "FLOAT64", mode="REPEATED"),
            bigquery.SchemaField("discount", "FLOAT64", mode="REPEATED"),
        ],
        source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
    )

    # Fully qualified table ID: "project.dataset.table".
    _bq_table_ref = f"{project_id}.{bigquery_dataset_name}.{bigquery_table_name}"

    with open(dataset_file, "rb") as source_file:
      load_job = client.load_table_from_file(
          source_file, _bq_table_ref, job_config=job_config)

    load_job.result()  # Wait for the job to complete.

  # Step 1: simulate and collect trajectories.
  replay_buffer = generate_simulation_data(
      raw_data_path=raw_data_path,
      batch_size=batch_size,
      rank_k=rank_k,
      num_actions=num_actions,
      driver_steps=driver_steps
  )

  # Step 2: stage the trajectories as newline-delimited JSON.
  write_replay_buffer_to_file(
      replay_buffer=replay_buffer,
      batch_size=batch_size,
      dataset_file=bigquery_tmp_file
  )

  # Step 3: load the staged file into BigQuery.
  load_dataset_into_bigquery(project_id, bigquery_tmp_file, bigquery_dataset_name,
                             bigquery_location, bigquery_table_name)

  # Echo the BigQuery coordinates so downstream components can consume them.
  outputs = collections.namedtuple(
      "Outputs",
      ["bigquery_dataset_name", "bigquery_location", "bigquery_table_name"])

  return outputs(bigquery_dataset_name, bigquery_location, bigquery_table_name)

# compiler.Compiler().compile(pipeline_func=generator_comp, package_path="./src/trainer2/generate_op.yaml")

## (2) Ingester

> Create the *Ingester* to ingest data

Create the Ingester component to ingest data from BigQuery, package them as `tf.train.Example` objects, and output TFRecord files.

Read more about `tf.train.Example` and TFRecord [here](https://www.tensorflow.org/tutorials/load_data/tfrecord).

The Ingester component source code is in [`src/ingester/ingester_component.py`](src/ingester/ingester_component.py).

### Run unit tests on the Ingester component

In [20]:
# ! python3 -m unittest src.ingester.test_ingester_component

In [21]:
# ingestor_comp = src.ingester.ingest_bigquery_dataset_into_tfrecord(
#     project_id=PROJECT_ID,
#     bigquery_table_name=BIGQUERY_TABLE_NAME,
#     bigquery_dataset_name=BIGQUERY_DATASET_NAME,
#     bigquery_max_rows=10000,
#     tfrecord_file=TFRECORD_FILE,
# )

In [44]:
from typing import NamedTuple
# NOTE(review): `output_component_file` asks KFP to also write the component
# spec to the given path; the KFP v2 SDK documents this option as deprecated in
# favour of `compiler.Compiler().compile(...)` — confirm before upgrading KFP.
@component(
    base_image="tensorflow/tensorflow:2.13.0",
    output_component_file='./src/trainer2/ingest_op.yaml',
    packages_to_install=[
      "google-cloud-bigquery",
      "tensorflow==2.13.0",
    ],
)
def ingest_bigquery_dataset_into_tfrecord(
    project_id: str,
    bigquery_table_name: str,
    bigquery_dataset_name: str,
    tfrecord_file: str,
    bigquery_max_rows: int = None
) -> NamedTuple("Outputs", [
    ("tfrecord_file", str),
]):
  """Ingests data from BigQuery, formats them and outputs TFRecord files.

  Serves as the Ingester pipeline component:
  1. Reads data in BigQuery that contains 7 pieces of data: `step_type`,
    `observation`, `action`, `policy_info`, `next_step_type`, `reward`,
    `discount`.
  2. Packages the data as `tf.train.Example` objects and outputs them as
    TFRecord files.

  This function is to be built into a Kubeflow Pipelines (KFP) component. As a
  result, this function must be entirely self-contained. This means that the
  import statements and helper functions must reside within itself.

  Args:
    project_id: GCP project ID. This is required because otherwise the BigQuery
      client will use the ID of the tenant GCP project created as a result of
      KFP, which doesn't have proper access to BigQuery.
    bigquery_table_name: Name of the BigQuery table to read, without project or
      dataset prefix; the component builds the full ID as
      "{project_id}.{bigquery_dataset_name}.{bigquery_table_name}".
    bigquery_dataset_name: Name of the BigQuery dataset that contains the
      table, without the project prefix.
    tfrecord_file: Path to file to write the ingestion result TFRecords.
    bigquery_max_rows: Optional; maximum number of rows to ingest. `None`
      (the default) is passed through to BigQuery as `max_results=None`.

  Returns:
    A NamedTuple of the path to the output TFRecord file (the `tfrecord_file`
    input, unchanged).
  """
  # Imports live inside the function because KFP ships only this function body
  # to the component container.
  # pylint: disable=g-import-not-at-top
  import collections
  from typing import Optional

  from google.cloud import bigquery

  import tensorflow as tf

  def read_data_from_bigquery(
      project_id: str,
      bigquery_table_name: str,
      bigquery_dataset_name: str,
      bigquery_max_rows: Optional[int]) -> bigquery.table.RowIterator:
    """Reads data from the given BigQuery table and creates an iterator.

    The table contains 7 columns that form `trajectories.Trajectory` objects:
    `step_type`, `observation`, `action`, `policy_info`, `next_step_type`,
    `reward`, `discount`.

    Args:
      project_id: GCP project ID. This is required because otherwise the
        BigQuery client will use the ID of the tenant GCP project created as a
        result of KFP, which doesn't have proper access to BigQuery.
      bigquery_table_name: Name of the BigQuery table, without project or
        dataset prefix ("table_name").
      bigquery_dataset_name: Name of the BigQuery dataset, without the project
        prefix ("dataset_name").
      bigquery_max_rows: Optional; maximum number of rows to fetch.

    Returns:
      A row iterator over the result of `SELECT *` on the table, limited to
      `bigquery_max_rows` rows via `max_results`.
    """
    # Construct a BigQuery client object.
    client = bigquery.Client(project=project_id)

    # Fully qualified table ID: "project.dataset.table".
    _bq_table_id = f"{project_id}.{bigquery_dataset_name}.{bigquery_table_name}"

    # Get dataset.
    query_job = client.query(
        f"""
        SELECT * FROM `{_bq_table_id}`
        """
    )
    # The row cap is applied when fetching results, not in the SQL itself.
    table = query_job.result(max_results=bigquery_max_rows)

    return table

  def _bytes_feature(tensor: tf.Tensor) -> tf.train.Feature:
    """Returns a `tf.train.Feature` with bytes from `tensor`.

    Serializes `tensor` with `tf.io.serialize_tensor` and stores the result as
    the single element of a `BytesList`.

    Args:
      tensor: A `tf.Tensor` object. Callers in this component pass the Python
        lists read from BigQuery rows rather than ready-made tensors.

    Returns:
      A `tf.train.Feature` object containing bytes that represent the content of
      `tensor`.
    """
    value = tf.io.serialize_tensor(tensor)
    # If the serialized value is a tensor of the same type `tf.constant`
    # produces, unwrap it with .numpy() before building the BytesList.
    if isinstance(value, type(tf.constant(0))):
      value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  def build_example(data_row: bigquery.table.Row) -> tf.train.Example:
    """Builds a `tf.train.Example` from `data_row` content.

    Every column becomes one serialized-tensor bytes feature keyed by the
    column name. For `observation`, the `{"observation_batch": ...}` wrappers
    are stripped first so the feature holds a plain nested list.

    Args:
      data_row: A `bigquery.table.Row` object that contains 7 pieces of data:
        `step_type`, `observation`, `action`, `policy_info`, `next_step_type`,
        `reward`, `discount`. Each piece of data except `observation` is a 1D
        array; `observation` is a 1D array of `{"observation_batch": 1D array}.`

    Returns:
      A `tf.train.Example` object holding the same data as `data_row`.
    """
    feature = {
        "step_type":
            _bytes_feature(data_row.get("step_type")),
        "observation":
            _bytes_feature([
                observation["observation_batch"]
                for observation in data_row.get("observation")
            ]),
        "action":
            _bytes_feature(data_row.get("action")),
        "policy_info":
            _bytes_feature(data_row.get("policy_info")),
        "next_step_type":
            _bytes_feature(data_row.get("next_step_type")),
        "reward":
            _bytes_feature(data_row.get("reward")),
        "discount":
            _bytes_feature(data_row.get("discount")),
    }
    example_proto = tf.train.Example(
        features=tf.train.Features(feature=feature))
    return example_proto

  def write_tfrecords(
      tfrecord_file: str,
      table: bigquery.table.RowIterator) -> None:
    """Writes the row data in `table` into TFRecords in `tfrecord_file`.

    One serialized `tf.train.Example` record is written per row.

    Args:
      tfrecord_file: Path to file to write the TFRecords.
      table: A row iterator over all data to be written.
    """
    with tf.io.TFRecordWriter(tfrecord_file) as writer:
      for data_row in table:
        example = build_example(data_row)
        writer.write(example.SerializeToString())

  # Step 1: fetch the rows from BigQuery.
  table = read_data_from_bigquery(
      project_id=project_id,
      bigquery_dataset_name=bigquery_dataset_name,
      bigquery_table_name=bigquery_table_name,
      bigquery_max_rows=bigquery_max_rows
  )

  # Step 2: convert every row to a tf.train.Example and write it out.
  write_tfrecords(tfrecord_file, table)

  # Echo the output path so downstream components can consume it.
  outputs = collections.namedtuple(
      "Outputs",
      ["tfrecord_file"])

  return outputs(tfrecord_file)

## (3) Trainer

> Create the *Trainer* to train the RL policy

Create the Trainer component to train the RL policy on the training dataset, and then submit a remote custom training job to Vertex AI. This component trains a policy using the TF-Agents LinUCB agent on the MovieLens simulation dataset, and saves the trained policy as a `SavedModel`.

The Trainer component source code is in [`src/trainer/trainer_component.py`](src/trainer/trainer_component.py). You use additional Vertex AI platform code in pipeline construction to submit the training code defined in `Trainer` as a custom training job to Vertex AI. (The additional code is similar to what [`kfp.v2.google.experimental.run_as_aiplatform_custom_job`](https://github.com/kubeflow/pipelines/blob/master/sdk/python/kfp/v2/google/experimental/custom_job.py) does. You can find an example notebook [here](https://github.com/GoogleCloudPlatform/ai-platform-samples/blob/master/ai-platform-unified/notebooks/official/pipelines/google_cloud_pipeline_components_model_train_upload_deploy.ipynb) for how to use that first-party Trainer component.)

The Trainer performs **off-policy training**, where you train a policy on a static set of pre-collected data records containing information including `observation`, `action` and `reward`. For a data record, the policy in training might not output the same action given the observation in that data record.

If you're interested in pipeline metrics, read about [KFP Pipeline Metrics](https://www.kubeflow.org/docs/components/pipelines/sdk/pipelines-metrics/) here.

In [45]:
# Resources requested for the Vertex AI custom training job.
# Number of replicas to run the custom training job.
TRAINING_REPLICA_COUNT = 1
# Type of machine to run the custom training job.
TRAINING_MACHINE_TYPE = "n1-highcpu-16"
# Type of accelerators to run the custom training job.
TRAINING_ACCELERATOR_TYPE = "ACCELERATOR_TYPE_UNSPECIFIED"
# Number of accelerators for the custom training job.
TRAINING_ACCELERATOR_COUNT = 0

### Run unit tests on the Trainer component

In [46]:
# ! python3 -m unittest src.trainer.test_trainer_component

In [47]:
# ingestor_comp = src.trainer.train_reinforcement_learning_policy(
#     training_artifacts_dir=ARTIFACTS_DIR,
#     tfrecord_file=TFRECORD_FILE,
#     num_epochs=5,
#     rank_k=20,
#     num_actions=20,
#     tikhonov_weight=0.01,
#     agent_alpha=10,
#     project=PROJECT_ID,
#     location=REGION,
# )

In [68]:
from typing import NamedTuple
@component(
    base_image="tensorflow/tensorflow:2.13.0",
    output_component_file='./src/trainer2/component_train_job_op.yaml',
    packages_to_install=[
      "tensorflow==2.13.0",
      "tf-agents==0.17.0",
    ],
)
def train_reinforcement_learning_policy(
    training_artifacts_dir: str,
    tfrecord_file: str,
    num_epochs: int,
    rank_k: int,
    num_actions: int,
    tikhonov_weight: float,
    agent_alpha: float
) -> NamedTuple("Outputs", [
    ("training_artifacts_dir", str),
]):
  """Implements off-policy training for a policy on dataset of TFRecord files.

  Builds a LinUCB agent, trains it on the records stored in `tfrecord_file`
  for `num_epochs` passes, and saves the trained policy to
  `training_artifacts_dir` with a `PolicySaver`.

  In this notebook the resulting component is wrapped with
  `create_custom_training_job_from_component` (see the "Create Train Job
  Component" section) so that it is submitted to Vertex AI as a custom training
  job.

  This function is to be built into a Kubeflow Pipelines (KFP) component. As a
  result, this function must be entirely self-contained. This means that the
  import statements and helper functions must reside within itself.

  Args:
    training_artifacts_dir: Path to store the Trainer artifacts (trained
      policy).
    tfrecord_file: Path to the TFRecord file containing the training dataset.
    num_epochs: Number of training epochs.
    rank_k: Rank for matrix factorization in the MovieLens environment; also
      the observation dimension.
    num_actions: Number of actions (movie items) to choose from.
    tikhonov_weight: LinUCB Tikhonov regularization weight of the Trainer.
    agent_alpha: LinUCB exploration parameter that multiplies the confidence
      intervals of the Trainer.

  Returns:
    A NamedTuple of (`training_artifacts_dir`); the value is the input path,
    passed through unchanged so downstream steps can consume it.
  """
  # Imports live inside the function because the component must be
  # self-contained (see the docstring above).
  # pylint: disable=g-import-not-at-top
  import collections
  from typing import Dict, List, NamedTuple  # pylint: disable=redefined-outer-name,reimported

  import tensorflow as tf

  from tf_agents import agents
  from tf_agents import policies
  from tf_agents import trajectories
  from tf_agents.bandits.agents import lin_ucb_agent
  from tf_agents.policies import policy_saver
  from tf_agents.specs import tensor_spec

  import logging

  per_arm = False  # Using the non-per-arm version of the MovieLens environment.

  # Mapping from feature name to its parsing spec. Every feature is read as a
  # scalar string; `build_trajectory` later deserializes each one with
  # `tf.io.parse_tensor`.
  feature_description = {
      "step_type": tf.io.FixedLenFeature((), tf.string),
      "observation": tf.io.FixedLenFeature((), tf.string),
      "action": tf.io.FixedLenFeature((), tf.string),
      "policy_info": tf.io.FixedLenFeature((), tf.string),
      "next_step_type": tf.io.FixedLenFeature((), tf.string),
      "reward": tf.io.FixedLenFeature((), tf.string),
      "discount": tf.io.FixedLenFeature((), tf.string),
  }

  def _parse_record(raw_record: tf.Tensor) -> Dict[str, tf.Tensor]:
    """Parses a serialized `tf.train.Example` proto.

    Args:
      raw_record: A serialized data record of a `tf.train.Example` proto.

    Returns:
      A dict mapping feature names to values as `tf.Tensor` objects of type
      string containing serialized protos, following `feature_description`.
    """
    return tf.io.parse_single_example(raw_record, feature_description)

  def build_trajectory(
      parsed_record: Dict[str, tf.Tensor],
      policy_info: policies.utils.PolicyInfo) -> trajectories.Trajectory:
    """Builds a `trajectories.Trajectory` object from `parsed_record`.

    Args:
      parsed_record: A dict mapping feature names to values as `tf.Tensor`
        objects of type string containing serialized protos.
      policy_info: Value placed unchanged into the trajectory's `policy_info`
        field; the caller passes the policy's `info_spec`. The "policy_info"
        entry of `parsed_record` is not used.

    Returns:
      A `trajectories.Trajectory` object that contains values as de-serialized
      `tf.Tensor` objects from `parsed_record`, each with an extra axis
      inserted at index 1.
    """
    # NOTE(review): the axis inserted at index 1 is presumably the time
    # dimension that `agent.train` expects -- confirm against the TF-Agents
    # bandit agent documentation.
    return trajectories.Trajectory(
        step_type=tf.expand_dims(
            tf.io.parse_tensor(parsed_record["step_type"], out_type=tf.int32),
            axis=1),
        observation=tf.expand_dims(
            tf.io.parse_tensor(
                parsed_record["observation"], out_type=tf.float32),
            axis=1),
        action=tf.expand_dims(
            tf.io.parse_tensor(parsed_record["action"], out_type=tf.int32),
            axis=1),
        policy_info=policy_info,
        next_step_type=tf.expand_dims(
            tf.io.parse_tensor(
                parsed_record["next_step_type"], out_type=tf.int32),
            axis=1),
        reward=tf.expand_dims(
            tf.io.parse_tensor(parsed_record["reward"], out_type=tf.float32),
            axis=1),
        discount=tf.expand_dims(
            tf.io.parse_tensor(parsed_record["discount"], out_type=tf.float32),
            axis=1))

  def train_policy_on_trajectory(
      agent: agents.TFAgent,
      tfrecord_file: str,
      num_epochs: int
  ) -> NamedTuple("TrainOutputs", [
      ("policy", policies.TFPolicy),
      ("train_loss", Dict[str, List[float]]),
  ]):
    """Trains the policy in `agent` on the dataset of `tfrecord_file`.

    Parses `tfrecord_file` as `tf.train.Example` objects, packages them into
    `trajectories.Trajectory` objects, and trains the agent's policy on these
    trajectory objects.

    Args:
      agent: A TF-Agents agent that carries the policy to train.
      tfrecord_file: Path to the TFRecord file containing the training dataset.
      num_epochs: Number of epochs to train the policy.

    Returns:
      A NamedTuple of (a trained TF-Agents policy, a dict mapping from
      "epoch<i>" to lists of loss values produced at each training step).
    """
    raw_dataset = tf.data.TFRecordDataset([tfrecord_file])
    parsed_dataset = raw_dataset.map(_parse_record)

    # Keys are "epoch1", "epoch2", ...; each list gets one loss per record.
    train_loss = collections.defaultdict(list)
    for epoch in range(num_epochs):
      # The dataset is iterated from the start again on every epoch.
      for parsed_record in parsed_dataset:
        trajectory = build_trajectory(parsed_record, agent.policy.info_spec)
        # Only the first element returned by `agent.train` (the loss) is kept.
        loss, _ = agent.train(trajectory)
        train_loss[f"epoch{epoch + 1}"].append(loss.numpy())

    train_outputs = collections.namedtuple(
        "TrainOutputs",
        ["policy", "train_loss"])
    return train_outputs(agent.policy, train_loss)

  def execute_training_and_save_policy(
      training_artifacts_dir: str,
      tfrecord_file: str,
      num_epochs: int,
      rank_k: int,
      num_actions: int,
      tikhonov_weight: float,
      agent_alpha: float) -> None:
    """Executes training for the policy and saves the policy.

    Args:
      training_artifacts_dir: Path to store the Trainer artifacts (trained
        policy).
      tfrecord_file: Path to the TFRecord file containing the training dataset.
      num_epochs: Number of training epochs.
      rank_k: Rank for matrix factorization in the MovieLens environment; also
        the observation dimension.
      num_actions: Number of actions (movie items) to choose from.
      tikhonov_weight: LinUCB Tikhonov regularization weight of the Trainer.
      agent_alpha: LinUCB exploration parameter that multiplies the confidence
        intervals of the Trainer.
    """
    # Define time step and action specs for one batch.
    time_step_spec = trajectories.TimeStep(
        step_type=tensor_spec.TensorSpec(
            shape=(), dtype=tf.int32, name="step_type"),
        reward=tensor_spec.TensorSpec(
            shape=(), dtype=tf.float32, name="reward"),
        discount=tensor_spec.BoundedTensorSpec(
            shape=(), dtype=tf.float32, name="discount", minimum=0.,
            maximum=1.),
        observation=tensor_spec.TensorSpec(
            shape=(rank_k,), dtype=tf.float32,
            name="observation"))

    # Actions are integer indices in [0, num_actions - 1].
    action_spec = tensor_spec.BoundedTensorSpec(
        shape=(),
        dtype=tf.int32,
        name="action",
        minimum=0,
        maximum=num_actions - 1)

    # Define RL agent/algorithm.
    agent = lin_ucb_agent.LinearUCBAgent(
        time_step_spec=time_step_spec,
        action_spec=action_spec,
        tikhonov_weight=tikhonov_weight,
        alpha=agent_alpha,
        dtype=tf.float32,
        accepts_per_arm_features=per_arm)
    agent.initialize()
    logging.info("TimeStep Spec (for each batch):\n%s\n", agent.time_step_spec)
    logging.info("Action Spec (for each batch):\n%s\n", agent.action_spec)

    # Perform off-policy training. The per-epoch loss history is discarded
    # here; only the trained policy is kept.
    policy, _ = train_policy_on_trajectory(
        agent=agent,
        tfrecord_file=tfrecord_file,
        num_epochs=num_epochs)

    # Save trained policy.
    saver = policy_saver.PolicySaver(policy)
    saver.save(training_artifacts_dir)

  execute_training_and_save_policy(
      training_artifacts_dir=training_artifacts_dir,
      tfrecord_file=tfrecord_file,
      num_epochs=num_epochs,
      rank_k=rank_k,
      num_actions=num_actions,
      tikhonov_weight=tikhonov_weight,
      agent_alpha=agent_alpha)

  # Echo the artifacts path as the component's single named output.
  outputs = collections.namedtuple(
      "Outputs",
      ["training_artifacts_dir"])

  return outputs(training_artifacts_dir)

# compiler.Compiler().compile(train_op, "./src/trainer2/component_train_job_op.yaml")

## (4) Deployer 

> Create the *Deployer* to deploy the trained policy to a Vertex AI endpoint

Use [`google_cloud_pipeline_components.aiplatform`](https://cloud.google.com/vertex-ai/docs/pipelines/build-pipeline#google-cloud-components) components during pipeline construction to:
1. Upload the trained policy
2. Create a Vertex AI endpoint
3. Deploy the uploaded trained policy to the endpoint

These 3 components formulate the Deployer. They support flexible configurations; for instance, if you want to set up traffic splitting for the endpoint to run A/B testing, you may pass in your configurations to [google_cloud_pipeline_components.aiplatform.ModelDeployOp](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-0.1.3/google_cloud_pipeline_components.aiplatform.html#google_cloud_pipeline_components.aiplatform.ModelDeployOp).

In [48]:
# # Deployer parameters
# TRAINED_POLICY_DISPLAY_NAME = (f"trained-linucb-policy-{VERSION}")
# TRAFFIC_SPLIT = {"0": 100}
# ENDPOINT_DISPLAY_NAME = f"mab-linucb-endpoint-{VERSION}" 
# ENDPOINT_MACHINE_TYPE = "n1-standard-4"                     # Type of machine of the prediction endpoint.
# ENDPOINT_REPLICA_COUNT = 1                                  # Number of replicas of the prediction endpoint.
# ENDPOINT_ACCELERATOR_TYPE = "ACCELERATOR_TYPE_UNSPECIFIED"  # Type of accelerators to run the custom training job.
# ENDPOINT_ACCELERATOR_COUNT = 0                              # Number of accelerators for the custom training job.

# print(f"TRAINED_POLICY_DISPLAY_NAME : {TRAINED_POLICY_DISPLAY_NAME}")
# print(f"ENDPOINT_DISPLAY_NAME       : {ENDPOINT_DISPLAY_NAME}")

TRAINED_POLICY_DISPLAY_NAME : trained-linucb-policy-v2
ENDPOINT_DISPLAY_NAME       : mab-linucb-endpoint-v2


### Prediction container (custom)

> Create a custom prediction container using Cloud Build

Before setting up the Deployer, define and build a custom prediction container that serves predictions using the trained policy. The source code, Cloud Build YAML configuration file and Dockerfile are in `src/prediction_container`.

This prediction container is the serving container for the deployed, trained policy. See a more detailed guide on building prediction custom containers in the step-by-step guide (`04-pipelines/.../train_deploy_mab.ipynb`)

In [49]:
# Prediction container parameters
# Directory holding the prediction container source, Dockerfile and
# Cloud Build configuration.
PREDICTION_CONTAINER_DIR = "src/prediction_container"

# Pub/Sub topic name for the Logger.
LOGGER_PUBSUB_TOPIC = "logger-pubsub-topic"
# Cloud Functions name for the Logger.
LOGGER_CLOUD_FUNCTION = "logger-cloud-function"

### Serving CI/CD with Cloud Build

> Create a Cloud Build YAML file using Kaniko build

Note: For this application, we recommend using `E2_HIGHCPU_8` or another high-resource machine configuration instead of the standard machine type listed [here](https://cloud.google.com/build/docs/api/reference/rest/v1/projects.builds#Build.MachineType) to prevent out-of-memory errors.

In [50]:
# Render the Cloud Build configuration: a single Kaniko executor step whose
# destination is the prediction image in this project's Container Registry.
cloudbuild_yaml = f"""steps:
- name: "gcr.io/kaniko-project/executor:latest"
  args: ["--destination=gcr.io/{PROJECT_ID}/{IMAGE_NAME_04_pred}:latest",
         "--cache=false",
         "--cache-ttl=99h"]
  env: ["AIP_STORAGE_URI={ARTIFACTS_DIR}",
        "PROJECT_ID={PROJECT_ID}",
        "LOGGER_PUBSUB_TOPIC={LOGGER_PUBSUB_TOPIC}"]
options:
  machineType: "E2_HIGHCPU_8"
"""
# Uncomment to inspect the rendered YAML before it is written.
# print(cloudbuild_yaml)

# Persist the configuration next to the prediction container sources.
with open(f"{PREDICTION_CONTAINER_DIR}/cloudbuild.yaml", "w") as fp:
    fp.write(cloudbuild_yaml)

### Run unit tests on the prediction code

In [51]:
# ! python3 -m unittest src.prediction_container.test_main

### Build custom prediction container

In [52]:
# ! gcloud builds submit --config $PREDICTION_CONTAINER_DIR/cloudbuild.yaml $PREDICTION_CONTAINER_DIR

In [53]:
# Assemble the `gcloud builds submit` invocation; this cell only prints the
# command, it does not run it.
CLOUD_BUILD_CMD = " ".join(
    [
        "gcloud builds submit",
        f"--config {PREDICTION_CONTAINER_DIR}/cloudbuild.yaml",
        PREDICTION_CONTAINER_DIR,
    ]
)

print(CLOUD_BUILD_CMD)

gcloud builds submit --config src/prediction_container/cloudbuild.yaml src/prediction_container


## Create Managed TensorBoard instance

In [54]:
# # create new TB instance
TENSORBOARD_DISPLAY_NAME=f"{EXPERIMENT_NAME}-{RUN_NAME}"

tensorboard = vertex_ai.Tensorboard.create(
    display_name=TENSORBOARD_DISPLAY_NAME
    , project=PROJECT_ID
    , location=REGION
)

TB_RESOURCE_NAME = tensorboard.resource_name

# use existing TB instance
# TB_RESOURCE_NAME = 'projects/934903580331/locations/us-central1/tensorboards/6924469145035603968'

print(f"TB_RESOURCE_NAME: {TB_RESOURCE_NAME}")
print(f"TB display name: {tensorboard.display_name}")

TB_RESOURCE_NAME: projects/934903580331/locations/us-central1/tensorboards/3188552934224822272
TB display name: mab-pipe-rec-bandits-v2-run-20231019-184119


# Build RL pipeline

You author the pipeline using custom KFP components built from the previous section, and [create a pipeline run](https://cloud.google.com/vertex-ai/docs/pipelines/run-pipeline#kubeflow-pipelines-sdk) using Vertex AI Pipelines. You can read more about whether to enable execution caching [here](https://cloud.google.com/vertex-ai/docs/pipelines/build-pipeline#caching). You can also specifically configure the `worker pool spec` for training if for instance you want to train at scale and/or at a higher speed; you can adjust the `replica count`, `machine type`, `accelerator` type and count, and many other specifications.

Here, you build a "startup" pipeline that generates randomly sampled training data (with the `Generator`) as the first step. This pipeline runs only once.

In [55]:
from google_cloud_pipeline_components.v1.custom_job import utils as pipe_utils
from google_cloud_pipeline_components.v1.custom_job import create_custom_training_job_from_component as custom_train_comp
from kfp.components import load_component_from_url, load_component_from_file

**PIPE_VERSION** is an *optional* label to help organize pipeline runs or *versions*

In [82]:
# Label interpolated into the pipeline root, pipeline name, endpoint/policy
# display names and the Simulator resource names defined further below.
PIPE_VERSION = "v8"

### Online Agent's Endpoint

In [102]:
# Deployer parameters
# Display names for the uploaded policy (model) and its serving endpoint.
TRAINED_POLICY_DISPLAY_NAME = f"trained-linucb-policy-{PIPE_VERSION}"
ENDPOINT_DISPLAY_NAME = f"mab-linucb-endpoint-{PIPE_VERSION}"

# Traffic split passed to the model deployment step.
TRAFFIC_SPLIT = {"0": 100}

# Serving resources of the prediction endpoint.
ENDPOINT_MACHINE_TYPE = "n1-standard-4"  # Type of machine of the prediction endpoint.
ENDPOINT_REPLICA_COUNT = 1  # Number of replicas of the prediction endpoint.
ENDPOINT_ACCELERATOR_TYPE = "ACCELERATOR_TYPE_UNSPECIFIED"  # Type of accelerators of the prediction endpoint.
ENDPOINT_ACCELERATOR_COUNT = 0  # Number of accelerators of the prediction endpoint.

print(f"TRAINED_POLICY_DISPLAY_NAME : {TRAINED_POLICY_DISPLAY_NAME}")
print(f"ENDPOINT_DISPLAY_NAME       : {ENDPOINT_DISPLAY_NAME}")

TRAINED_POLICY_DISPLAY_NAME : trained-linucb-policy-v8
ENDPOINT_DISPLAY_NAME       : mab-linucb-endpoint-v8


### Pipeline parameters

In [83]:
# pipeline root
# Root directory for pipeline artifacts.
PIPELINE_ROOT = f"{BASE_OUTPUT_DIR}/pipeline_{PIPE_VERSION}_root"
print("PIPELINE_ROOT  :", PIPELINE_ROOT)

# TFRecord file to be used for training.
TFRECORD_FILE = f"{PIPELINE_ROOT}/trainer_input_path/*"
print(f"TFRECORD_FILE : {TFRECORD_FILE}")

# Dataset parameters.
# Maximum number of rows of data in BigQuery to ingest.
BIGQUERY_MAX_ROWS = 5
# Temporary file name handed to the Generator step as `bigquery_tmp_file`.
BIGQUERY_TMP_FILE = "tmp.json"

PIPELINE_ROOT  : gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/pipeline_v8_root
TFRECORD_FILE : gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/pipeline_v8_root/trainer_input_path/*


### Load custom pipeline steps from component YAML

In [84]:
# Generator and Ingester steps are loaded from their component YAML files.
# generate_op = load_component_from_file("./src/generator/component.yaml")
generate_op = load_component_from_file("./src/trainer2/generate_op.yaml")

# ingest_op = load_component_from_file("./src/ingester/component.yaml")
ingest_op = load_component_from_file("./src/trainer2/ingest_op.yaml")

# The Trainer step is the `train_reinforcement_learning_policy` component
# defined earlier in this notebook. Bind it explicitly: `train_op` used to be
# assigned only by the commented-out loads below, so the print further down
# raised NameError on a fresh kernel.
# TODO: switch to loading from YAML once the Trainer component file is settled.
# train_op = load_component_from_file("./src/trainer/component.yaml")
# train_op = load_component_from_file("./src/trainer2/component_train_job_op.yaml")
train_op = train_reinforcement_learning_policy

print(f"generate_op : {generate_op.name}")
print(f"train_op    : {train_op.name}")
print(f"ingest_op   : {ingest_op.name}")

generate_op : generate-movielens-dataset-for-bigquery
train_op    : train-reinforcement-learning-policy
ingest_op   : ingest-bigquery-dataset-into-tfrecord


### Create Train Job Component

**TODO**
* I'm confused with this - don't understand yet

**References**
* google cloud components [ReadTheDocs](https://google-cloud-pipeline-components.readthedocs.io/en/google-cloud-pipeline-components-2.0.0/api/v1/model.html#v1.model.ModelUploadOp)
* Google Cloud Components source [code on GitHub](https://github.com/kubeflow/pipelines/tree/master/components/google-cloud/google_cloud_pipeline_components)
  * code example: [lightweight python function](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/pipelines/custom_model_training_and_batch_prediction.ipynb) converting to `CustomJob`
  * code example: [self-contained training component to Vertex AI Training](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/9901150c5c82e9646a68c8c800fdf9d0c407c574/notebooks/official/pipelines/get_started_with_machine_management.ipynb#L445)
* `kfp` source [code on GitHub](https://github.com/kubeflow/pipelines/tree/master/sdk/python/kfp)

In [85]:
# no decorator needed, presumably this is handled in `pipe_utils`?

# Run the Trainer component and submit custom job to Vertex AI.
# Wrap the Trainer component (already decorated with `@component` where it is
# defined) so that it is submitted to Vertex AI as a custom training job.
component_train_job_op = pipe_utils.create_custom_training_job_from_component(
    # What to run and how the job is labelled.
    component_spec=train_reinforcement_learning_policy,
    display_name='mab-training-job',
    # Worker pool resources.
    machine_type=TRAINING_MACHINE_TYPE,
    replica_count=TRAINING_REPLICA_COUNT,
    accelerator_type=TRAINING_ACCELERATOR_TYPE,
    accelerator_count=TRAINING_ACCELERATOR_COUNT,
    # Job behaviour and outputs.
    restart_job_on_worker_restart=False,
    enable_web_access=True,
    base_output_directory=BASE_OUTPUT_DIR,
    tensorboard=TB_RESOURCE_NAME,
    service_account=VERTEX_SA,
)

# compiler.Compiler().compile(train_op, "./src/trainer2/component_train_job_op.yaml")

<kfp.dsl.yaml_component.YamlComponent at 0x7fcc4f60a350>

In [86]:
# Inspect which inputs the wrapped training component requires; these are the
# Trainer function's own parameters.
component_train_job_op.required_inputs

['agent_alpha',
 'num_actions',
 'num_epochs',
 'rank_k',
 'tfrecord_file',
 'tikhonov_weight',
 'training_artifacts_dir']

## Create pipeline

In [87]:
# Underscores are swapped for hyphens in the pipeline name.
PIPELINE_NAME = "mab-agent-mlops-pipe-{}".format(PIPE_VERSION).replace("_", "-")
print("Pipeline Display Name:", PIPELINE_NAME)

Pipeline Display Name: mab-agent-mlops-pipe-v8


In [88]:
@dsl.pipeline(
    name=f"{PIPELINE_NAME}-startup"
)
def pipeline(
    # Pipeline configs
    project_id: str,
    raw_data_path: str,
    training_artifacts_dir: str,
    tb_resource_name: str,
    base_train_output_uri: str,
    # BigQuery configs
    bigquery_dataset_name: str,
    bigquery_location: str,
    bigquery_table_name: str,
    bigquery_max_rows: int = 10000,
    # TF-Agents RL configs
    batch_size: int = 8,
    rank_k: int = 20,
    num_actions: int = 20,
    driver_steps: int = 3,
    num_epochs: int = 5,
    tikhonov_weight: float = 0.01,
    agent_alpha: float = 10,
) -> None:
    """Authors a RL pipeline for MovieLens movie recommendation system.

    Integrates the Generator, Ingester, Trainer and Deployer components. This
    pipeline generates initial training data with a random policy and runs once
    as the initiation of the system.

    Besides the parameters below, the body reads several notebook globals
    (`TFRECORD_FILE`, `BIGQUERY_TMP_FILE`, `REGION`, `PROJECT_ID`,
    `IMAGE_NAME_04_pred`, `TRAINED_POLICY_DISPLAY_NAME`, `ENDPOINT_*`,
    `TRAFFIC_SPLIT`). Their values are fixed when the pipeline is compiled, so
    recompile after changing any of them.

    Args:
      project_id: GCP project ID. This is required because otherwise the BigQuery
        client will use the ID of the tenant GCP project created as a result of
        KFP, which doesn't have proper access to BigQuery.
      raw_data_path: Path to MovieLens 100K's "u.data" file.
      training_artifacts_dir: Path to store the Trainer artifacts (trained policy).
      tb_resource_name: TensorBoard resource name. Not consumed by any step in
        this pipeline (the TensorBoard instance is configured on the training
        job component when it is created); kept so existing submissions that
        pass it remain valid.
      base_train_output_uri: Base output URI for training. Not consumed by any
        step in this pipeline (the base output directory is configured on the
        training job component when it is created); kept so existing
        submissions that pass it remain valid.

      bigquery_dataset_name: A string of the BigQuery dataset ID in the format of
        "project.dataset".
      bigquery_location: A string of the BigQuery dataset location.
      bigquery_table_name: A string of the BigQuery table ID in the format of
        "table_name".
      bigquery_max_rows: Optional; maximum number of rows to ingest.

      batch_size: Optional; batch size of environment generated quantities eg.
        rewards.
      rank_k: Optional; rank for matrix factorization in the MovieLens environment;
        also the observation dimension.
      num_actions: Optional; number of actions (movie items) to choose from.
      driver_steps: Optional; number of steps to run per batch.
      num_epochs: Optional; number of training epochs.
      tikhonov_weight: Optional; LinUCB Tikhonov regularization weight of the
        Trainer.
      agent_alpha: Optional; LinUCB exploration parameter that multiplies the
        confidence intervals of the Trainer.
    """

    from kfp.dsl import importer_node

    # Run the Generator component -> simulated data, in trajectory form, stored in BigQuery
    generate_task = (
        generate_op(
            project_id=project_id,
            raw_data_path=raw_data_path,
            batch_size=batch_size,
            rank_k=rank_k,
            num_actions=num_actions,
            driver_steps=driver_steps,
            bigquery_tmp_file=BIGQUERY_TMP_FILE,
            bigquery_dataset_name=bigquery_dataset_name,
            bigquery_location=bigquery_location,
            bigquery_table_name=bigquery_table_name,
        )
        .set_display_name("Generator")
        .set_caching_options(True)
    )

    # Run the Ingester component -> write Trajectories to TFRecords
    ingest_task = (
        ingest_op(
            project_id=project_id,
            bigquery_table_name=generate_task.outputs["bigquery_table_name"],
            bigquery_dataset_name=generate_task.outputs["bigquery_dataset_name"],
            bigquery_max_rows=bigquery_max_rows,
            tfrecord_file=TFRECORD_FILE,
        )
        .set_display_name("Ingestor")
        .set_caching_options(True)
    )

    # Run the Trainer as a Vertex AI custom job (the component was wrapped by
    # `create_custom_training_job_from_component`). The job's project comes
    # from the `project_id` pipeline parameter, like every other step, rather
    # than from the notebook global captured at compile time.
    train_task = (
        component_train_job_op(
            training_artifacts_dir=training_artifacts_dir,
            tfrecord_file=ingest_task.outputs["tfrecord_file"],
            num_epochs=num_epochs,
            rank_k=rank_k,
            num_actions=num_actions,
            tikhonov_weight=tikhonov_weight,
            agent_alpha=agent_alpha,
            project=project_id,
            location=REGION,
        )
        .set_display_name("TrainerTask")
        .set_caching_options(True)
    )

    # import trained policy to the pipeline DAG; policy is now an artifact in the metadata graph
    import_unmanaged_model_task = (
        importer_node.importer(
            artifact_uri=train_task.outputs["training_artifacts_dir"],
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={
                "containerSpec": {
                    # Serving image URI, built from notebook globals.
                    "imageUri": f"gcr.io/{PROJECT_ID}/{IMAGE_NAME_04_pred}:latest"
                }
            },
        )
    )

    # Run the Deployer components.
    # Upload the trained policy as a model.
    model_upload_task = (
        ModelUploadOp(
            project=project_id,
            display_name=TRAINED_POLICY_DISPLAY_NAME,
            unmanaged_container_model=import_unmanaged_model_task.outputs["artifact"],
            # artifact_uri=train_task.outputs["training_artifacts_dir"],
            # serving_container_image_uri=f"gcr.io/{PROJECT_ID}/{IMAGE_NAME_04_pred}:latest",
        )
        .set_display_name("Register Trained Policy")
        .set_caching_options(True)
    )
    # Create a Vertex AI endpoint. (This operation can occur in parallel with
    # the Generator, Ingester, Trainer components.)
    endpoint_create_task = (
        EndpointCreateOp(
            project=project_id,
            display_name=ENDPOINT_DISPLAY_NAME
        )
        .set_display_name("Create Online Endpoint")
        .set_caching_options(True)
    )
    # Deploy the uploaded, trained policy to the created endpoint. (This operation
    # has to occur after both model uploading and endpoint creation complete.)
    # Nothing consumes this task's outputs, so it is not bound to a name.
    (
        ModelDeployOp(
            endpoint=endpoint_create_task.outputs["endpoint"],
            model=model_upload_task.outputs["model"],
            deployed_model_display_name=TRAINED_POLICY_DISPLAY_NAME,
            traffic_split=TRAFFIC_SPLIT,
            dedicated_resources_machine_type=ENDPOINT_MACHINE_TYPE,
            dedicated_resources_accelerator_type=ENDPOINT_ACCELERATOR_TYPE,
            dedicated_resources_accelerator_count=ENDPOINT_ACCELERATOR_COUNT,
            dedicated_resources_min_replica_count=ENDPOINT_REPLICA_COUNT,
        )
        .set_display_name("Deploy Policy")
        .set_caching_options(True)
    )

### Compile pipeline

In [89]:
PIPELINE_SPEC_LOCAL_PATH = "custom_pipeline_spec.json"                      # Path to pipeline specification file.

# remove any spec from previous runs
! rm -f $PIPELINE_SPEC_LOCAL_PATH

PIPELINE_SPEC_GCS_URI = f'{PIPELINE_ROOT}/{PIPELINE_SPEC_LOCAL_PATH}'
print("PIPELINE_SPEC_GCS_URI:", PIPELINE_SPEC_GCS_URI)

# Compile the authored pipeline.
compiler.Compiler().compile(
    pipeline_func=pipeline, 
    package_path=PIPELINE_SPEC_LOCAL_PATH
)

PIPELINE_SPEC_GCS_URI: gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/pipeline_v8_root/custom_pipeline_spec.json


Copy spec to pipeline root...

In [90]:
# Upload the compiled spec; the pipeline job below reads it from this GCS URI
# as its `template_path`.
!gsutil -q cp ./$PIPELINE_SPEC_LOCAL_PATH $PIPELINE_SPEC_GCS_URI

## Submit Pipeline to Vertex AI

In [91]:
# Create a pipeline run job.
# Create a pipeline run job from the compiled spec stored in GCS.
job = vertex_ai.PipelineJob(
    display_name=f"{PIPELINE_NAME}-startup",
    template_path=PIPELINE_SPEC_GCS_URI,
    pipeline_root=PIPELINE_ROOT,
    failure_policy="fast",  # Either "slow" or "fast".
    enable_caching=True,
    parameter_values={
        # Pipeline configs
        "project_id": PROJECT_ID,
        "raw_data_path": DATA_PATH_KFP_DEMO,
        "training_artifacts_dir": ARTIFACTS_DIR,
        "base_train_output_uri": BASE_OUTPUT_DIR,
        "tb_resource_name": TB_RESOURCE_NAME,
        # BigQuery configs
        "bigquery_dataset_name": BIGQUERY_DATASET_NAME,
        "bigquery_location": BQ_LOCATION,
        "bigquery_table_name": BIGQUERY_TABLE_NAME,
        # "bigquery_max_rows": 10000,
        # TF-Agents RL configs (same values as the pipeline defaults)
        "batch_size": 8,
        "rank_k": 20,
        "num_actions": 20,
        "driver_steps": 3,
        "num_epochs": 5,
        "tikhonov_weight": 0.01,
        "agent_alpha": 10,
    },
)

# Submit without blocking the notebook.
job.run(
    service_account=VERTEX_SA,
    sync=False,
    # network=f'projects/{PROJECT_NUM}/global/networks/{VPC_NETWORK_NAME}'
)

# Create the *Simulator* to send simulated MovieLens prediction requests

Create the Simulator, which [obtains observations](https://github.com/tensorflow/agents/blob/v0.8.0/tf_agents/bandits/environments/movielens_py_environment.py#L118-L125) from the MovieLens simulation environment, formats them, and sends prediction requests to the Vertex AI endpoint.

The workflow is: Cloud Scheduler --> Pub/Sub --> Cloud Functions --> Endpoint

In production, this Simulator logic can be modified to that of gathering real-world input features as observations, getting prediction results from the endpoint and communicating those results to real-world users.

The Simulator source code is [`src/simulator/main.py`](src/simulator/main.py).

**TODO**
* parameterize Cloud Function args

In [95]:
# Simulator parameters

# Pub/Sub topic name for the Simulator.
# Pub/Sub topic name for the Simulator.
SIMULATOR_PUBSUB_TOPIC = f"simulator-pubsub-topic-{PIPE_VERSION}"

# Cloud Functions name for the Simulator.
SIMULATOR_CLOUD_FUNCTION = f"simulator-cloud-function-{PIPE_VERSION}"

# Cloud Scheduler cron job name for the Simulator.
SIMULATOR_SCHEDULER_JOB = f"simulator-scheduler-job-{PIPE_VERSION}"

# Cloud Scheduler cron job schedule for the Simulator. Eg. "*/5 * * * *" means every 5 mins.
SIMULATOR_SCHEDULE = "*/5 * * * *"

# Cloud Scheduler message for the Simulator.
SIMULATOR_SCHEDULER_MESSAGE = f"simulator-message-{PIPE_VERSION}"

# TF-Agents RL configs
BATCH_SIZE = 8
RANK_K = 20
NUM_ACTIONS = 20

print(f"SIMULATOR_PUBSUB_TOPIC      : {SIMULATOR_PUBSUB_TOPIC}")
print(f"SIMULATOR_CLOUD_FUNCTION    : {SIMULATOR_CLOUD_FUNCTION}")
print(f"SIMULATOR_SCHEDULER_JOB     : {SIMULATOR_SCHEDULER_JOB}")
print(f"SIMULATOR_SCHEDULE          : {SIMULATOR_SCHEDULE}")
print(f"SIMULATOR_SCHEDULER_MESSAGE : {SIMULATOR_SCHEDULER_MESSAGE}")

SIMULATOR_PUBSUB_TOPIC      : simulator-pubsub-topic-v8
SIMULATOR_CLOUD_FUNCTION    : simulator-cloud-function-v8
SIMULATOR_SCHEDULER_JOB     : simulator-scheduler-job-v8
SIMULATOR_SCHEDULE          : */5 * * * *
SIMULATOR_SCHEDULER_MESSAGE : simulator-message-v8


### Run unit tests on the Simulator

In [None]:
# ! python3 -m unittest src.simulator.test_main

### Create a Pub/Sub topic

- Read more about creating Pub/Sub topics [here](https://cloud.google.com/functions/docs/tutorials/pubsub)

In [96]:
# ! gcloud pubsub topics create $SIMULATOR_PUBSUB_TOPIC

Created topic [projects/hybrid-vertex/topics/simulator-pubsub-topic-v8].


### Set up a recurrent Cloud Scheduler job for the Pub/Sub topic

- Read more about possible ways to create cron jobs [here](https://cloud.google.com/scheduler/docs/creating#gcloud).
- Read about the cron job schedule format [here](https://man7.org/linux/man-pages/man5/crontab.5.html).

In [97]:
scheduler_job_args = " ".join(
    [
        SIMULATOR_SCHEDULER_JOB,
        f"--schedule='{SIMULATOR_SCHEDULE}'",
        f"--topic={SIMULATOR_PUBSUB_TOPIC}",
        f"--message-body={SIMULATOR_SCHEDULER_MESSAGE}",
    ]
)

! echo $scheduler_job_args

simulator-scheduler-job-v8 --schedule=*/5 * * * * --topic=simulator-pubsub-topic-v8 --message-body=simulator-message-v8


In [100]:
# Create the Cloud Scheduler job that publishes the Simulator message to the
# Pub/Sub topic on the configured schedule.
! gcloud scheduler jobs create pubsub $scheduler_job_args --location=$REGION

name: projects/hybrid-vertex/locations/us-central1/jobs/simulator-scheduler-job-v8
pubsubTarget:
  data: c2ltdWxhdG9yLW1lc3NhZ2Utdjg=
  topicName: projects/hybrid-vertex/topics/simulator-pubsub-topic-v8
retryConfig:
  maxBackoffDuration: 3600s
  maxDoublings: 16
  maxRetryDuration: 0s
  minBackoffDuration: 5s
schedule: '*/5 * * * *'
state: ENABLED
timeZone: Etc/UTC
userUpdateTime: '2023-10-19T20:13:22Z'


### Define the *Simulator* logic in a Cloud Function to be triggered periodically, and deploy this Function

- Specify dependencies of the Function in [`src/simulator/requirements.txt`](src/simulator/requirements.txt).
- Read more about the available configurable arguments for deploying a Function [here](https://cloud.google.com/sdk/gcloud/reference/functions/deploy). For instance, based on the complexity of your Function, you may want to adjust its memory and timeout.
- Note that the environment variables in `ENV_VARS` should be comma-separated; there should not be additional spaces, or other characters in between. Read more about setting/updating/deleting environment variables [here](https://cloud.google.com/functions/docs/env-var).
- Read more about sending predictions to Vertex endpoints [here](https://cloud.google.com/vertex-ai/docs/predictions/online-predictions-custom-models).

In [101]:
# Capture the output lines of `gcloud ai endpoints list`, filtered to endpoints
# whose display name equals ENDPOINT_DISPLAY_NAME, then echo them for inspection.
endpoints = ! gcloud ai endpoints list \
    --region=$REGION \
    --filter=display_name=$ENDPOINT_DISPLAY_NAME
print("\n".join(endpoints), "\n")

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
ENDPOINT_ID          DISPLAY_NAME
6324832886641917952  mab-linucb-endpoint-v2
4323827277205864448  mab-linucb-endpoint-v2
7899685381338038272  mab-linucb-endpoint-v2
982156353696956416   mab-linucb-endpoint-v2 



In [105]:
# Pick the endpoint ID out of the captured `gcloud ai endpoints list` output.
# Data rows start with a purely numeric ID, whereas the status line and the
# column header do not, so rows are detected by content rather than by a fixed
# position in the output. When several endpoints share the display name, the
# first one listed is used.
_endpoint_ids = [
    fields[0]
    for fields in (line.split() for line in endpoints)
    if fields and fields[0].isdigit()
]
if not _endpoint_ids:
    raise RuntimeError(
        f"No endpoint with display name {ENDPOINT_DISPLAY_NAME!r} was found in {REGION!r}."
    )
ENDPOINT_ID = _endpoint_ids[0]
print(f"ENDPOINT_ID={ENDPOINT_ID}")

ENDPOINT_ID=6324832886641917952


In [106]:
ENV_VARS = ",".join(
    [
        f"PROJECT_ID={PROJECT_ID}",
        f"REGION={REGION}",
        f"ENDPOINT_ID={ENDPOINT_ID}",
        f"RAW_DATA_PATH={DATA_PATH_KFP_DEMO}",
        f"BATCH_SIZE={BATCH_SIZE}",
        f"RANK_K={RANK_K}",
        f"NUM_ACTIONS={NUM_ACTIONS}",
    ]
)

! echo $ENV_VARS

PROJECT_ID=hybrid-vertex,REGION=us-central1,ENDPOINT_ID=6324832886641917952,RAW_DATA_PATH=gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data,BATCH_SIZE=8,RANK_K=20,NUM_ACTIONS=20


#### Deploy Cloud Function
Ingress settings control what traffic can reach the function
* Default == `all` 
* `INGRESS_SETTINGS` must be one of: `all`, `internal-only`, `internal-and-gclb`

If you need to update your org policy, see `constraints/cloudfunctions.allowedIngressSettings` in the [IAM & Admin Console](https://console.cloud.google.com/iam-admin/orgpolicies/list). Also refer to the Cloud Functions documentation, e.g., [gcloud functions deploy](https://cloud.google.com/sdk/gcloud/reference/functions/deploy) and [secure and control access](https://cloud.google.com/functions/docs/securing/using-vpc-service-controls#console).

In [125]:
# get org ID
# ! gcloud organizations list

In [111]:
# !gcloud resource-manager org-policies allow cloudfunctions.allowedIngressSettings ALLOW_INTERNAL_ONLY \
#     --organization $YOUR_ORG_ID

In [112]:
# Deploy the Simulator as a Cloud Function triggered by the Simulator Pub/Sub
# topic (entry point `simulate`, source in src/simulator).
# ENV_VARS must hold the Simulator settings built in the preceding cell; the
# Logger section later rebinds the same name.
! gcloud functions deploy $SIMULATOR_CLOUD_FUNCTION \
    --gen2 \
    --region=$REGION \
    --trigger-topic=$SIMULATOR_PUBSUB_TOPIC \
    --runtime=python310 \
    --memory=512MB \
    --timeout=200s \
    --source=src/simulator \
    --entry-point=simulate \
    --stage-bucket=$BUCKET_NAME \
    --update-env-vars=$ENV_VARS \
    --no-allow-unauthenticated \
    --ingress-settings=internal-and-gclb



Deploying function (may take a while - up to 2 minutes)...⠹                    
For Cloud Build Logs, visit: https://console.cloud.google.com/cloud-build/builds;region=us-central1/1fd773a7-6308-4480-9f39-788809060fed?project=934903580331
Deploying function (may take a while - up to 2 minutes)...done.                
availableMemoryMb: 512
buildId: 1fd773a7-6308-4480-9f39-788809060fed
buildName: projects/934903580331/locations/us-central1/builds/1fd773a7-6308-4480-9f39-788809060fed
dockerRegistry: CONTAINER_REGISTRY
entryPoint: simulate
environmentVariables:
  BATCH_SIZE: '8'
  ENDPOINT_ID: '6324832886641917952'
  NUM_ACTIONS: '20'
  PROJECT_ID: hybrid-vertex
  RANK_K: '20'
  RAW_DATA_PATH: gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data
  REGION: us-central1
eventTrigger:
  eventType: google.pubsub.topic.publish
  failurePolicy: {}
  resource: projects/hybrid-vertex/topics/simulator-pubsub-topic-v8
  service: pubsub.googleapis.com
ingressSettings: ALLOW_INTERNAL_AN

## Create the *Logger* to asynchronously log prediction inputs and results

Create the Logger to get environment feedback as rewards from the MovieLens simulation environment based on prediction observations and predicted actions, formulate trajectory data, and store said data back to BigQuery. The Logger closes the RL feedback loop from prediction to training data, and allows re-training of the policy on new training data.

The Logger is triggered by a hook in the prediction code. At each prediction request, the prediction code messages a Pub/Sub topic, which triggers the Logger code.

The workflow is: prediction container code (at prediction request) --> Pub/Sub --> Cloud Functions (logging predictions back to BigQuery)

In production, this Logger logic can be modified to that of gathering real-world feedback (rewards) based on observations and predicted actions.

The Logger source code is [`src/logger/main.py`](src/logger/main.py).

### Run unit tests on the Logger

In [None]:
# ! python3 -m unittest src.logger.test_main

### Create a Pub/Sub topic

- Read more about creating Pub/Sub topics [here](https://cloud.google.com/functions/docs/tutorials/pubsub)

In [114]:
# Create the Pub/Sub topic that is used as the trigger topic of the Logger Cloud Function.
! gcloud pubsub topics create $LOGGER_PUBSUB_TOPIC

Created topic [projects/hybrid-vertex/topics/logger-pubsub-topic].


### Define the *Logger* logic in a Cloud Function to be triggered by a Pub/Sub topic, which is triggered by the prediction code at each prediction request.

- Specify dependencies of the Function in [`src/logger/requirements.txt`](src/logger/requirements.txt).
- Read more about the available configurable arguments for deploying a Function [here](https://cloud.google.com/sdk/gcloud/reference/functions/deploy). For instance, based on the complexity of your Function, you may want to adjust its memory and timeout.
- Note that the environment variables in `ENV_VARS` should be comma-separated; there should not be additional spaces, or other characters in between. Read more about setting/updating/deleting environment variables [here](https://cloud.google.com/functions/docs/env-var).

Arguments set aside for now (not passed to the deploy command below):
* `--no-allow-unauthenticated`

In [115]:
ENV_VARS = ",".join(
    [
        f"PROJECT_ID={PROJECT_ID}",
        f"RAW_DATA_PATH={DATA_PATH_KFP_DEMO}",
        f"BATCH_SIZE={BATCH_SIZE}",
        f"RANK_K={RANK_K}",
        f"NUM_ACTIONS={NUM_ACTIONS}",
        f"BIGQUERY_TMP_FILE={BIGQUERY_TMP_FILE}",
        f"BIGQUERY_DATASET_NAME={BIGQUERY_DATASET_NAME}",
        f"BIGQUERY_LOCATION={BQ_LOCATION}",
        f"BIGQUERY_TABLE_NAME={BIGQUERY_TABLE_NAME}",
    ]
)

! echo $ENV_VARS

PROJECT_ID=hybrid-vertex,RAW_DATA_PATH=gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data,BATCH_SIZE=8,RANK_K=20,NUM_ACTIONS=20,BIGQUERY_TMP_FILE=tmp.json,BIGQUERY_DATASET_NAME=rec_bandits_v2_bucket,BIGQUERY_LOCATION=US,BIGQUERY_TABLE_NAME=training_dataset


In [117]:
# Deploy the Logger as a Cloud Function triggered by the Logger Pub/Sub topic
# (entry point `log`, source in src/logger); `-q` suppresses interactive prompts.
# ENV_VARS must hold the Logger settings built in the preceding cell.
! gcloud functions deploy -q $LOGGER_CLOUD_FUNCTION \
    --gen2 \
    --region=$REGION \
    --trigger-topic=$LOGGER_PUBSUB_TOPIC \
    --runtime=python310 \
    --memory=512MB \
    --timeout=200s \
    --source=src/logger \
    --entry-point=log \
    --stage-bucket=$BUCKET_NAME \
    --update-env-vars=$ENV_VARS \
    --ingress-settings=internal-and-gclb

# Retraining pipeline 

Topics
* discuss continuous learning / near-real-time learning
* built-in exploration strategies
* objectives 

## Set Vertex AI Experiment

In [None]:
# NOTE(review): every assignment in this cell is commented out, so the print
# statements at the bottom rely on EXPERIMENT_NAME, RUN_NAME, BASE_OUTPUT_DIR,
# LOG_DIR, ROOT_DIR and ARTIFACTS_DIR already existing in the kernel —
# presumably from an earlier setup cell; confirm before running on a fresh kernel.

# EXPERIMENT_NAME   = f'mab-retrain-pipe-{PREFIX}'

# invoke_time       = time.strftime("%Y%m%d-%H%M%S")
# RUN_NAME          = f'run-{invoke_time}'

# BASE_OUTPUT_DIR   = f'{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}'
# LOG_DIR           = f"{BASE_OUTPUT_DIR}/logs"
# ROOT_DIR          = f"{BASE_OUTPUT_DIR}/root"       # Root directory for writing logs/summaries/checkpoints.
# ARTIFACTS_DIR     = f"{BASE_OUTPUT_DIR}/artifacts"  # Where the trained model will be saved and restored.

# vertex_ai.init(
#     project=PROJECT_ID,
#     location=REGION,
#     experiment=EXPERIMENT_NAME
# )

# bigquery_client = bigquery.Client(project=PROJECT_ID, location=BQ_LOCATION)

# Echo the experiment/run identifiers and output locations currently in effect.
print(f"EXPERIMENT_NAME    : {EXPERIMENT_NAME}")
print(f"RUN_NAME           : {RUN_NAME}\n")
print(f"BASE_OUTPUT_DIR    : {BASE_OUTPUT_DIR}")
print(f"LOG_DIR            : {LOG_DIR}")
print(f"ROOT_DIR           : {ROOT_DIR}")
print(f"ARTIFACTS_DIR      : {ARTIFACTS_DIR}")

## Create the *Trigger* to trigger re-training

Create a pipeline scheduler object that recurrently re-runs the pipeline and trains the policy on new data. You create a pipeline for orchestration on Vertex Pipelines.

When the Simulator sends prediction requests to the endpoint, the Logger is triggered by the hook in the prediction code to log prediction results to BigQuery as new training data. Because this pipeline runs on a recurrent schedule, it utilizes the new training data to train a new policy, thereby closing the feedback loop.

> Theoretically speaking, if you set the pipeline scheduler to be infinitely frequent, then you would be approaching real-time, continuous training.

In [126]:
# Schedule to trigger the pipeline. Eg. "*/30 * * * *" means every 30 mins.
# NOTE(review): not referenced in the visible remainder of this notebook; the
# retrain trigger is still listed as a TODO — confirm where this is consumed.
TRIGGER_SCHEDULE = "*/30 * * * *"

In [127]:

# component_train_job_op = pipe_utils.create_custom_training_job_from_component(
#     component_spec=train_reinforcement_learning_policy,
#     display_name='mab-training-job',
#     replica_count=TRAINING_REPLICA_COUNT,
#     machine_type=TRAINING_MACHINE_TYPE,
#     accelerator_type=TRAINING_ACCELERATOR_TYPE,
#     accelerator_count=TRAINING_ACCELERATOR_COUNT,
#     enable_web_access=True,
#     restart_job_on_worker_restart=False,
#     base_output_directory=BASE_OUTPUT_DIR,
#     tensorboard=TB_RESOURCE_NAME,
#     service_account=VERTEX_SA,
# )
# component_train_job_op

In [128]:
# Load the Ingester component definition from its YAML file.
# ingest_op = load_component_from_file("./src/ingester/component.yaml")
ingest_op = load_component_from_file("./src/trainer2/ingest_op.yaml")

# NOTE(review): no trainer component is loaded here (both variants below are
# commented out); the pipeline definition calls `component_train_job_op`, which
# must already be defined in the kernel — confirm where it comes from.
# train_op = load_component_from_file("./src/trainer/component.yaml")
# train_op = load_component_from_file("./src/trainer2/component_train_job_op.yaml")

In [129]:
@dsl.pipeline(
    name=f"{PIPELINE_NAME}-retraining"
)
def pipeline(
    # Pipeline configs
    project_id: str,
    training_artifacts_dir: str,
    base_train_output_uri: str,
    # BigQuery configs
    bigquery_table_name: str,
    bigquery_dataset_name: str,
    bigquery_location: str,
    bigquery_max_rows: int = 10000,
    # TF-Agents RL configs
    # batch_size: int = 8,
    rank_k: int = 20,
    num_actions: int = 20,
    num_epochs: int = 5,
    tikhonov_weight: float = 0.01,
    agent_alpha: float = 10,
) -> None:
    """Authors a re-training pipeline for MovieLens movie recommendation system.

    Integrates the Ingester, Trainer and Deployer components: ingest training
    data from BigQuery, train the policy, import the trained artifacts as an
    unmanaged container model, upload that model, create an endpoint and
    deploy the model to it.

    Args:
      project_id: GCP project ID. This is required because otherwise the BigQuery
        client will use the ID of the tenant GCP project created as a result of
        KFP, which doesn't have proper access to BigQuery.
      training_artifacts_dir: Path to store the Trainer artifacts (trained policy).
      base_train_output_uri: Accepted for interface compatibility; not consumed
        by any step in this pipeline body.

      bigquery_table_name: A string of the BigQuery table ID in the format of
        "table_name".
      bigquery_dataset_name: BigQuery dataset name passed to the Ingester.
      bigquery_location: Accepted for interface compatibility; not consumed by
        any step in this pipeline body.
      bigquery_max_rows: Optional; maximum number of rows to ingest.

      rank_k: Optional; rank for matrix factorization in the MovieLens environment;
        also the observation dimension.
      num_actions: Optional; number of actions (movie items) to choose from.
      num_epochs: Optional; number of training epochs.
      tikhonov_weight: Optional; LinUCB Tikhonov regularization weight of the
        Trainer.
      agent_alpha: Optional; LinUCB exploration parameter that multiplies the
        confidence intervals of the Trainer.
    """

    from kfp.dsl import importer_node

    # Run the Ingester component.
    ingest_task = (
        ingest_op(
            project_id=project_id,
            bigquery_table_name=bigquery_table_name,
            bigquery_dataset_name=bigquery_dataset_name,
            bigquery_max_rows=bigquery_max_rows,
            tfrecord_file=TFRECORD_FILE,
        )
        .set_display_name("Ingestor")
        .set_caching_options(True)
    )

    # =====================================================
    # TODO: add step for creating new TensorBoard instance
    # =====================================================

    # Run the Trainer on the TFRecord file produced by the Ingester.
    # The job's project comes from the `project_id` pipeline parameter, like the
    # other steps in this pipeline; the location is still the notebook-level REGION.
    train_task = (
        component_train_job_op(
            training_artifacts_dir=training_artifacts_dir,
            tfrecord_file=ingest_task.outputs["tfrecord_file"],
            num_epochs=num_epochs,
            rank_k=rank_k,
            num_actions=num_actions,
            tikhonov_weight=tikhonov_weight,
            agent_alpha=agent_alpha,
            project=project_id,
            location=REGION,
        )
        .set_display_name("TrainerTask")
        .set_caching_options(True)
    )

    # =====================================================
    # TODO: step for building prediction container?
    # =====================================================

    # Import the trained artifacts as an UnmanagedContainerModel artifact whose
    # metadata carries the serving image URI. The URI is an f-string over the
    # notebook-level PROJECT_ID / IMAGE_NAME_04_pred, so it is fixed when the
    # pipeline is compiled.
    import_unmanaged_model_task = (
        importer_node.importer(
            artifact_uri=train_task.outputs["training_artifacts_dir"],
            artifact_class=artifact_types.UnmanagedContainerModel,
            metadata={
                "containerSpec": {
                    "imageUri": f"gcr.io/{PROJECT_ID}/{IMAGE_NAME_04_pred}:latest"
                }
            },
        )
    )

    # # Run the Deployer components.
    # # Upload the trained policy as a model.

    model_upload_op = (
        ModelUploadOp(
            project=project_id,
            display_name=TRAINED_POLICY_DISPLAY_NAME,
            unmanaged_container_model=import_unmanaged_model_task.outputs["artifact"],
            # artifact_uri=train_task.outputs["training_artifacts_dir"],
            # serving_container_image_uri=f"gcr.io/{PROJECT_ID}/{IMAGE_NAME_04_pred}:latest",
        )
        .set_display_name("Register Trained Policy")
        .set_caching_options(True)
    )

    # Create a Vertex AI endpoint. This step takes no outputs from the Ingester
    # or Trainer steps, so it can run in parallel with them.

    endpoint_create_op = (
        EndpointCreateOp(
            project=project_id,
            display_name=ENDPOINT_DISPLAY_NAME
        )
        .set_display_name("Create Online Endpoint")
        .set_caching_options(True)
    )

    # Deploy the uploaded, trained policy to the created endpoint.
    # (Occurs after both model registered and endpoint created)

    deploy_op = (
        ModelDeployOp(
            endpoint=endpoint_create_op.outputs["endpoint"],
            model=model_upload_op.outputs["model"],
            deployed_model_display_name=TRAINED_POLICY_DISPLAY_NAME,
            # traffic_split=TRAFFIC_SPLIT,
            dedicated_resources_machine_type=ENDPOINT_MACHINE_TYPE,
            dedicated_resources_accelerator_type=ENDPOINT_ACCELERATOR_TYPE,
            dedicated_resources_accelerator_count=ENDPOINT_ACCELERATOR_COUNT,
            dedicated_resources_min_replica_count=ENDPOINT_REPLICA_COUNT,
        )
        .set_display_name("Deploy Policy")
        .set_caching_options(True)
    )

In [130]:
# TODO: add timestamp to spec file??

# Local file name of the compiled pipeline specification.
PIPELINE_SPEC_LOCAL_PATH = "custom_pipeline_spec.json"

# # remove any spec from previous runs
# ! rm -f $PIPELINE_SPEC_LOCAL_PATH

# GCS destination for the spec: the same file name under PIPELINE_ROOT.
PIPELINE_SPEC_GCS_URI = "/".join((PIPELINE_ROOT, PIPELINE_SPEC_LOCAL_PATH))
print(f"PIPELINE_SPEC_GCS_URI: {PIPELINE_SPEC_GCS_URI}")

# Compile the authored pipeline into the local spec file.
compiler.Compiler().compile(pipeline_func=pipeline, package_path=PIPELINE_SPEC_LOCAL_PATH)

PIPELINE_SPEC_GCS_URI: gs://rec-bandits-v2-hybrid-vertex-bucket/mab-pipe-rec-bandits-v2/run-20231019-184119/pipeline_v8_root/custom_pipeline_spec.json


In [131]:
# Copy the locally compiled pipeline spec to its GCS location (`-q` keeps gsutil quiet).
!gsutil -q cp ./$PIPELINE_SPEC_LOCAL_PATH $PIPELINE_SPEC_GCS_URI

In [132]:
# Create a run of the retraining pipeline from the spec uploaded to GCS.
# The display name carries the same "-retraining" suffix as the pipeline name
# so runs are labelled consistently.
job = vertex_ai.PipelineJob(
    display_name=f"{PIPELINE_NAME}-retraining",
    template_path=PIPELINE_SPEC_GCS_URI,
    pipeline_root=PIPELINE_ROOT,
    failure_policy='fast', # slow | fast
    parameter_values={
        # Pipeline configs
        "project_id": PROJECT_ID,
        # "raw_data_path": DATA_PATH_KFP_DEMO,
        "training_artifacts_dir": ARTIFACTS_DIR,
        "base_train_output_uri": BASE_OUTPUT_DIR,
        # "tb_resource_name":TB_RESOURCE_NAME,
        # BigQuery configs
        "bigquery_dataset_name": BIGQUERY_DATASET_NAME,
        "bigquery_location": BQ_LOCATION,
        "bigquery_table_name": BIGQUERY_TABLE_NAME,
        # "bigquery_max_rows": 10000,
        # "batch_size": 8, # int = 8,
        # rank_k / num_actions reuse the notebook constants that also configure
        # the Simulator and Logger functions, so the trained policy cannot
        # drift from them; int() guards against the constants being strings.
        "rank_k": int(RANK_K),
        "num_actions": int(NUM_ACTIONS),
        # "driver_steps": 3, # int = 3,
        "num_epochs": 5, # int = 5,
        "tikhonov_weight": 0.01, # float = 0.01,
        "agent_alpha": 10, # float = 10,
    },
    # Job-level caching is disabled for this run.
    enable_caching=False,
)

# Start the run without blocking the notebook (sync=False).
job.run(
    sync=False,
    service_account=VERTEX_SA,
    # network=f'projects/{PROJECT_NUM}/global/networks/{VPC_NETWORK_NAME}'
)

**TODO**
* retrain trigger
* improve train script (include more metrics, managed TB, GPU, massive datasets)
* experimental design for testing agents (see Gabor's blog)
* Fix 2nd Cloud Function

## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial. Note that you also need to clean up other resources that are difficult to delete here, such as all or part of the data in BigQuery, the recurring pipeline and its Scheduler job, the uploaded policy/model, etc.:

In [None]:
# Uncomment the commands below to delete the individual resources.

# # Delete endpoint resource.
# ! gcloud ai endpoints delete $ENDPOINT_ID --quiet --region $REGION

# # Delete Pub/Sub topics.
# ! gcloud pubsub topics delete $SIMULATOR_PUBSUB_TOPIC --quiet
# ! gcloud pubsub topics delete $LOGGER_PUBSUB_TOPIC --quiet

# # Delete Cloud Functions (deployed above with --gen2 in $REGION).
# ! gcloud functions delete $SIMULATOR_CLOUD_FUNCTION --gen2 --region $REGION --quiet
# ! gcloud functions delete $LOGGER_CLOUD_FUNCTION --gen2 --region $REGION --quiet

# # Delete Scheduler job (created above with --location=$REGION).
# ! gcloud scheduler jobs delete $SIMULATOR_SCHEDULER_JOB --location $REGION --quiet

# # Delete Cloud Storage objects that were created.
# ! gsutil -m rm -r $PIPELINE_ROOT
# ! gsutil -m rm -r $ARTIFACTS_DIR