![ga4](https://www.google-analytics.com/collect?v=2&tid=G-6VDTYWLKX6&cid=1&en=page_view&sid=1&dl=statmike%2Fvertex-ai-mlops%2FDev%2Fnew&dt=sklearn-test.ipynb)
<!--- header table --->
<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/statmike/vertex-ai-mlops/blob/main/Dev/new/sklearn-test.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo">
      <br>Run in<br>Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https%3A//raw.githubusercontent.com/statmike/vertex-ai-mlops/main/Dev/new/sklearn-test.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo">
      <br>Run in<br>Colab Enterprise
    </a>
  </td>      
  <td style="text-align: center">
    <a href="https://github.com/statmike/vertex-ai-mlops/blob/main/Dev/new/sklearn-test.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      <br>View on<br>GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https%3A//raw.githubusercontent.com/statmike/vertex-ai-mlops/main/Dev/new/sklearn-test.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      <br>Open in<br>Vertex AI Workbench
    </a>
  </td>
</table>

# Scikit-Learn Workflow

A new template workflow for [scikit-Learn](https://scikit-learn.org/stable/index.html) model training and serving workflows in Vertex AI.

**Prerequisites:**
-  [01 - BigQuery - Table Data Source](../01%20-%20Data%20Sources/01%20-%20BigQuery%20-%20Table%20Data%20Source.ipynb)

---
## Colab Setup

To run this notebook in Colab run the cells in this section.  Otherwise, skip this section.

This cell will authenticate to GCP (follow prompts in the popup).

In [11]:
PROJECT_ID = 'statmike-mlops-349915' # replace with project ID

In [12]:
try:
    from google.colab import auth
    auth.authenticate_user()
    !gcloud config set project {PROJECT_ID}
    print('Colab authorized to GCP')
except Exception:
    print('Not a Colab Environment')
    pass

Not a Colab Environment


---
## Installs

The list `packages` contains tuples of package import names and install names.  If the import name is not found then the install name is used to install the package quietly for the current user.

In [53]:
# tuples of (import name, install name, min_version)
packages = [
    ('google.cloud.aiplatform', 'google-cloud-aiplatform'),
    ('google.cloud.bigquery', 'google-cloud-bigquery'),
    ('google.cloud.storage', 'google-cloud-storage'),
    ('bigframes', 'bigframes'),
    ('kfp', 'kfp'),
    ('google.cloud.artifactregistry_v1', 'google-cloud-artifact-registry')
]

import importlib
install = False
for package in packages:
    if not importlib.util.find_spec(package[0]):
        print(f'installing package {package[1]}')
        install = True
        !pip install {package[1]} -U -q --user
    elif len(package) == 3:
        if importlib.metadata.version(package[0]) < package[2]:
            print(f'updating package {package[1]}')
            install = True
            !pip install {package[1]} -U -q --user

## API Enablement

In [54]:
!gcloud services enable aiplatform.googleapis.com
!gcloud services enable artifactregistry.googleapis.com

### Restart Kernel (If Installs Occurred)

After a kernel restart the code submission can start with the next cell after this one.

In [55]:
# Shut the kernel down with restart requested, but only when the install cell
# flagged that a package was installed or updated.
if install:
    import IPython
    IPython.Application.instance().kernel.do_shutdown(True)

---
## Setup

Inputs

In [56]:
# read the active project from the gcloud configuration; the `!` capture is indexed
# as a list, so the first element is taken as the project id
project = !gcloud config get-value project
PROJECT_ID = project[0]
PROJECT_ID

'statmike-mlops-349915'

In [57]:
# region for Vertex AI resources, plus the experiment and series names that are
# reused below in display names, GCS paths and labels
REGION = 'us-central1'
EXPERIMENT = 'sklearn-workflow'
SERIES = 'dev'

# gcs bucket: this notebook uses a bucket named after the project
GCS_BUCKET = PROJECT_ID

# source data: the BigQuery table read for training and for batch prediction
BQ_PROJECT = PROJECT_ID
BQ_DATASET = 'fraud'
BQ_TABLE = 'fraud_prepped'

Packages

In [58]:
import os
import sklearn.ensemble
import pickle
import importlib
import time
from datetime import datetime
from google.cloud import aiplatform
from google.cloud import bigquery
from google.cloud import artifactregistry_v1
from google.cloud import storage
import bigframes.pandas as bpd

import kfp

Clients

In [59]:
# vertex ai clients: set the default project and location for later SDK calls
aiplatform.init(project = PROJECT_ID, location = REGION)

# artifact registry client (used below to list and create repositories)
ar_client = artifactregistry_v1.ArtifactRegistryClient()

# gcs storage client
gcs = storage.Client(project = PROJECT_ID)

# bigquery clients: the standard client plus the project option for bigframes
bq = bigquery.Client(project = PROJECT_ID)
bpd.options.bigquery.project = PROJECT_ID

parameters:

In [60]:
DIR = f"temp/{EXPERIMENT}"
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

In [61]:
RUN_NAME = f'run-{TIMESTAMP}'

In [62]:
# account currently active in the gcloud configuration; passed below as the
# service account for the custom training job
SERVICE_ACCOUNT = !gcloud config list --format='value(core.account)' 
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]
SERVICE_ACCOUNT

'1026793852137-compute@developer.gserviceaccount.com'

environment:

In [63]:
# create the local working directory; exist_ok makes the cell safe to re-run and
# removes the gap between checking for the directory and creating it
os.makedirs(DIR, exist_ok = True)

---
## Data Source

In [13]:
# read the full source table; the path is backtick-quoted (as in the batch prediction
# query later in this notebook) so project ids containing dashes always parse
data = bq.query(f'SELECT * FROM `{BQ_PROJECT}.{BQ_DATASET}.{BQ_TABLE}`').to_dataframe()

In [14]:
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V23,V24,V25,V26,V27,V28,Amount,Class,transaction_id,splits
0,35337,1.092844,-0.01323,1.359829,2.731537,-0.707357,0.873837,-0.79613,0.437707,0.39677,...,-0.167647,0.027557,0.592115,0.219695,0.03697,0.010984,0.0,0,a1b10547-d270-48c0-b902-7a0f735dadc7,TEST
1,60481,1.238973,0.035226,0.063003,0.641406,-0.260893,-0.580097,0.049938,-0.034733,0.405932,...,-0.057718,0.104983,0.537987,0.589563,-0.046207,-0.006212,0.0,0,814c62c8-ade4-47d5-bf83-313b0aafdee5,TEST
2,139587,1.870539,0.211079,0.224457,3.889486,-0.380177,0.249799,-0.577133,0.179189,-0.120462,...,0.180776,-0.060226,-0.228979,0.080827,0.009868,-0.036997,0.0,0,d08a1bfa-85c5-4f1b-9537-1c5a93e6afd0,TEST
3,162908,-3.368339,-1.980442,0.153645,-0.159795,3.847169,-3.516873,-1.209398,-0.292122,0.760543,...,-1.171627,0.214333,-0.159652,-0.060883,1.294977,0.120503,0.0,0,802f3307-8e5a-4475-b795-5d5d8d7d0120,TEST
4,165236,2.180149,0.218732,-2.637726,0.348776,1.063546,-1.249197,0.942021,-0.547652,-0.087823,...,-0.176957,0.563779,0.730183,0.707494,-0.131066,-0.090428,0.0,0,c8a5b93a-1598-4689-80be-4f9f5df0b8ce,TEST


---
## Model Training: Local

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.HistGradientBoostingClassifier.html

In [92]:
# rows assigned to the TRAIN split, without the id and split-assignment columns
is_train = data['splits'] == 'TRAIN'
keep_columns = ~data.columns.isin(['transaction_id', 'splits'])
train_x = data.loc[is_train, keep_columns]
# move the label out of the feature frame and cast it to integer
train_y = train_x.pop('Class').astype('int')

In [93]:
classifier = sklearn.ensemble.HistGradientBoostingClassifier().fit(train_x, train_y)

In [94]:
classifier.score(train_x, train_y)

0.9984083205808972

In [95]:
classifier.predict(train_x[0:5]), train_y[0:5].values

(array([0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0]))

In [96]:
classifier.predict_proba(train_x[0:5])

array([[0.99854863, 0.00145137],
       [0.99854863, 0.00145137],
       [0.99854863, 0.00145137],
       [0.99854863, 0.00145137],
       [0.99854863, 0.00145137]])

In [114]:
classifier.classes_

array([0, 1])

In [155]:
with open(f'{DIR}/model.pkl','wb') as f:
    pickle.dump(classifier, f)

In [156]:
# reload the model from the pickle file to confirm the save/load round trip
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source
with open(f'{DIR}/model.pkl','rb') as f:
    classifier_import = pickle.load(f)

In [160]:
classifier_import.predict_proba(train_x[0:5])

array([[0.99854863, 0.00145137],
       [0.99854863, 0.00145137],
       [0.99854863, 0.00145137],
       [0.99854863, 0.00145137],
       [0.99854863, 0.00145137]])

In [290]:
# one record per row: the model's class labels alongside the predicted probabilities
predictions = [
    {'classes': list(classifier.classes_), 'scores': list(row)}
    for row in classifier.predict_proba(train_x[0:5])
]
predictions

[{'classes': [0, 1], 'scores': [0.9985486267744933, 0.0014513732255066786]},
 {'classes': [0, 1], 'scores': [0.9985486267744933, 0.0014513732255066786]},
 {'classes': [0, 1], 'scores': [0.9985486267744933, 0.0014513732255066786]},
 {'classes': [0, 1], 'scores': [0.9985486267744933, 0.0014513732255066786]},
 {'classes': [0, 1], 'scores': [0.9985486267744933, 0.0014513732255066786]}]

---
## Model Training: Vertex AI Training Custom Job
- https://cloud.google.com/vertex-ai/docs/training/create-custom-job#create_custom_job-python_vertex_ai_sdk
- https://cloud.google.com/vertex-ai/docs/training/exporting-model-artifacts#scikit-learn

In [104]:
%%writefile {DIR}/train.py
# imports
from google.cloud import bigquery
import sklearn.ensemble
import argparse
import pickle
import os
import logging

# setup logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

# import argument to local variables
parser = argparse.ArgumentParser()
parser.add_argument('--project_id', dest = 'PROJECT_ID', type=str)
parser.add_argument('--bq_project', dest = 'BQ_PROJECT', type=str)
parser.add_argument('--bq_dataset', dest = 'BQ_DATASET', type=str)
parser.add_argument('--bq_table', dest = 'BQ_TABLE', type=str)
args = parser.parse_args()
logging.info('Finished parsing input parameters.')

# bigquery client
bq = bigquery.Client(project = args.PROJECT_ID)

# download data
data = bq.query(f'SELECT * FROM {args.BQ_PROJECT}.{args.BQ_DATASET}.{args.BQ_TABLE}').to_dataframe()
logging.info('Read data from BQ.')

# prepare training data
train_x = data.loc[data['splits']=='TRAIN', ~data.columns.isin(['transaction_id', 'splits'])]
train_y = train_x.pop('Class').astype('int')
logging.info('Prepared training data.')

# fit model
classifier = sklearn.ensemble.HistGradientBoostingClassifier().fit(train_x, train_y)
logging.info('Model training complete.')

# Use predefined environment variable to establish model directory
storage_path = f"/gcs/{os.environ['AIP_MODEL_DIR'][5:]}" + 'model.pkl'
os.makedirs(os.path.dirname(storage_path), exist_ok=True)

# output the model save files directly to GCS destination
with open(storage_path,'wb') as f:
    pickle.dump(classifier, f)
logging.info('Model saved to GCS.')

Overwriting temp/sklearn-workflow/train.py


https://cloud.google.com/vertex-ai/docs/training/pre-built-containers#scikit-learn

In [105]:
# command-line arguments forwarded to train.py, one "--flag=value" string per parser argument
CMDARGS = [
    f"--{flag}={value}"
    for flag, value in {
        'project_id': PROJECT_ID,
        'bq_project': BQ_PROJECT,
        'bq_dataset': BQ_DATASET,
        'bq_table': BQ_TABLE,
    }.items()
]

# training container image, machine type, and the GCS root for this series/experiment
TRAIN_IMAGE = 'us-docker.pkg.dev/vertex-ai/training/sklearn-cpu.1-0:latest'
TRAIN_COMPUTE = 'n1-standard-4'
URI = f"gs://{GCS_BUCKET}/{SERIES}/{EXPERIMENT}"

In [106]:
# define a custom job from the local train.py script; the staging location and the
# job output directory both point at the timestamped models folder
job_output_uri = f"{URI}/models/{TIMESTAMP}"
customJob = aiplatform.CustomJob.from_local_script(
    display_name = f'{SERIES}_{EXPERIMENT}_{TIMESTAMP}',
    script_path = f'{DIR}/train.py',
    container_uri = TRAIN_IMAGE,
    args = CMDARGS,
    requirements = ['db-dtypes', 'google-cloud-bigquery'],
    replica_count = 1,
    machine_type = TRAIN_COMPUTE,
    accelerator_count = 0,
    base_output_dir = job_output_uri,
    staging_bucket = job_output_uri,
    labels = dict(series = f'{SERIES}', experiment = f'{EXPERIMENT}')
)

Training script copied to:
gs://statmike-mlops-349915/dev/sklearn-workflow/models/20240225203750/aiplatform-2024-02-25-21:56:30.388-aiplatform_custom_trainer_script-0.1.tar.gz.


In [107]:
# run the job under the account captured from the gcloud configuration
customJob.run(service_account = SERVICE_ACCOUNT)

Creating CustomJob
CustomJob created. Resource name: projects/1026793852137/locations/us-central1/customJobs/1880269818137935872
To use this CustomJob in another session:
custom_job = aiplatform.CustomJob.get('projects/1026793852137/locations/us-central1/customJobs/1880269818137935872')
View Custom Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/1880269818137935872?project=1026793852137
CustomJob projects/1026793852137/locations/us-central1/customJobs/1880269818137935872 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/1026793852137/locations/us-central1/customJobs/1880269818137935872 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/1026793852137/locations/us-central1/customJobs/1880269818137935872 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/1026793852137/locations/us-central1/customJobs/1880269818137935872 current state:
JobState.JOB_STATE_PENDING
CustomJob projects/1026793852137/locations/us-central1/customJobs

In [108]:
customJob.display_name

'dev_sklearn-workflow_20240225203750'

In [109]:
customJob.resource_name

'projects/1026793852137/locations/us-central1/customJobs/1880269818137935872'

Create hyperlinks to job here:

In [110]:
# the job id is the last path segment of the job's resource name
job_id = customJob.resource_name.split('/')[-1]
job_link = f"https://console.cloud.google.com/vertex-ai/locations/{REGION}/training/{job_id}/cpu?cloudshell=false&project={PROJECT_ID}"
print(f'Review the Custom Job here:\n{job_link}')

Review the Custom Job here:
https://console.cloud.google.com/vertex-ai/locations/us-central1/training/1880269818137935872/cpu?cloudshell=false&project=statmike-mlops-349915


---
## Register Model: Vertex AI Model Registry

- https://cloud.google.com/vertex-ai/docs/predictions/pre-built-containers
- https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model#google_cloud_aiplatform_Model_training_job

**NOTE:** It was very important to use a serving container with the same version of scikit-learn as the training container.  When serving was set to 1.3 after training with 1.0 the following error happens on serving:
> `ModuleNotFoundError: No module named 'sklearn.ensemble._hist_gradient_boosting.loss'`

In [163]:
DEPLOY_IMAGE = 'us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest'
DEPLOY_COMPUTE = 'n1-standard-4'

In [164]:
# Register the trained model in the Vertex AI Model Registry:
# - model not found          -> upload as a new model
# - model found, new run     -> upload as a new default version under the existing model
# - model found, same run    -> nothing to do (RUN_NAME is already a version alias)
upload_model = True
try:
    model = aiplatform.Model(
        project = PROJECT_ID,
        location = REGION,
        model_name = f'model_{SERIES}_{EXPERIMENT}'
    )
    print('Model already in registry')
    if RUN_NAME in model.version_aliases:
        upload_model = False
        print("This version already loaded, no action taken.")
    else:
        print('Loading model as new default version.')
        parent_model = model.resource_name
except Exception:
    # NOTE(review): any failure to retrieve the model is treated as "model does not
    # exist yet"; consider narrowing this to the SDK's not-found error
    print('This is a new model, creating in model registry')
    parent_model = ''

if upload_model:
    print('Uploading Model now...')
    model = aiplatform.Model.upload(
        display_name = f'{SERIES}_{EXPERIMENT}',
        model_id = f'model_{SERIES}_{EXPERIMENT}',
        parent_model =  parent_model,
        serving_container_image_uri = DEPLOY_IMAGE,
        # the training job writes model.pkl under the "model" folder of its output directory
        artifact_uri = f"{URI}/models/{TIMESTAMP}/model",
        is_default_version = True,
        version_aliases = [RUN_NAME],
        version_description = RUN_NAME
    )

This is a new model, creating in model registry
Uploading Model now...
Creating Model
Create Model backing LRO: projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow/operations/489564183696769024
Model created. Resource name: projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow@1
To use this Model in another session:
model = aiplatform.Model('projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow@1')


In [165]:
model.versioned_resource_name

'projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow@1'

---
## Model Serving: Online with Vertex AI Prediction Endpoints

- https://cloud.google.com/vertex-ai/docs/general/deployment
- https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Endpoint

### Create/Retrieve Endpoint

In [166]:
# reuse the endpoint whose display name matches the series, otherwise create it
endpoints = aiplatform.Endpoint.list(filter = f"display_name={SERIES}")
if endpoints:
    # when several endpoints match, the first one returned is used
    endpoint = endpoints[0]
    print(f'Endpoint Exists: {endpoint.resource_name}')
else:
    endpoint = aiplatform.Endpoint.create(
        display_name = SERIES
    )
    print(f'Endpoint Created: {endpoint.resource_name}')

print(f'Review the Endpoint in the Console:\nhttps://console.cloud.google.com/vertex-ai/locations/{REGION}/endpoints/{endpoint.name}?project={PROJECT_ID}')

Endpoint Exists: projects/1026793852137/locations/us-central1/endpoints/8609714806183690240
Review the Endpoint in the Console:
https://console.cloud.google.com/vertex-ai/locations/us-central1/endpoints/8609714806183690240?project=statmike-mlops-349915


In [167]:
endpoint.display_name

'dev'

In [168]:
endpoint.traffic_split

{}

In [169]:
deployed_models = endpoint.list_models()
#deployed_models

### Deploy Model To Endpoint

In [170]:
# deploy the registered model to the endpoint with 100% of traffic;
# min and max replica counts are both 1, so the replica count is fixed
endpoint.deploy(
    model = model,
    deployed_model_display_name = model.display_name,
    traffic_percentage = 100,
    machine_type = DEPLOY_COMPUTE,
    min_replica_count = 1,  
    max_replica_count = 1
) 

Deploying Model projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow to Endpoint : projects/1026793852137/locations/us-central1/endpoints/8609714806183690240
Deploy Endpoint model backing LRO: projects/1026793852137/locations/us-central1/endpoints/8609714806183690240/operations/240740304284549120
Endpoint model deployed. Resource name: projects/1026793852137/locations/us-central1/endpoints/8609714806183690240


In [181]:
endpoint.traffic_split

{'8337304203332419584': 100}

In [173]:
endpoint.list_models()

[id: "8337304203332419584"
 model: "projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow"
 display_name: "dev_sklearn-workflow"
 create_time {
   seconds: 1708909118
   nanos: 730301000
 }
 dedicated_resources {
   machine_spec {
     machine_type: "n1-standard-4"
   }
   min_replica_count: 1
   max_replica_count: 1
 }
 model_version_id: "1"]

In [175]:
model.resource_name

'projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow'

In [177]:
model.version_id

'1'

In [178]:
# report the traffic share of each deployed model and undeploy any model
# that does not appear in the endpoint's traffic split
for deployed_model in endpoint.list_models():
    if deployed_model.id not in endpoint.traffic_split:
        endpoint.undeploy(deployed_model_id = deployed_model.id)
        print(f"Undeploying {deployed_model.display_name} with version {deployed_model.model_version_id} because it has no traffic.")
        continue
    print(f"Model {deployed_model.display_name} with version {deployed_model.model_version_id} has traffic = {endpoint.traffic_split[deployed_model.id]}")

Model dev_sklearn-workflow with version 1 has traffic = 100


In [179]:
endpoint.traffic_split

{'8337304203332419584': 100}

### Get Predictions
- https://cloud.google.com/vertex-ai/docs/predictions/get-online-predictions

In [190]:
# rows assigned to the TEST split, with the same column handling as the training data
is_test = data['splits'] == 'TEST'
test_x = data.loc[is_test, ~data.columns.isin(['transaction_id', 'splits'])]
test_y = test_x.pop('Class').astype('int')

# request payload: one plain list of feature values per row
instances = test_x.values.tolist()

In [191]:
test_x.columns

Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount'],
      dtype='object')

In [192]:
instances[0]

[35337,
 1.0928441854981998,
 -0.0132303486713432,
 1.35982868199426,
 2.7315370965921004,
 -0.707357349219652,
 0.8738370029866129,
 -0.7961301510622031,
 0.437706509544851,
 0.39676985012996396,
 0.587438102569443,
 -0.14979756231827498,
 0.29514781622888103,
 -1.30382621882143,
 -0.31782283120234495,
 -2.03673231037199,
 0.376090905274179,
 -0.30040350116459497,
 0.433799615590844,
 -0.145082264348681,
 -0.240427548108996,
 0.0376030733329398,
 0.38002620963091405,
 -0.16764742731151097,
 0.0275573495476881,
 0.59211469704354,
 0.219695164116351,
 0.0369695108704894,
 0.010984441006191,
 0.0]

In [193]:
endpoint.predict(instances = instances[0:1])

Prediction(predictions=[0.0], deployed_model_id='8337304203332419584', metadata=None, model_version_id='1', model_resource_name='projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow', explanations=None)

In [195]:
test_y.ne(0).idxmax()

53

In [198]:
# request a prediction for the first test row with a non-zero label.
# The row is located by POSITION: `instances` is a plain list, so an index label
# (what idxmax() returns) only lines up with it when the test rows happen to be
# labeled 0..n-1.
first_positive = int(test_y.ne(0).to_numpy().argmax())
endpoint.predict(instances = instances[first_positive:first_positive + 1])

Prediction(predictions=[1.0], deployed_model_id='8337304203332419584', metadata=None, model_version_id='1', model_resource_name='projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow', explanations=None)

## Model Serving: Batch With Vertex AI Prediction Batch Jobs

- https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions#bigquery
- https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.Model#google_cloud_aiplatform_Model_batch_predict

Here, we want to filter to rows with `splits = EVAL` and exclude columns.  This will use the gapic version of the API:
- https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.BatchPredictionJob
- https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.services.job_service.JobServiceClient#google_cloud_aiplatform_v1_services_job_service_JobServiceClient_create_batch_prediction_job

In [199]:
# lower-level (gapic) job service client, pointed at the regional API endpoint
client_options = {"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}
jobs_client = aiplatform.gapic.JobServiceClient(client_options = client_options)

In [209]:
# batch prediction job specification: BigQuery table in, BigQuery dataset out
batch_prediction_job = aiplatform.gapic.BatchPredictionJob(
    display_name = f'{SERIES}_{EXPERIMENT}',
    # versioned resource name, so the job is tied to this specific model version
    model = model.versioned_resource_name,
    input_config = dict(
        instances_format = 'bigquery',
        bigquery_source = dict(input_uri = f'bq://{BQ_PROJECT}.{BQ_DATASET}.{BQ_TABLE}')
    ),
    output_config = dict(
        predictions_format = 'bigquery',
        # only a dataset is given here; the output table name is read back from the job's output_info later
        bigquery_destination = dict(output_uri = f'bq://{BQ_PROJECT}.{BQ_DATASET}')
    ),
    dedicated_resources = dict(
        machine_spec = dict(machine_type = DEPLOY_COMPUTE),
        starting_replica_count = 10,
        max_replica_count = 10
    ),
    instance_config = dict(
        # send each row as an array built from the training feature columns only
        instance_type = 'array',
        included_fields = list(train_x.columns),
        #excluded_fields = ['Class', 'splits', 'transaction_id']
    )
)

In [210]:
# submit the batch prediction job under this project and region
bqBatchJob = jobs_client.create_batch_prediction_job(
    parent = f'projects/{PROJECT_ID}/locations/{REGION}',
    batch_prediction_job = batch_prediction_job
)

In [215]:
state = jobs_client.get_batch_prediction_job(
    name = bqBatchJob.name
).state
state

<JobState.JOB_STATE_RUNNING: 3>

In [235]:
state, state.value

(<JobState.JOB_STATE_RUNNING: 3>, 3)

In [228]:
state._member_names_

['JOB_STATE_UNSPECIFIED',
 'JOB_STATE_QUEUED',
 'JOB_STATE_PENDING',
 'JOB_STATE_RUNNING',
 'JOB_STATE_SUCCEEDED',
 'JOB_STATE_FAILED',
 'JOB_STATE_CANCELLING',
 'JOB_STATE_CANCELLED',
 'JOB_STATE_PAUSED',
 'JOB_STATE_EXPIRED',
 'JOB_STATE_UPDATING',
 'JOB_STATE_PARTIALLY_SUCCEEDED']

In [238]:
# poll once a minute while the job is still in one of the first four JobState
# members (enum values 0 through 3)
WAITING_STATES = (
    'JOB_STATE_UNSPECIFIED',
    'JOB_STATE_QUEUED',
    'JOB_STATE_PENDING',
    'JOB_STATE_RUNNING',
)
while state.name in WAITING_STATES:
    print('Checking again in 1 Minute...')
    time.sleep(60)
    state = jobs_client.get_batch_prediction_job(name = bqBatchJob.name).state

Checking again in 1 Minute...


In [239]:
state.name

'JOB_STATE_SUCCEEDED'

In [242]:
# re-fetch the job and show where its output was written
bqBatchJob = jobs_client.get_batch_prediction_job(
    name = bqBatchJob.name
)
bqBatchJob.output_info

bigquery_output_dataset: "bq://statmike-mlops-349915.fraud"
bigquery_output_table: "predictions_2024_02_26T08_04_51_229Z_720"

In [279]:
str(bqBatchJob.end_time - bqBatchJob.start_time)

'0:13:48.626896'

In [243]:
# fully qualified output table: the dataset path without its bq:// prefix, plus the table name
output_info = bqBatchJob.output_info
dataset_path = output_info.bigquery_output_dataset.split('bq://')[-1]
bq_table = f"{dataset_path}.{output_info.bigquery_output_table}"
bq_table

'statmike-mlops-349915.fraud.predictions_2024_02_26T08_04_51_229Z_720'

In [244]:
# read the whole batch prediction output table into a dataframe
predictions_sql = f'''
SELECT *
FROM `{bq_table}`
'''
batch_predictions = bq.query(predictions_sql).to_dataframe()

In [250]:
batch_predictions.loc[(batch_predictions['prediction'] == '0') & (batch_predictions['Class'] == 1)].head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V24,V25,V26,V27,V28,Amount,Class,transaction_id,splits,prediction
143692,140293,0.951025,3.252926,-5.039105,4.632411,3.014501,-1.34957,0.98094,-1.819539,-2.099049,...,-1.185942,-1.286177,0.000365,0.169662,0.108276,0.77,1,d29d9996-2d90-4ad6-9cd9-e744424b81e0,TRAIN,0
143693,54846,-2.986466,-0.000891,0.605887,0.338338,0.685448,-1.581954,0.504206,-0.233403,0.636768,...,0.355065,0.448552,0.19349,1.214588,-0.013923,1.79,1,b949175e-d019-4637-b02f-8d0f8cc6c51c,TRAIN,0
143694,87202,-0.41982,-1.155978,-2.092516,2.78675,0.736297,-0.167292,1.600027,-0.117427,-0.796954,...,0.516131,-0.602941,-0.305024,-0.021363,0.129096,451.27,1,d0cfc4bf-40be-468e-b80b-403e5219136d,TRAIN,0
143695,139107,-4.6665,-3.95232,0.206094,5.153525,5.229469,0.93904,-0.635033,-0.704506,-0.234786,...,-0.759673,-0.502304,0.630639,-0.51388,0.729526,22.47,1,7a960a18-0a2f-4351-a148-7978c2bb36b0,TRAIN,0
143696,102318,-1.020632,1.496959,-4.490937,1.836727,0.627318,-2.735569,-1.546274,0.459822,-0.682741,...,-0.1555,0.412166,-0.22008,0.392338,-0.020089,22.04,1,f3b8a1b7-0000-4231-b541-52996821a6ad,TRAIN,0


---
## Model Serving: Customize Online Serving With Vertex AI Prediction Endpoints

- https://cloud.google.com/vertex-ai/docs/predictions/custom-prediction-routines
- https://github.com/googleapis/python-aiplatform/blob/main/google/cloud/aiplatform/prediction/predictor.py

### Setup Artifact Registry

Artifact Registry organizes artifacts with repositories.  Each repository contains packages and is designated to hold a particular format of package: Docker images, Python packages and [others](https://cloud.google.com/artifact-registry/docs/supported-formats#package).

#### List Repositories

This may be empty if no repositories have been created for this project

In [27]:
for repo in ar_client.list_repositories(parent = f'projects/{PROJECT_ID}/locations/{REGION}'):
    print(repo.name)

projects/statmike-mlops-349915/locations/us-central1/repositories/statmike-mlops-349915
projects/statmike-mlops-349915/locations/us-central1/repositories/statmike-mlops-349915-docker
projects/statmike-mlops-349915/locations/us-central1/repositories/statmike-mlops-349915-python


#### Create Docker Image Repository

Create an Artifact Registry Repository to hold Docker Images created by this notebook.  First, check to see if it is already created by a previous run and retrieve it if it has.  Otherwise, create!

In [299]:
# Reuse the repository whose id equals the project id when it exists; otherwise create
# it as a Docker-format repository and wait for the operation to finish.
repo_parent = f'projects/{PROJECT_ID}/locations/{REGION}'

docker_repo = None
for repo in ar_client.list_repositories(parent = repo_parent):
    # the repository id is the last segment of the resource name
    if repo.name.split('/')[-1] != f'{PROJECT_ID}':
        continue
    docker_repo = repo
    print(f'Retrieved existing repo: {docker_repo.name}')

if not docker_repo:
    new_repo = artifactregistry_v1.Repository(
        description = f'A repository for the {SERIES} series that holds docker images.',
        name = f'{PROJECT_ID}',
        format_ = artifactregistry_v1.Repository.Format.DOCKER,
        labels = {'series': SERIES}
    )
    create_request = artifactregistry_v1.CreateRepositoryRequest(
        parent = repo_parent,
        repository_id = f'{PROJECT_ID}',
        repository = new_repo
    )
    operation = ar_client.create_repository(request = create_request)
    print('Creating Repository ...')
    docker_repo = operation.result()
    print(f'Completed creating repo: {docker_repo.name}')

Retrieved existing repo: projects/statmike-mlops-349915/locations/us-central1/repositories/statmike-mlops-349915


In [300]:
docker_repo.name, docker_repo.format_.name

('projects/statmike-mlops-349915/locations/us-central1/repositories/statmike-mlops-349915',
 'DOCKER')

In [301]:
REPOSITORY = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{docker_repo.name.split('/')[-1]}"

In [302]:
REPOSITORY

'us-central1-docker.pkg.dev/statmike-mlops-349915/statmike-mlops-349915'

### Build Custom Prediction Routine

In [281]:
# create the source folder for the custom prediction routine; exist_ok makes the
# cell safe to re-run and removes the gap between checking and creating
os.makedirs(DIR + '/SRC', exist_ok = True)

In [312]:
%%writefile {DIR}/SRC/requirements.txt
fastapi
uvicorn
google-cloud-aiplatform[prediction]
joblib
numpy
# stay on the scikit-learn 1.0 series so the serving code can unpickle the model
# trained in the sklearn-cpu.1-0 container ("=" alone is not a valid pip specifier)
scikit-learn==1.0.*
google-cloud-storage

Overwriting temp/sklearn-workflow/SRC/requirements.txt


In [313]:
%%writefile {DIR}/SRC/predictor.py

# packages
import joblib
import numpy as np
import os
import pickle

from google.cloud.aiplatform.constants import prediction
from google.cloud.aiplatform.utils import prediction_utils
from google.cloud.aiplatform.prediction.predictor import Predictor
    
class CprPredictor(Predictor):
    """Custom prediction routine returning class labels and probabilities for each instance."""

    def __init__(self):
        return

    def load(self, artifacts_uri: str) -> None:
        """Download the model artifacts and load the model from a joblib or pickle file.

        Args:
            artifacts_uri: Location of the model artifacts to download.

        Raises:
            ValueError: If neither the joblib nor the pickle model file is present
                after the download.
        """
        prediction_utils.download_model_artifacts(artifacts_uri)
        if os.path.exists(prediction.MODEL_FILENAME_JOBLIB):
            self._model = joblib.load(prediction.MODEL_FILENAME_JOBLIB)
        elif os.path.exists(prediction.MODEL_FILENAME_PKL):
            # NOTE: unpickling can execute arbitrary code; only serve trusted artifacts.
            # The context manager closes the file handle once the model is loaded.
            with open(prediction.MODEL_FILENAME_PKL, "rb") as model_file:
                self._model = pickle.load(model_file)
        else:
            valid_filenames = [
                prediction.MODEL_FILENAME_JOBLIB,
                prediction.MODEL_FILENAME_PKL,
            ]
            raise ValueError(
                f"One of the following model files must be provided: {valid_filenames}."
            )

    def predict(self, instances):
        """Return one ``{classes, scores}`` record per instance.

        Args:
            instances: Request body as a dict; the rows to score are read from its
                ``"instances"`` key.

        Returns:
            A dict with a ``"predictions"`` list. Each entry holds the model's class
            labels and the matching predicted probabilities.
        """
        instances = instances["instances"]
        # .tolist() converts numpy scalars to plain Python numbers; numpy integer
        # class labels are otherwise not JSON serializable
        classes = np.asarray(self._model.classes_).tolist()
        scores = self._model.predict_proba(np.asarray(instances)).tolist()
        return dict(predictions = [dict(classes = classes, scores = score) for score in scores])


Overwriting temp/sklearn-workflow/SRC/predictor.py


### Run Custom Prediction Routine: Local

In [314]:
from google.cloud.aiplatform.prediction import LocalModel

# import the predictor.py module written above (the '/' separators in DIR become '.' in the module path);
# the class is then available as predictor.CprPredictor
predictor = importlib.import_module(f"{'.'.join(DIR.split('/'))}.SRC.predictor")

In [315]:
# build the custom prediction routine image from the SRC folder with CprPredictor as the predictor class;
# the image URI is {REPOSITORY}/{SERIES}_{EXPERIMENT}
local_model = LocalModel.build_cpr_model(
    src_dir = f"{DIR}/SRC",
    output_image_uri = f"{REPOSITORY}/{SERIES}_{EXPERIMENT}",
    predictor = predictor.CprPredictor,
    requirements_path = f"{DIR}/SRC/requirements.txt",
)

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [316]:
local_model.get_serving_container_spec()

image_uri: "us-central1-docker.pkg.dev/statmike-mlops-349915/statmike-mlops-349915/dev_sklearn-workflow"
predict_route: "/predict"
health_route: "/health"

In [317]:
# run the image as a local endpoint with the artifacts at model.uri for the duration of the with block
with local_model.deploy_to_local_endpoint(
    artifact_uri = model.uri
) as local_endpoint:
    # NOTE(review): the predict request below is commented out, so `predict_response`
    # (referenced by later cells) is not assigned by this cell
    #predict_response = local_endpoint.predict(
    #    request = json.dumps(dict(instances = [instances[0:1]])),
    #    headers = {"Content-Type": "application/json"}
    #)
    health_check_response = local_endpoint.run_health_check()

Exception during starting serving: ('Container exited before the first health check succeeded.', '', 1).
Exception during entering a context: ('Container exited before the first health check succeeded.', '', 1).


DockerError: ('Container exited before the first health check succeeded.', '', 1)

In [None]:
health_check_response, health_check_response.content

In [None]:
predict_response

In [None]:
print(predict_response.content)

### Save Image To Artifact Registry

First, configure `gcloud` as the credential helper for Google Cloud Docker registries:

In [713]:
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


In [None]:
local_model.push_image()

In [None]:
# NOTE(review): the link uses PROJECT_ID as the repository name, while REPOSITORY takes the name
# from docker_repo.name — confirm the two always match
print(f'Review the image in artifact registry here:\nhttps://console.cloud.google.com/artifacts/docker/{PROJECT_ID}/{REGION}/{PROJECT_ID}/{SERIES}_{EXPERIMENT}?project={PROJECT_ID}')

### Register Model

### Deploy Model To Endpoint

### Get Predictions

---
## Model Serving: Batch With BigQuery ML

### Convert Model To ONNX

### Import Model With BigQuery ML

### Get Predictions

---
## Model Evaluations: With SDK

https://cloud.google.com/vertex-ai/docs/evaluation/introduction#tabular

---
## Model Evaluations: With Pipeline Components

---
## Model Registry: Optional Additions

In [255]:
model.to_dict()

{'name': 'projects/1026793852137/locations/us-central1/models/model_dev_sklearn-workflow@1',
 'displayName': 'dev_sklearn-workflow',
 'predictSchemata': {},
 'metadata': None,
 'containerSpec': {'imageUri': 'us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-0:latest'},
 'supportedDeploymentResourcesTypes': ['DEDICATED_RESOURCES'],
 'supportedInputStorageFormats': ['jsonl',
  'bigquery',
  'csv',
  'tf-record',
  'tf-record-gzip',
  'file-list'],
 'supportedOutputStorageFormats': ['jsonl', 'bigquery'],
 'createTime': '2024-02-26T00:58:28.590188Z',
 'updateTime': '2024-02-26T00:58:29.560475Z',
 'etag': 'AMEw9yOee46PG34bICm1RvqIgYGKCF0MPG7B3VMJNHf4v2v6L8YkI-MSiR4L3DY75Cbr',
 'supportedExportFormats': [{'id': 'custom-trained',
   'exportableContents': ['ARTIFACT']}],
 'artifactUri': 'gs://statmike-mlops-349915/dev/sklearn-workflow/models/20240225203750/model',
 'versionId': '1',
 'versionAliases': ['run-20240225203750', 'default'],
 'versionDescription': 'run-20240225203750',
 'versionC

In [270]:

# example model from BQML: look it up by model_name and show its full resource as a dict,
# for comparison with the fields of the scikit-learn model above
test = aiplatform.Model(
    project = PROJECT_ID,
    location = REGION,
    model_name = 'bqml_bqml_random-forest'
)
test.to_dict()

{'name': 'projects/1026793852137/locations/us-central1/models/bqml_bqml_random-forest',
 'displayName': 'bqml_bqml_random-forest',
 'supportedDeploymentResourcesTypes': ['DEDICATED_RESOURCES'],
 'supportedInputStorageFormats': ['jsonl',
  'bigquery',
  'csv',
  'tf-record',
  'tf-record-gzip',
  'file-list'],
 'supportedOutputStorageFormats': ['jsonl', 'bigquery'],
 'createTime': '2023-06-27T12:30:07.561556Z',
 'updateTime': '2023-06-27T12:32:06.390904Z',
 'etag': 'AMEw9yOim4p6j_hCCCHgN_pxHj9ot-6hpRtHMNxJJrOHhbkPklVRulB6UzcmEv5yVKNX',
 'labels': {'experiment': 'random-forest', 'series': 'bqml'},
 'explanationSpec': {'parameters': {'sampledShapleyAttribution': {'pathCount': 5}},
  'metadata': {'inputs': {'V3': {},
    'V18': {},
    'V21': {},
    'V13': {},
    'V11': {},
    'Amount': {},
    'V8': {},
    'V15': {},
    'V27': {},
    'V22': {},
    'V4': {},
    'V24': {},
    'Time': {},
    'V17': {},
    'V16': {},
    'V9': {},
    'V1': {},
    'V14': {},
    'V10': {},
    'V5

In [256]:
# example model from Training Pipeline (not a Vertex Pipeline): look it up by model_name
# and show its full resource as a dict
test = aiplatform.Model(
    project = PROJECT_ID,
    location = REGION,
    model_name = 'model_05_05f'
)
test.to_dict()

{'name': 'projects/1026793852137/locations/us-central1/models/model_05_05f',
 'displayName': '05_05f',
 'predictSchemata': {},
 'metadata': None,
 'trainingPipeline': 'projects/1026793852137/locations/us-central1/trainingPipelines/6528349671444709376',
 'containerSpec': {'imageUri': 'us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-12:latest'},
 'supportedDeploymentResourcesTypes': ['DEDICATED_RESOURCES',
  'SHARED_RESOURCES'],
 'supportedInputStorageFormats': ['jsonl',
  'bigquery',
  'csv',
  'tf-record',
  'tf-record-gzip',
  'file-list'],
 'supportedOutputStorageFormats': ['jsonl', 'bigquery'],
 'createTime': '2022-09-27T19:21:47.583324Z',
 'updateTime': '2024-02-22T21:13:55.966054Z',
 'etag': 'AMEw9yObhb_VzCdWLiEzm7nRwKtvJG_ZDxWv_Yc7GywHp_M3phooJkxLMLMoJtkK5fQE',
 'labels': {'run_name': 'run-20240222211353',
  'experiment_name': 'experiment-05-05f-tf-classification-dnn',
  'series': '05',
  'experiment': '05f'},
 'supportedExportFormats': [{'id': 'custom-trained',
   'exportableCo

In [271]:
# example AutoML model: look it up by its numeric model ID and show its full resource as a dict
test = aiplatform.Model(
    project = PROJECT_ID,
    location = REGION,
    model_name = '3955644813528793088'
)
test.to_dict()

{'name': 'projects/1026793852137/locations/us-central1/models/3955644813528793088',
 'displayName': '02c_fraud_20230825120638',
 'predictSchemata': {'instanceSchemaUri': 'https://storage.googleapis.com/caip-tenant-c9fb6c93-0c9f-41d7-96df-f5ebf0a7575c/schema/predict/instance.yaml?GoogleAccessId=service-1026793852137@gcp-sa-aiplatform.iam.gserviceaccount.com&Expires=1708985221&Signature=edZ2b1BmgniRuAfWPlqc1EUxEiadhJ0%252BjG41LZ5%252FNk3Qw2e4%252BAZpdxm5YUTys6lBXFnMO%252Fl4%252FsYvEFY3tg6hftTo3es9YdmG01LpRSwQAOaFJ4lkWXS%252FslgtDuNgOxwQSDR9G%252BR9lNT4Bi9OkV7nX3JGeO2oy8ImFBvD8k3rdhZ9ipNqrJobINTta1s7a2mf4nY1oifV3wn2qWLhZ4v1aqyC979qVko3AqY3umx9xCZ2u4mOlaso%252B6t%252Fsb0UYofqtQSRtkk3PRAPsD9SNBQzQigJ%252Fq3FlmcwkH0qaBQJsIM84pBGRnH8k7mUNM%252BEUipJZC4ZzuvRZFZLRVdlaxV%252FVQ%253D%253D',
  'predictionSchemaUri': 'https://storage.googleapis.com/caip-tenant-c9fb6c93-0c9f-41d7-96df-f5ebf0a7575c/schema/predict/prediction.yaml?GoogleAccessId=service-1026793852137@gcp-sa-aiplatform.iam.gserviceaccou

In [264]:
test.to_dict()['predictSchemata']['instanceSchemaUri']

'https://storage.googleapis.com/caip-tenant-c9fb6c93-0c9f-41d7-96df-f5ebf0a7575c/schema/predict/instance.yaml?GoogleAccessId=service-1026793852137@gcp-sa-aiplatform.iam.gserviceaccount.com&Expires=1708976819&Signature=PKHI%252Bv6b5EBvMjEBGEJEkAJFPxHnAzoofOa5Ac23gHcN8QShcu%252BD53FR7qkUf7sDX7DHBNqecWORycOVrYsKLA0gMTbRE2BatdSl8p6pWJymfUTuiiur%252BiOolSQAZkDBXC3Yt0D8JtqYr%252BYiyo60QTKtOVoFf3uyKFiR0%252BMB3g8aralZGixuf8Ty1g3xPfYeby2ljRGAYk4GUxSqmbWgssq2zXXKLs5ORNgRchEb0k9ZlogHphc8ZskB2Zl%252FpeC%252BjNGfq2hxNyQzQo7IPYq98LToeVAOV14F6uN3bp8y1HiQxYdJwImSj%252FNFRYeWBMW6IM74TQNzzTW8ynDj6eKYdA%253D%253D'

In [267]:
test.to_dict()['predictSchemata']['predictionSchemaUri']

'https://storage.googleapis.com/caip-tenant-c9fb6c93-0c9f-41d7-96df-f5ebf0a7575c/schema/predict/prediction.yaml?GoogleAccessId=service-1026793852137@gcp-sa-aiplatform.iam.gserviceaccount.com&Expires=1708976819&Signature=JXZzs4r6dQ3wsuP77DlxfrAD8CJ6MDdWHVs8Mb6rFPDnIWoVEvffM3SPsE1VCMwaHZzLdhII1jKD7nD7vXtlpxTToBPPRLldhpEEqRVtlo%252Fv7CDTmzS5kk%252FiL8cnW179rlyI%252FLX7SigRfebrLMMZMI%252BwaPLeNPTc%252FYwPy0TwXJT23SVVaroEsODwvINZ4QYNMP%252F9ZYk1G%252BK0MQFiNVTTHVB3DMHJ9y8Gj1Yw5sECwUfxQ%252FQzTEEVA9v2jNhPP1IBxdkC3%252F9aUrvGRtT85h0M4IGajJP23KSpYw0CFl4aLRMYNzNKXr3zl3XwHZ5wuBPPRipRvZAp6zzUc3Vv38ajRg%253D%253D'

In [269]:
test.to_dict()['metadataSchemaUri']

'https://storage.googleapis.com/google-cloud-aiplatform/schema/model/metadata/automl_tabular_1.0.0.yaml'

## Vertex AI Pipelines

- https://cloud.google.com/vertex-ai/docs/pipelines/introduction
- https://www.kubeflow.org/docs/components/pipelines/v2/introduction/
- https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.PipelineJob

In [34]:
# create the folder for compiled pipeline files;
# exist_ok avoids the check-then-create race and makes re-running this cell safe
os.makedirs(DIR + '/pipeline', exist_ok = True)

In [35]:
@kfp.dsl.component(
    base_image = "python:3.10",
    packages_to_install = ["pandas"]
)
def example_string(text: str) -> str:
    """Return ``text`` with a fixed trailing phrase appended."""
    return text + ',... and more text'

In [36]:
@kfp.dsl.component(
    base_image = "python:3.10",
    packages_to_install = ["pandas"]
)
def example_number(number: float) -> float:
    """Return ``number`` increased by 10."""
    return number + 10

In [37]:
@kfp.dsl.component(
    base_image = "python:3.10",
    packages_to_install = ["pandas"]
)
def example_combo(text: str, number: float) -> str:
    """Join ``text`` and ``number`` into a single sentence-like string."""
    return f'{text}, ... and a number {number}'

In [38]:
@kfp.dsl.pipeline(
    name = 'simple-pipeline',
    description = 'A simple pipeline for testing',
    pipeline_root = f'gs://{GCS_BUCKET}/{SERIES}/{EXPERIMENT}/pipelines/example'
)
def example_pipeline(
    text: str,
    number: float
):
    """Run the string and number components, then combine their outputs.

    ``text`` goes to ``example_string`` and ``number`` to ``example_number``;
    both results are passed to ``example_combo``.
    """
    string_step = example_string(text = text)
    number_step = example_number(number = number)
    # the combo step consumes both upstream outputs; its own result is not used further
    example_combo(text = string_step.output, number = number_step.output)

In [40]:
# compile the pipeline function into a YAML file in the local pipeline folder
kfp.compiler.Compiler().compile(
    pipeline_func = example_pipeline,
    package_path = f'{DIR}/pipeline/example.yaml'
)

In [41]:
# define a run of the compiled pipeline with values for its two inputs (text, number)
# NOTE(review): pipeline_root ends in /simple here but in /example on the @kfp.dsl.pipeline
# decorator — confirm which location is intended
pipeline_job = aiplatform.PipelineJob(
    display_name = f"simple-pipeline",
    template_path = f"{DIR}/pipeline/example.yaml",
    parameter_values = dict(text ='Example text', number = 34.2),
    pipeline_root = f'gs://{GCS_BUCKET}/{SERIES}/{EXPERIMENT}/pipelines/simple',
    enable_caching = None # True (enabled), False (disabled), None (defer to component level caching)
)

In [42]:
# start the pipeline run under SERVICE_ACCOUNT; a later cell waits for it with pipeline_job.wait()
response = pipeline_job.submit(
    service_account = SERVICE_ACCOUNT
)

Creating PipelineJob
PipelineJob created. Resource name: projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/simple-pipeline-20240304122159?project=1026793852137


In [43]:
# NOTE(review): _dashboard_uri() is underscore-prefixed (non-public); the submit log above
# already prints the same "View Pipeline Job" link
print(f'The Dashboard can be viewed here:\n{pipeline_job._dashboard_uri()}')

The Dashboard can be viewed here:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/simple-pipeline-20240304122159?project=1026793852137


In [44]:
pipeline_job.wait()

PipelineJob projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob run completed. Resource name: projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304122159


In [45]:
aiplatform.get_pipeline_df(pipeline = f'simple-pipeline')

Unnamed: 0,pipeline_name,run_name,param.input:number,param.input:text,param.vmlmd_lineage_integration
0,simple-pipeline,simple-pipeline-20240304122159,34.2,Example text,{'pipeline_run_component': {'parent_task_names...


In [46]:
# index the run's task details by task name so individual tasks can be looked up by name
tasks = {task.task_name: task for task in pipeline_job.task_details}

In [47]:
# report each task's name alongside its state
for task, task_detail in tasks.items():
    print(task, task_detail.state)

simple-pipeline-20240304122159 State.SUCCEEDED
example-number State.SUCCEEDED
example-combo State.SUCCEEDED
example-string State.SUCCEEDED


In [51]:
# task names only (the keys of the tasks dict)
for task in tasks:
    print(task)

simple-pipeline-20240304122159
example-number
example-combo
example-string


In [52]:
#tasks['example-number']

### Schedule Pipeline

- https://cloud.google.com/vertex-ai/docs/pipelines/schedule-pipeline-run

Example here: run every 2 minutes, 5 times

In [64]:
# copy the compiled pipeline YAML to GCS; the PipelineJob in the next cell reads its template from this path
bucket = gcs.lookup_bucket(GCS_BUCKET)
blob = bucket.blob(f'{SERIES}/{EXPERIMENT}/pipelines/example/example.yaml')
blob.upload_from_filename(f'{DIR}/pipeline/example.yaml')

In [67]:
# same job definition as the earlier run, except template_path points at the GCS copy
# of the compiled YAML instead of the local file
pipeline_job = aiplatform.PipelineJob(
    display_name = f"simple-pipeline",
    template_path = f'gs://{GCS_BUCKET}/{SERIES}/{EXPERIMENT}/pipelines/example/example.yaml',
    parameter_values = dict(text ='Example text', number = 34.2),
    pipeline_root = f'gs://{GCS_BUCKET}/{SERIES}/{EXPERIMENT}/pipelines/simple',
    enable_caching = None # True (enabled), False (disabled), None (defer to component level caching)
)

In [69]:
# schedule the job: the cron expression fires every 2 minutes, capped at 5 runs in total
# with at most 3 running concurrently
pipeline_job_schedule = pipeline_job.create_schedule(
    display_name = f"simple-pipeline",
    cron = "*/2 * * * *",
    max_concurrent_run_count = 3,
    max_run_count = 5
)

Creating PipelineJobSchedule
PipelineJobSchedule created. Resource name: projects/1026793852137/locations/us-central1/schedules/968986403419455488
To use this PipelineJobSchedule in another session:
schedule = aiplatform.PipelineJobSchedule.get('projects/1026793852137/locations/us-central1/schedules/968986403419455488')
View Schedule:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/schedules/968986403419455488?project=1026793852137


In [70]:
# pause 12 minutes (6 x 2-minute cron intervals) — presumably so all 5 scheduled runs
# have fired before they are listed below
time.sleep(6*2*60)

In [71]:
aiplatform.get_pipeline_df(pipeline = f'simple-pipeline')

Unnamed: 0,pipeline_name,run_name,param.vmlmd_lineage_integration,param.input:number,param.input:text
0,simple-pipeline,simple-pipeline-20240304061201156,{'pipeline_run_component': {'parent_task_names...,34.2,Example text
1,simple-pipeline,simple-pipeline-20240304061003879,{'pipeline_run_component': {'parent_task_names...,34.2,Example text
2,simple-pipeline,simple-pipeline-20240304060801037,{'pipeline_run_component': {'pipeline_run_id':...,34.2,Example text
3,simple-pipeline,simple-pipeline-20240304060601172,{'pipeline_run_component': {'pipeline_run_id':...,34.2,Example text
4,simple-pipeline,simple-pipeline-20240304060400986,{'pipeline_run_component': {'parent_task_names...,34.2,Example text
5,simple-pipeline,simple-pipeline-20240304122159,{'pipeline_run_component': {'task_name': 'simp...,34.2,Example text


In [76]:
# find the schedules whose display name is simple-pipeline
schedules = aiplatform.PipelineJobSchedule.list(
    filter = f'display_name="simple-pipeline"',
)
schedules

[<google.cloud.aiplatform.pipeline_job_schedules.PipelineJobSchedule object at 0x7fedf4c56170> 
 resource name: projects/1026793852137/locations/us-central1/schedules/968986403419455488]

In [81]:
# list the pipeline jobs belonging to the first matching schedule and print each job's resource as a dict
jobs = schedules[0].list_jobs()
for job in jobs:
    print(job.to_dict())

By enabling simple view, the PipelineJob resources returned from this method will not contain all fields.
{'name': 'projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304061201156', 'createTime': '2024-03-04T14:12:01.303384Z', 'startTime': '2024-03-04T14:12:01.901199Z', 'endTime': '2024-03-04T14:12:04.253023Z', 'updateTime': '2024-03-04T14:12:04.253023Z', 'pipelineSpec': {'pipelineInfo': {'name': 'simple-pipeline'}}, 'state': 'PIPELINE_STATE_SUCCEEDED', 'jobDetail': {'pipelineContext': {'name': 'projects/1026793852137/locations/us-central1/metadataStores/default/contexts/simple-pipeline'}, 'pipelineRunContext': {'name': 'projects/1026793852137/locations/us-central1/metadataStores/default/contexts/simple-pipeline-20240304061201156'}}, 'labels': {'vertex-ai-pipelines-run-billing-id': '7978872483648372736'}}
{'name': 'projects/1026793852137/locations/us-central1/pipelineJobs/simple-pipeline-20240304061003879', 'createTime': '2024-03-04T14:10:04.023500Z', 'start

In [82]:
len(jobs)

5