## BQ ARIMA+ Pipeline

In [1]:
# Resolve the active gcloud project. `name = !cmd` captures the command's
# stdout as a list of lines, hence the `[0]` lookups below.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
# Look up the numeric project number for the project id resolved above.
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
# NOTE(review): LOCATION and REGION hold the same value; only REGION is
# referenced by the cells visible in this notebook -- confirm whether both are needed.
LOCATION = 'us-central1'
REGION = "us-central1"

# VERTEX_SA = '934903580331-compute@developer.gserviceaccount.com'
# Service account handed to `job.run(...)` when the pipeline is submitted.
VERTEX_SA = 'jt-vertex-sa@hybrid-vertex.iam.gserviceaccount.com'

# Echo the resolved settings so the run environment is visible in the output.
print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")
print(f"REGION: {REGION}")
print(f"VERTEX_SA: {VERTEX_SA}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1
REGION: us-central1
VERTEX_SA: jt-vertex-sa@hybrid-vertex.iam.gserviceaccount.com


In [2]:
# Experiment / series identifiers
EXPERIMENT = "forecasting-1"
SERIES = "a-forecast-tourney"

# BigQuery naming derived from the identifiers above; the dataset name is the
# series name with every hyphen swapped for an underscore.
BQ_PROJECT = PROJECT_ID
BQ_DATASET = "_".join(SERIES.split("-"))
BQ_TABLE = EXPERIMENT

# Display limit used by visualisation cells
viz_limit = 12

In [7]:
from google.cloud import bigquery
from google.cloud import storage

import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime, timedelta

from google.cloud import aiplatform as vertex_ai

import kfp
from typing import Any, Callable, Dict, NamedTuple, Optional, List
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, component, Metrics)

from kfp.v2.dsl import HTML, Artifact, Condition, Input, Output, component

In [4]:
# BigQuery client bound to the notebook's project
bq = bigquery.Client(project=PROJECT_ID)

# Point the Vertex AI SDK at the same project and region
vertex_ai.init(project=PROJECT_ID, location=REGION)

# Cloud Storage client for the same project
storage_client = storage.Client(project=PROJECT_ID)

## pipeline components

In [5]:
# Local directory intended to hold component source files (see the
# commented-out `%%writefile {REPO_DOCKER_PATH_PREFIX}/...` lines in later cells).
REPO_DOCKER_PATH_PREFIX = 'src'

# Start from an empty directory: delete any previous copy, then recreate it.
# WARNING: `rm -rf` removes everything under this path on every run of the cell.
! rm -rf $REPO_DOCKER_PATH_PREFIX
! mkdir $REPO_DOCKER_PATH_PREFIX
# !mkdir -p ./$REPO_DOCKER_PATH_PREFIX

### create BQ dataset

In [8]:
# %%writefile {REPO_DOCKER_PATH_PREFIX}/create_bq_dataset.py

# import kfp
# from typing import Any, Callable, Dict, NamedTuple, Optional, List
# from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
#                         OutputPath, component, Metrics)

@kfp.v2.dsl.component(
  base_image='python:3.9',
  packages_to_install=['google-cloud-bigquery==3.6.0'],
)
def create_bq_dataset(
    project: str,
    new_bq_dataset: str,
    bq_location: str,
    experiment_name: str,
    dataset_tag: str,
) -> NamedTuple('Outputs', [
    ('bq_dataset_name', str),
    ('bq_dataset_uri', str),
]):
    """Create a labelled BigQuery dataset and report its name and URI.

    Args:
        project: Project that owns the dataset.
        new_bq_dataset: Dataset id to create inside ``project``.
        bq_location: BigQuery location assigned to the dataset.
        experiment_name: Stored on the dataset as the ``experiment`` label.
        dataset_tag: Stored on the dataset as the ``dataset_tag`` label.

    Returns:
        ``(bq_dataset_name, bq_dataset_uri)`` where the name is
        ``new_bq_dataset`` and the URI is ``bq://<project>:<new_bq_dataset>``.
    """
    from google.cloud import bigquery

    client = bigquery.Client(project=project)

    dataset = bigquery.Dataset(f"{project}.{new_bq_dataset}")
    dataset.location = bq_location
    dataset.labels = {
        'dataset_tag': f"{dataset_tag}",
        'experiment': f'{experiment_name}',
    }
    # exists_ok=True tolerates a dataset that is already present
    client.create_dataset(dataset=dataset, exists_ok=True)

    dataset_name = f'{new_bq_dataset}'
    dataset_uri = f'bq://{project}:{new_bq_dataset}'
    return (dataset_name, dataset_uri)

### prepare forecast data

In [9]:
# %%writefile {REPO_DOCKER_PATH_PREFIX}/forecast_bqarima.py

# import kfp
# from typing import Any, Callable, Dict, NamedTuple, Optional, List
# from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
#                         OutputPath, component, Metrics)

@kfp.v2.dsl.component(
  base_image='python:3.9',
  packages_to_install=['google-cloud-bigquery==3.6.0'],
)
def forecast_bqarima(
    project: str,
    bq_input: str,
    bq_test: str,
    bq_horizon: str,
    forecast_test_length: int,
    forecast_horizon_length: int,
    target_column: str,
    time_column: str,
    series_column: str,
    cov_unavailable: list,
    cov_available: list,
    cov_attribute: list
) -> NamedTuple('Outputs', [
    ('bq_model', str), 
    ('bq_model_query', str), 
    ('bq_output', str), 
    ('platform', str), 
    ('method', str), 
    ('scenario', str)
]):
    """Train a BigQuery ML ARIMA_PLUS model and publish its forecasts.

    Steps, each executed as a blocking BigQuery job:
      1. CREATE OR REPLACE MODEL on the TRAIN and VALIDATE rows of ``bq_input``.
      2. Materialise ML.EXPLAIN_FORECAST output, left-joined to the TEST rows
         of ``bq_input``, into ``<dataset>.fitted_forecast_arimaplus``.
      3. Replace this platform/method/scenario's rows in ``bq_test`` with the
         forecast rows that matched a TEST actual.
      4. Replace this platform/method/scenario's rows in ``bq_horizon`` with
         the forecast rows that have no actual (``splits IS NULL``).

    Args:
        project: Project used for the BigQuery client.
        bq_input: Fully-qualified source table; must expose a ``splits`` column.
        bq_test: Fully-qualified table receiving test-window forecasts. The
            model and raw-output table are created next to it (same prefix).
        bq_horizon: Fully-qualified table receiving future-horizon forecasts.
        forecast_test_length: Number of test-window steps to forecast.
        forecast_horizon_length: Number of steps to forecast past the test window.
        target_column: Column holding the value to forecast.
        time_column: Column holding the timestamp.
        series_column: Column identifying each time series.
        cov_unavailable: Column names; only used to name the NULL placeholder
            fields of ``feature_attributions``.
        cov_available: Same use as ``cov_unavailable``.
        cov_attribute: Same use as ``cov_unavailable``.

    Returns:
        ``(bq_model, bq_model_query, bq_output, platform, method, scenario)``.
    """
    import logging
    from google.cloud import bigquery

    bq = bigquery.Client(project = project)

    def run_query(sql: str) -> None:
        """Submit ``sql`` to BigQuery and block until the job has finished."""
        bq.query(query = sql).result()

    # parameters
    table = bq_test.split('.')[-1]
    # everything in front of the final ".<table>" of bq_test
    table_prefix = bq_test[:-(len(table)+1)]
    bq_model = f"{table_prefix}.arimaplus"
    bq_output = f"{table_prefix}.fitted_forecast_arimaplus"
    platform = 'BigQuery'
    method = 'ARIMA_PLUS'
    scenario = 'automatic'
    # One horizon value shared by model creation and ML.EXPLAIN_FORECAST so the
    # two statements cannot drift apart.
    total_horizon = forecast_test_length + forecast_horizon_length

    logging.info(f"table: {table}")
    logging.info(f"bq_model: {bq_model}")
    logging.info(f"bq_output: {bq_output}")
    logging.info(f"platform: {platform}")
    logging.info(f"method: {method}")
    logging.info(f"scenario: {scenario}")

    # ======================================
    # Create Model: ARIMA_PLUS
    # ======================================
    # data_frequency should be ok as default = auto_frequency
    queryARIMA = f"""
        CREATE OR REPLACE MODEL `{bq_model}`
        OPTIONS
          (model_type = 'ARIMA_PLUS',
           time_series_timestamp_col = '{time_column}',
           time_series_data_col = '{target_column}',
           time_series_id_col = '{series_column}',
           auto_arima_max_order = 5,
           holiday_region = 'US',
           horizon = {total_horizon}
          ) AS
        SELECT {series_column}, {time_column}, {target_column}
        FROM `{bq_input}`
        WHERE splits in ('TRAIN','VALIDATE')
    """
    run_query(queryARIMA)

    # ======================================
    # Create Raw Output
    # ======================================
    # Forecast rows are left-joined to TEST actuals; rows beyond the test
    # window keep splits = NULL, which the horizon insert below relies on.
    run_query(f"""
        CREATE OR REPLACE TABLE `{bq_output}` AS
        WITH
            FORECAST AS (
                SELECT
                    {series_column}, 
                    EXTRACT(DATE from time_series_timestamp) as {time_column},
                    time_series_adjusted_data as yhat,
                    prediction_interval_lower_bound as yhat_lower,
                    prediction_interval_upper_bound as yhat_upper
                FROM ML.EXPLAIN_FORECAST(MODEL `{bq_model}`, STRUCT({total_horizon} AS horizon, 0.95 AS confidence_level))
                WHERE time_series_type = 'forecast'
            ),
            ACTUAL AS (
                SELECT {series_column}, {time_column}, {target_column}, splits
                FROM `{bq_input}`
                WHERE splits = 'TEST'
            )
        SELECT {series_column}, {time_column}, {target_column}, yhat, yhat_lower, yhat_upper, splits
        FROM FORECAST
        LEFT OUTER JOIN ACTUAL
        USING ({series_column}, {time_column})
        ORDER BY {series_column}, {time_column} 
    """)

    # ======================================
    # Insert Output for Tournament (first remove prior run if present)
    # ======================================
    run_query(f"""
        DELETE `{bq_test}`
        WHERE platform = '{platform}' and method = '{method}' and scenario = '{scenario}'
    """)

    run_query(f"""
        INSERT INTO `{bq_test}`
        SELECT
            '{platform}' as platform,
            '{method}' as method,
            '{scenario}' as scenario,
            {series_column},
            {time_column},
            {target_column},
            yhat,
            yhat_lower,
            yhat_upper
        FROM `{bq_output}`
        WHERE splits = 'TEST'
        ORDER by {series_column}, {time_column}
    """)

    # Insert forecast for future horizon (first remove prior run if present)
    run_query(f"""
        DELETE `{bq_horizon}`
        WHERE platform = '{platform}' and method = '{method}' and scenario = '{scenario}'
    """)

    # The insert fills feature_attributions with one NULL FLOAT64 field per
    # covariate, plus the target and time columns.
    attribution_fields = cov_unavailable + cov_available + cov_attribute + [target_column, time_column]
    structvars = ', '.join(f'CAST(null AS FLOAT64) AS {v}' for v in attribution_fields)
    run_query(f"""
        INSERT INTO `{bq_horizon}`
        SELECT
            '{platform}' as platform,
            '{method}' as method,
            '{scenario}' as scenario,
            {series_column},
            {time_column},
            {target_column},
            yhat,
            yhat_lower,
            yhat_upper,
            STRUCT({structvars}) AS feature_attributions
        FROM `{bq_output}`
        WHERE splits is NULL
        ORDER by {series_column}, {time_column}
    """)

    return (
        bq_model, 
        queryARIMA, 
        bq_output, 
        platform, 
        method, 
        scenario
    )

### plot metrics

In [10]:
# %%writefile {REPO_DOCKER_PATH_PREFIX}/get_model_evaluation_metrics.py

# import kfp
# from typing import Any, Callable, Dict, NamedTuple, Optional, List
# from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
#                         OutputPath, component, Metrics)

@kfp.v2.dsl.component(
  base_image='python:3.9',
  packages_to_install=["jinja2", "pandas", "matplotlib"],
)
def get_model_evaluation_metrics(
    metrics_in: Input[Artifact], metrics_out: Output[HTML]
) -> NamedTuple("Outputs", [("avg_mean_absolute_error", float)]):
    """
    Render the evaluation metrics as an HTML report and return the average
    mean absolute error across all rows.

    The first schema field is treated as the series identifier column (it is
    used as the frame index and gets a plain capitalised header); every other
    column is converted to float.

    Args:
        metrics_in: artifact whose metadata carries the evaluation result as
            ``rows`` (each row a dict with an ``f`` list of ``{"v": ...}``
            cells) and ``schema`` (a dict with a ``fields`` list).
        metrics_out: HTML artifact; the styled metrics table is written to
            its path.
    Returns:
        avg_mean_absolute_error: mean of the ``mean_absolute_error`` column,
            rounded to 0 decimals
    """

    import pandas as pd

    # Helpers
    def prettyfier(styler):
        """
        Helper function to prettify the metrics table.
        Args:
            styler: Styler object
        Returns:
            Styler object
        """
        caption = {
            "selector": "caption",
            "props": [
                ("caption-side", "top"),
                ("font-size", "150%"),
                ("font-weight", "bold"),
                ("font-family", "arial"),
            ],
        }
        headers = {
            "selector": "th",
            "props": [("color", "black"), ("font-family", "arial")],
        }
        rows = {
            "selector": "td",
            "props": [("text-align", "center"), ("font-family", "arial")],
        }
        styler.set_table_styles([caption, headers, rows])
        styler.set_caption("Forecasting accuracy report ")
        styler.hide(axis="index")
        styler.format(precision=2)
        styler.background_gradient(cmap="Blues")
        return styler

    def get_column_names(header):
        """
        Helper function to build a display header with its abbreviation,
        e.g. "mean_absolute_error" -> "mean absolute error (MAE)".
        Args:
            header: raw column name
        Returns:
            header_prettied: display header
        """
        header_clean = header.replace("_", " ")
        header_abbrev = "".join([h[0].upper() for h in header_clean.split()])
        header_prettied = f"{header_clean} ({header_abbrev})"
        return header_prettied

    # Extract rows and schema from metrics artifact
    rows = metrics_in.metadata["rows"]
    schema = metrics_in.metadata["schema"]

    # Convert into a tabular format
    columns = [field["name"] for field in schema["fields"] if "name" in field]
    # The identifier column is whatever the schema lists first. A hard-coded
    # name fails with KeyError whenever the model's series id column is called
    # something else (this pipeline trains on `timeseries_id`).
    id_column = columns[0]
    records = [[cell["v"] for cell in row["f"]] for row in rows]
    metrics = (
        pd.DataFrame.from_records(records, columns=columns, index=id_column)
        .astype(float)
        .round(3)
        .reset_index()  # identifier returns as the first column, matching `columns`
    )

    # Create the HTML artifact for the metrics
    pretty_columns = [
        h.replace("_", " ").capitalize() if h == id_column else get_column_names(h)
        for h in columns
    ]
    pretty_metrics = metrics.copy()
    pretty_metrics.columns = pretty_columns
    html_metrics = pretty_metrics.style.pipe(prettyfier).to_html()
    with open(metrics_out.path, "w") as f:
        f.write(html_metrics)

    # Create metrics dictionary for the model
    avg_mean_absolute_error = round(float(metrics["mean_absolute_error"].mean()), 0)
    component_outputs = NamedTuple("Outputs", [("avg_mean_absolute_error", float)])

    return component_outputs(avg_mean_absolute_error)

## train config

In [11]:
%%writefile arima_uni_cfg.py
# Column configuration for the univariate ARIMA+ pipeline. This cell writes the
# module to disk; the pipeline definition cell imports it as `arima_uni_cfg`.

# Timestamp column, series identifier column and value column that the
# pipeline interpolates into its CREATE MODEL statements.
time_column='date'
timeseries_id='timeseries_id'
target_column='gross_quantity'
# NOTE(review): COVARIATE_COLUMNS is not referenced by the pipeline cells
# visible in this notebook, and it lists 'gross_quantity', which is also the
# target column -- confirm whether that entry is intended.
COVARIATE_COLUMNS = [
        'product_id',
        'location_id',
        'gross_quantity',
        # 'date',
        'weekday',
        'wday',
        'month',
        'year',
        'event_name_1',
        'event_type_1',
        'event_name_2',
        'event_type_2',
        'snap_CA',
        'snap_TX',
        'snap_WI',
        'dept_id',
        'cat_id',
        'state_id',
]

Overwriting arima_uni_cfg.py


# Build pipeline

## Vertex Experiments

In [21]:
import time

# custom identifier for organizing experiments
EXPERIMENT_PREFIX = "m5_bqarima_pipe"
EXPERIMENT_NAME = EXPERIMENT_PREFIX
# run name carries a second-resolution timestamp taken when the cell executes
RUN_NAME = "run-" + time.strftime("%Y%m%d-%H%M%S")

print("EXPERIMENT_NAME: " + EXPERIMENT_NAME)
print("RUN_NAME: " + RUN_NAME)

EXPERIMENT_NAME: m5_bqarima_pipe
RUN_NAME: run-20230307-085431


## pipeline vars

In [22]:
VERSION = "v01"

BUCKET_NAME = "vertex-forecast-22"
GCS_BUCKET_URI = "gs://" + BUCKET_NAME

# gs://<bucket>/<experiment>/<version>
EXPERIMENT_GCS_DIR = "/".join([GCS_BUCKET_URI, EXPERIMENT_NAME, VERSION])

# Stores pipeline executions for each run
PIPELINE_ROOT_PATH = EXPERIMENT_GCS_DIR + "/pipeline_root"
print("PIPELINE_ROOT_PATH: " + PIPELINE_ROOT_PATH)

# <tag>-<experiment>-<version>, with every underscore turned into a hyphen
PIPELINE_TAG = "arima_uni"
PIPELINE_NAME = "-".join([PIPELINE_TAG, EXPERIMENT_NAME, VERSION]).replace("_", "-")
print("PIPELINE_NAME: " + PIPELINE_NAME)

PIPELINE_ROOT_PATH: gs://vertex-forecast-22/m5_bqarima_pipe/v01/pipeline_root
PIPELINE_NAME: arima-uni-m5-bqarima-pipe-v01


In [23]:
from typing import Any, Callable, Dict, NamedTuple, Optional

# kfp
import kfp
import kfp.v2.dsl
from kfp.v2.google import client as pipelines_client
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, component)

from google_cloud_pipeline_components import aiplatform as gcc_aip

from google_cloud_pipeline_components.v1.bigquery import (
    BigqueryCreateModelJobOp, BigqueryEvaluateModelJobOp,
    BigqueryExplainForecastModelJobOp, BigqueryForecastModelJobOp,
    BigqueryMLArimaEvaluateJobOp, BigqueryQueryJobOp)

In [24]:
# Show the working directory and its contents; presumably a sanity check that
# arima_uni_cfg.py is present before the next cell imports it.
!pwd
!ls

/home/jupyter/vertex-forecas-repo/wip-forecast-tutorial
01-prepare-datasets.ipynb  __pycache__	     bqarima_plus_pipeline.ipynb  src
02-bqarima_plus.ipynb	   arima_uni_cfg.py  custom_pipeline_spec.json


In [25]:
# from src import create_bq_dataset,get_model_evaluation_metrics

import arima_uni_cfg

@kfp.v2.dsl.pipeline(
  name=PIPELINE_NAME
)
def pipeline(
    project: str,
    location: str,
    version: str,
    dataset_tag: str,
    bq_location: str,
    new_bq_dataset: str,
    experiment_name: str,
    experiment_run: str,
    bq_source: str,
    forecast_horizon: int,
    bq_model_table: str,
    bq_evaluate_time_series_configuration: dict,
    bq_evaluate_model_configuration: dict,
    bq_forecast_configuration: dict,
    bq_explain_forecast_configuration: dict,
    performance_threshold: float,
):
    """Train, evaluate and conditionally register a BigQuery ML ARIMA_PLUS model.

    Flow: create the target dataset, fit an experiment model on the TRAIN
    split, evaluate it (ARIMA evaluation plus evaluation against the TEST
    split), render the metrics, and -- only when the average MAE is below
    ``performance_threshold`` -- retrain on all of ``bq_source`` with Vertex AI
    Model Registry options, then forecast and explain the forecast.

    Column names come from the ``arima_uni_cfg`` module.

    Args:
        project: Project that owns the dataset and runs the BigQuery jobs.
        location: Not consumed by any step (see ``bq_location``); retained so
            existing callers that pass it keep working.
        version: Not consumed by any step.
        dataset_tag: Label value stored on the created dataset.
        bq_location: BigQuery location. Used both for the created dataset and
            for every BigQuery job, since a job has to run in the location of
            the datasets it touches.
        new_bq_dataset: Dataset to create and write the model into.
        experiment_name: Label value stored on the created dataset.
        experiment_run: Not consumed by any step.
        bq_source: Fully-qualified source table; must expose a ``split`` column.
        forecast_horizon: Number of steps to forecast.
        bq_model_table: Model name inside ``new_bq_dataset``.
        bq_evaluate_time_series_configuration: Query job configuration for the
            ARIMA evaluate step.
        bq_evaluate_model_configuration: Query job configuration for the model
            evaluate step.
        bq_forecast_configuration: Query job configuration for the forecast step.
        bq_explain_forecast_configuration: Query job configuration for the
            explain-forecast step.
        performance_threshold: Upper bound on the average MAE for the final
            training branch to run.
    """

    # create BQ dataset
    create_train_dataset_op = (
      create_bq_dataset(
          project=project,
          new_bq_dataset=new_bq_dataset,
          experiment_name=experiment_name,
          dataset_tag=dataset_tag,
          bq_location=bq_location,
      )
    )

    # Run an ARIMA PLUS experiment on the TRAIN split
    bq_arima_model_exp_op = (
        BigqueryCreateModelJobOp(
            query=f"""
        -- create model table
        CREATE OR REPLACE MODEL `{project}.{new_bq_dataset}.{bq_model_table}`
        OPTIONS(
            MODEL_TYPE = 'ARIMA_PLUS',
            TIME_SERIES_TIMESTAMP_COL = '{arima_uni_cfg.time_column}',
            TIME_SERIES_DATA_COL = '{arima_uni_cfg.target_column}',
            TIME_SERIES_ID_COL = ['{arima_uni_cfg.timeseries_id}']
        ) AS
        SELECT
          {arima_uni_cfg.time_column},
          {arima_uni_cfg.timeseries_id},
          {arima_uni_cfg.target_column}
        FROM `{bq_source}`
        WHERE split='TRAIN';
        """,
            project=project,
            location=bq_location,
        )
        .set_display_name("run arima+ model experiment")
        .after(create_train_dataset_op)
    )

    # Evaluate ARIMA PLUS time series
    _ = (
        BigqueryMLArimaEvaluateJobOp(
            project=project,
            location=bq_location,
            model=bq_arima_model_exp_op.outputs["model"],
            show_all_candidate_models=False,
            job_configuration_query=bq_evaluate_time_series_configuration,
        )
        .set_display_name("evaluate arima plus time series")
        .after(bq_arima_model_exp_op)
    )

    # Evaluate ARIMA Plus model against the TEST split
    bq_arima_evaluate_model_op = (
        BigqueryEvaluateModelJobOp(
            project=project,
            location=bq_location,
            model=bq_arima_model_exp_op.outputs["model"],
            query_statement=f"""SELECT * FROM `{bq_source}` WHERE split='TEST'""",
            job_configuration_query=bq_evaluate_model_configuration,
        )
        .set_display_name("evaluate arima plus model")
        .after(bq_arima_model_exp_op)
    )

    # Plot model metrics (keyword argument: positional component arguments are
    # not accepted by every KFP SDK release)
    get_evaluation_model_metrics_op = (
        get_model_evaluation_metrics(
            metrics_in=bq_arima_evaluate_model_op.outputs["evaluation_metrics"]
        )
        .after(bq_arima_evaluate_model_op)
        .set_display_name("plot evaluation metrics")
    )

    # Check the model performance: continue only if the ARIMA_PLUS average MAE
    # is below the threshold
    with Condition(
        get_evaluation_model_metrics_op.outputs["avg_mean_absolute_error"]
        < performance_threshold,
        name="avg. mae good",
    ):
        # Train the ARIMA PLUS model on the full source table and register it.
        # No GROUP BY: the target column is selected as-is (not aggregated), so
        # grouping by the other two columns would be rejected by BigQuery; this
        # mirrors the shape of the experiment query above.
        bq_arima_model_op = (
            BigqueryCreateModelJobOp(
                query=f"""
        -- create model table
        CREATE OR REPLACE MODEL `{project}.{new_bq_dataset}.{bq_model_table}`
        OPTIONS(
            MODEL_TYPE = 'ARIMA_PLUS',
            TIME_SERIES_TIMESTAMP_COL = '{arima_uni_cfg.time_column}',
            TIME_SERIES_DATA_COL = '{arima_uni_cfg.target_column}',
            TIME_SERIES_ID_COL = ['{arima_uni_cfg.timeseries_id}'],
            MODEL_REGISTRY = 'vertex_ai',
            VERTEX_AI_MODEL_ID = 'order_demand_forecasting',
            VERTEX_AI_MODEL_VERSION_ALIASES = ['staging']
        ) AS
        SELECT
          {arima_uni_cfg.time_column},
          {arima_uni_cfg.target_column},
          {arima_uni_cfg.timeseries_id}
        FROM `{bq_source}`;
        """,
                project=project,
                location=bq_location,
            )
            .set_display_name("train arima+ model")
            .after(get_evaluation_model_metrics_op)
        )

        # Generate the ARIMA PLUS forecasts from the freshly trained model
        bq_arima_forecast_op = (
            BigqueryForecastModelJobOp(
                project=project,
                location=bq_location,
                model=bq_arima_model_op.outputs["model"],
                horizon=forecast_horizon,  # number of time steps to forecast
                confidence_level=0.9,
                job_configuration_query=bq_forecast_configuration,
            )
            .set_display_name("generate hourly forecasts")
            .after(bq_arima_model_op)
        )

        # Generate the ARIMA PLUS forecast explanations
        _ = (
            BigqueryExplainForecastModelJobOp(
                project=project,
                location=bq_location,
                model=bq_arima_model_op.outputs["model"],
                horizon=forecast_horizon,  # number of time steps to forecast
                confidence_level=0.9,
                job_configuration_query=bq_explain_forecast_configuration,
            )
            .set_display_name("explain hourly forecasts")
            .after(bq_arima_forecast_op)
        )

## compile pipeline

In [26]:
# Local file that receives the compiled pipeline specification.
PIPELINE_JSON_SPEC_LOCAL = "custom_pipeline_spec.json"

# Remove any spec left over from a previous compile (-f: no error if absent).
! rm -f $PIPELINE_JSON_SPEC_LOCAL

# Compile the `pipeline` function defined above into the JSON spec file.
kfp.v2.compiler.Compiler().compile(
    pipeline_func=pipeline, 
    package_path=PIPELINE_JSON_SPEC_LOCAL,
)

## config

In [27]:
def _write_truncate_destination(table_id):
    """Build a query-job configuration that overwrites `table_id` in NEW_BQ_DATASET."""
    return {
        "destinationTable": {
            "projectId": PROJECT_ID,
            "datasetId": NEW_BQ_DATASET,
            "tableId": table_id,
        },
        "writeDisposition": "WRITE_TRUNCATE",
    }


DATASET_TAG = "m5"
BQ_LOCATION = "US"  # 'us'
NEW_BQ_DATASET = "_".join([EXPERIMENT_NAME, VERSION])
BQ_SOURCE = "hybrid-vertex.m5_us.combined_small20k_train"  # combined_full_train

FORECAST_HORIZON = 14

# Model name: <prefix>_<version>
BQ_MODEL_TABLE_PREFIX = "orders_forecast_arima"
BQ_MODEL_TABLE = "_".join([BQ_MODEL_TABLE_PREFIX, VERSION])

# Each section below follows the same recipe: a versioned table name plus a
# job configuration that truncates and rewrites that table.
BQ_TRAINING_TABLE_PREFIX = "orders_training"
BQ_TRAINING_TABLE = "_".join([BQ_TRAINING_TABLE_PREFIX, VERSION])
BQ_TRAIN_CONFIGURATION = _write_truncate_destination(BQ_TRAINING_TABLE)

BQ_EVALUATE_TS_TABLE_PREFIX = "orders_arima_time_series_evaluate"
BQ_EVALUATE_TS_TABLE = "_".join([BQ_EVALUATE_TS_TABLE_PREFIX, VERSION])
BQ_EVALUATE_TS_CONFIGURATION = _write_truncate_destination(BQ_EVALUATE_TS_TABLE)

BQ_EVALUATE_MODEL_TABLE_PREFIX = "orders_arima_model_evaluate"
BQ_EVALUATE_MODEL_TABLE = "_".join([BQ_EVALUATE_MODEL_TABLE_PREFIX, VERSION])
BQ_EVALUATE_MODEL_CONFIGURATION = _write_truncate_destination(BQ_EVALUATE_MODEL_TABLE)

BQ_FORECAST_TABLE_PREFIX = "orders_arima_forecast"
BQ_FORECAST_TABLE = "_".join([BQ_FORECAST_TABLE_PREFIX, VERSION])
BQ_FORECAST_CONFIGURATION = _write_truncate_destination(BQ_FORECAST_TABLE)

BQ_EXPLAIN_FORECAST_TABLE_PREFIX = "orders_arima_explain_forecast"
BQ_EXPLAIN_FORECAST_TABLE = "_".join([BQ_EXPLAIN_FORECAST_TABLE_PREFIX, VERSION])
BQ_EXPLAIN_FORECAST_CONFIGURATION = _write_truncate_destination(BQ_EXPLAIN_FORECAST_TABLE)

# Passed to the pipeline as `performance_threshold`
PERF_THRESHOLD = 3000

In [28]:


# GCS location of the compiled spec; used as `template_path` when the
# PipelineJob is created.
PIPELINES_FILEPATH = f'{PIPELINE_ROOT_PATH}/pipeline_spec.json'
print("PIPELINES_FILEPATH:", PIPELINES_FILEPATH)

# copy pipeline spec to gcs path
!gsutil cp $PIPELINE_JSON_SPEC_LOCAL $PIPELINES_FILEPATH

PIPELINES_FILEPATH: gs://vertex-forecast-22/m5_bqarima_pipe/v01/pipeline_root/pipeline_spec.json
Copying file://custom_pipeline_spec.json [Content-Type=application/json]...
/ [1 files][ 43.4 KiB/ 43.4 KiB]                                                
Operation completed over 1 objects/43.4 KiB.                                     


In [29]:
# vpc_network_name = 'ucaip-haystack-vpc-network'
# SERVICE_ACCOUNT = '934903580331-compute@developer.gserviceaccount.com'
# SERVICE_ACCOUNT = 'notebooksa@hybrid-vertex.iam.gserviceaccount.com'


# Build the pipeline job from the spec uploaded to GCS. The keys of
# `parameter_values` mirror the parameters of the `pipeline` function.
job = vertex_ai.PipelineJob(
    display_name=PIPELINE_NAME,
    template_path=PIPELINES_FILEPATH,
    pipeline_root=f'{PIPELINE_ROOT_PATH}',
    failure_policy='fast', # slow | fast
    # enable_caching=False,
    parameter_values={
        # here
        'project':PROJECT_ID,
        'location':REGION,
        'version':VERSION,
        'dataset_tag':DATASET_TAG,
        'bq_location':BQ_LOCATION,
        'new_bq_dataset':NEW_BQ_DATASET,
        'experiment_name':EXPERIMENT_NAME,
        'experiment_run':RUN_NAME,
        'bq_source':BQ_SOURCE,
        'forecast_horizon':FORECAST_HORIZON,
        'bq_model_table':BQ_MODEL_TABLE,
        'bq_evaluate_time_series_configuration':BQ_EVALUATE_TS_CONFIGURATION,
        'bq_evaluate_model_configuration':BQ_EVALUATE_MODEL_CONFIGURATION,
        'bq_forecast_configuration':BQ_FORECAST_CONFIGURATION,
        'bq_explain_forecast_configuration':BQ_EXPLAIN_FORECAST_CONFIGURATION,
        'performance_threshold': PERF_THRESHOLD,
    },
)


# NOTE(review): these reassignments run AFTER `job` was constructed, so they do
# not alter the parameter values already handed to the job. They do overwrite
# the notebook-level EXPERIMENT_* / RUN_NAME values, and the new prefix
# ('m5-bqarima_pipe', with a hyphen) differs from the one set earlier
# ('m5_bqarima_pipe'); any earlier cell re-run afterwards will derive different
# names from it. Confirm whether this is intended or a leftover.
EXPERIMENT_PREFIX = 'm5-bqarima_pipe'                     # custom identifier for organizing experiments
EXPERIMENT_NAME=f'{EXPERIMENT_PREFIX}'
RUN_NAME = f'run-{time.strftime("%Y%m%d-%H%M%S")}'

# Submit the job under the configured service account; sync=False asks the SDK
# not to block the cell on the run.
job.run(
    sync=False,
    service_account=VERTEX_SA,
    # network=f'projects/{PROJECT_NUM}/global/networks/{vpc_network_name}'
)

Creating PipelineJob
PipelineJob created. Resource name: projects/934903580331/locations/us-central1/pipelineJobs/arima-uni-m5-bqarima-pipe-v01-20230307085437
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/934903580331/locations/us-central1/pipelineJobs/arima-uni-m5-bqarima-pipe-v01-20230307085437')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/arima-uni-m5-bqarima-pipe-v01-20230307085437?project=934903580331
PipelineJob projects/934903580331/locations/us-central1/pipelineJobs/arima-uni-m5-bqarima-pipe-v01-20230307085437 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/934903580331/locations/us-central1/pipelineJobs/arima-uni-m5-bqarima-pipe-v01-20230307085437 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/934903580331/locations/us-central1/pipelineJobs/arima-uni-m5-bqarima-pipe-v01-20230307085437 current state:
PipelineState.PIPELINE_S