In [23]:
import os

In [11]:
# Build the training table: select the target column (Case_output) and the
# chosen feature columns from kidney_tx_dataset.kidneytx and store them in
# kidney_tx_dataset.kidneytx_prediction_columns.
# --replace overwrites the destination table on every run; -n 0 asks bq not to
# print any result rows; --use_legacy_sql=false selects standard SQL, which the
# backtick-quoted table name below requires.
!bq query \
-n 0 \
--destination_table kidney_tx_dataset.kidneytx_prediction_columns \
--replace \
--use_legacy_sql=false \
'SELECT Case_output, Patient_Age_at_TX, Patient_Age_at_Biopsy, serum_creatinine, urea, dimethylamine, UA_Pro, UA_Hb, phenylacetylglutamine, Hypertension, trigonellin, lactate, citrate, hippurate, Sex, alanine, Diabetes \
FROM `kidney_tx_dataset.kidneytx`' 

W0615 23:56:13.450022 140476583593792 bigquery_client.py:731] There is no apilog flag so non-critical logging is disabled.
Waiting on bqjob_r737b7242d3ff0627_00000188c17cfb0b_1 ... (1s) Current status: DONE   


In [12]:
# Notebook-wide constants: region, project, Cloud Storage locations and the
# pipeline settings that later cells reuse or export as environment variables.

REGION = "us-central1"

# IPython's `!(...)` captures the command output as a list of lines; the first
# line is taken as the project ID.
# NOTE(review): if gcloud prints anything before the ID, PROJECT[0] would be
# that text instead — confirm on a fresh environment.
PROJECT = !(gcloud config get-value core/project)
PROJECT = PROJECT[0]

# Bucket URI under which data and pipeline artifacts are kept.
ARTIFACT_STORE = f"gs://{PROJECT}-spectrain-artifact-store"

DATA_ROOT = f"{ARTIFACT_STORE}/data"
TEST_FILE_PATH = f"{DATA_ROOT}/test/dataset.csv"
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"

PIPELINE_ROOT = f"{ARTIFACT_STORE}/pipeline"
# BigQuery table written by the `bq query` cell (kidneytx_prediction_columns).
DATASET_SOURCE = f"bq://{PROJECT}.kidney_tx_dataset.kidneytx_prediction_columns"
PIPELINE_NAME = "kidneytx"
# Column passed to the AutoML training step as its target.
TARGET_COLUMN = "Case_output"

In [13]:
# Publish the notebook constants as environment variables; the generated
# pipeline module reads its settings from the environment when it is imported.
os.environ.update(
    {
        "PROJECT": PROJECT,
        "REGION": REGION,
        "PIPELINE_ROOT": PIPELINE_ROOT,
        "DATASET_SOURCE": DATASET_SOURCE,
        "TARGET_COLUMN": TARGET_COLUMN,
        "PIPELINE_NAME": PIPELINE_NAME,
    }
)

In [14]:
!rm ./pogue_python/pipeline_vertex_automl.py

rm: cannot remove './pogue_python/pipeline_vertex_automl.py': No such file or directory


In [15]:
%%writefile ./pogue_python/pipeline_vertex_automl.py
# ADDED BY ALEX
# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""Kubeflow Pipeline."""

import os

from google_cloud_pipeline_components.aiplatform import (
    AutoMLTabularTrainingJobRunOp,
    EndpointCreateOp,
    ModelDeployOp,
    TabularDatasetCreateOp,
)
from kfp.v2 import dsl

# Settings are resolved from the environment once, at import time.
# The first three have no fallback and are None when the variable is unset.
PIPELINE_ROOT = os.environ.get("PIPELINE_ROOT")
PROJECT = os.environ.get("PROJECT")
DATASET_SOURCE = os.environ.get("DATASET_SOURCE")

# Optional settings with fallbacks; the display name follows the pipeline name
# unless MODEL_DISPLAY_NAME overrides it.
PIPELINE_NAME = os.environ.get("PIPELINE_NAME", "covertype")
DISPLAY_NAME = os.environ.get("MODEL_DISPLAY_NAME", PIPELINE_NAME)
TARGET_COLUMN = os.environ.get("TARGET_COLUMN", "Cover_Type")
SERVING_MACHINE_TYPE = os.environ.get("SERVING_MACHINE_TYPE", "n1-standard-16")


# Pipeline definition: create a tabular dataset from BigQuery, train an AutoML
# classification model on it, create an endpoint and deploy the model there.
# The function takes no runtime parameters; every setting comes from the
# module-level constants above, so it is fixed when the pipeline is compiled.
@dsl.pipeline(
    name=f"{PIPELINE_NAME}-vertex-automl-pipeline",
    description=f"AutoML Vertex Pipeline for {PIPELINE_NAME}",
    pipeline_root=PIPELINE_ROOT,
)
def create_pipeline():

    # Step 1: register the BigQuery table (DATASET_SOURCE) as a tabular dataset.
    dataset_create_task = TabularDatasetCreateOp(
        display_name=DISPLAY_NAME,
        bq_source=DATASET_SOURCE,
        project=PROJECT,
    )

    # Step 2: AutoML tabular training on that dataset, predicting TARGET_COLUMN
    # as a classification problem.
    automl_training_task = AutoMLTabularTrainingJobRunOp(
        project=PROJECT,
        display_name=DISPLAY_NAME,
        optimization_prediction_type="classification",
        dataset=dataset_create_task.outputs["dataset"],
        target_column=TARGET_COLUMN,
    )

    # Step 3: create the endpoint; it takes no input from the training step.
    endpoint_create_task = EndpointCreateOp(
        project=PROJECT,
        display_name=DISPLAY_NAME,
    )

    # Step 4: deploy the trained model to the endpoint on exactly one replica
    # (min == max == 1) of SERVING_MACHINE_TYPE. The task handle is not used
    # afterwards, hence the pylint suppression.
    model_deploy_task = ModelDeployOp(  # pylint: disable=unused-variable
        model=automl_training_task.outputs["model"],
        endpoint=endpoint_create_task.outputs["endpoint"],
        deployed_model_display_name=DISPLAY_NAME,
        dedicated_resources_machine_type=SERVING_MACHINE_TYPE,
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )

Writing ./pogue_python/pipeline_vertex_automl.py


In [16]:
# Echo the exported settings so the values the pipeline module will see can be
# checked by eye. Lines come out in os.environ order, not in list order.
vars_of_interest = [
    "PIPELINE_ROOT",
    "PROJECT",
    "REGION",
    "DATASET_SOURCE",
    "TARGET_COLUMN",
    "PIPELINE_NAME",
]

env_var_lines = []
for env_name, env_value in os.environ.items():
    if env_name in vars_of_interest:
        env_var_lines.append(f"env: {env_name}={env_value}")

for line in env_var_lines:
    print(line)

env: PROJECT=qwiklabs-asl-00-c812c3b423f2
env: REGION=us-central1
env: PIPELINE_ROOT=gs://qwiklabs-asl-00-c812c3b423f2-spectrain-artifact-store/pipeline
env: DATASET_SOURCE=bq://qwiklabs-asl-00-c812c3b423f2.kidney_tx_dataset.kidneytx_prediction_columns
env: TARGET_COLUMN=Case_output
env: PIPELINE_NAME=kidneytx


In [17]:
# Local file name for the compiled pipeline specification.
PIPELINE_JSON = "spectrain_automl_vertex_pipeline.json"

In [18]:
# Create the artifact-store bucket in REGION only when `gsutil ls` does not
# already list it: grep succeeds on a match, so `gsutil mb` runs only on a miss.
!gsutil ls | grep ^{ARTIFACT_STORE}/$ || gsutil mb -l {REGION} {ARTIFACT_STORE}

gs://qwiklabs-asl-00-c812c3b423f2-spectrain-artifact-store/


In [19]:
# The pipeline module reads its settings from os.environ at import time, so the
# cell that exports the environment variables must have run before this import.
# After changing a setting, restart the kernel (or reload the module): a repeated
# import of an already-loaded module does not re-read the environment.
from kfp.v2 import compiler
from pogue_python.pipeline_vertex_automl import create_pipeline

# Show the dataset URI the pipeline is expected to pick up.
print(os.getenv("DATASET_SOURCE"))

# Compile the pipeline function into the JSON specification at PIPELINE_JSON.
compiler.Compiler().compile(
    pipeline_func=create_pipeline, 
    package_path=PIPELINE_JSON,
)

bq://qwiklabs-asl-00-c812c3b423f2.kidney_tx_dataset.kidneytx_prediction_columns




In [20]:
# Sanity check: show each "bq_source" entry of the compiled JSON with the five
# lines after it, to confirm the dataset URI was written in as a constant.
!grep -A5 "\"bq_source\":" {PIPELINE_JSON}

            "bq_source": {
              "type": "STRING"
            },
            "display_name": {
              "type": "STRING"
            },
--
                "bq_source": {
                  "runtimeValue": {
                    "constantValue": {
                      "stringValue": "bq://qwiklabs-asl-00-c812c3b423f2.kidney_tx_dataset.kidneytx_prediction_columns"
                    }
                  }


In [25]:
from google.cloud import aiplatform

# Point the Vertex AI SDK at the notebook's project and region.
aiplatform.init(project=PROJECT, location=REGION)

# Build a pipeline job from the compiled specification, with step caching
# switched on.
pipeline = aiplatform.PipelineJob(
    display_name="automl_spectrain_kfp_pipeline",
    template_path=PIPELINE_JSON,
    enable_caching=True,
)

# Submit the job; the cell output logs the resource name, a console link and
# the job state repeatedly while it is running.
pipeline.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/469700469475/locations/us-central1/pipelineJobs/kidneytx-vertex-automl-pipeline-20230616061929
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/469700469475/locations/us-central1/pipelineJobs/kidneytx-vertex-automl-pipeline-20230616061929')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/kidneytx-vertex-automl-pipeline-20230616061929?project=469700469475
PipelineJob projects/469700469475/locations/us-central1/pipelineJobs/kidneytx-vertex-automl-pipeline-20230616061929 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/469700469475/locations/us-central1/pipelineJobs/kidneytx-vertex-automl-pipeline-20230616061929 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/469700469475/locations/us-central1/pipelineJobs/kidneytx-vertex-automl-pipeline-20230616061929 current state:
PipelineStat