### What we will learn

- We will build a custom Kubeflow pipeline for classification
- We will show how to use Vertex AI Experiments

In [66]:
import kfp

In [67]:
## Show the installed KFP SDK version (the cells below import from the `kfp.v2` namespace).
print(kfp.__version__)

1.8.14


In [68]:
from dotenv import load_dotenv
## Load settings from the local `.env` file; the next cell reads them back through os.environ.
load_dotenv(dotenv_path='.env', verbose=True)

True

In [69]:
import os

## Notebook configuration, read from environment variables.
## os.environ.get() returns None for a missing variable, so check the echo below
## before running the pipeline cells.

## BigQuery source table
BIGQUERY_PROJECT_ID = os.environ.get('BIGQUERY_PROJECT_ID')
BIGQUERY_DATASET = os.environ.get('BIGQUERY_DATASET')
BIGQUERY_DATASET_REGION = os.environ.get('BIGQUERY_DATASET_REGION')
BIGQUERY_TABLE = os.environ.get('BIGQUERY_TABLE')

## Vertex AI project / region the pipeline jobs are submitted to
VERTEXAI_PROJECT_ID = os.environ.get('VERTEXAI_PROJECT_ID')
VERTEXAI_REGION = os.environ.get('VERTEXAI_REGION')

## Cloud Storage bucket
BUCKET_NAME = os.environ.get('BUCKET_NAME')
BUCKET_URI = os.environ.get('BUCKET_URI')
BUCKET_REGION = os.environ.get('BUCKET_REGION')

## Prefix used later to name experiments, models and endpoints
PREFIX = os.environ.get('PREFIX')

print("BIGQUERY_PROJECT_ID: ",BIGQUERY_PROJECT_ID)
print("BIGQUERY_DATASET: ",BIGQUERY_DATASET)
print("BIGQUERY_DATASET_REGION: ",BIGQUERY_DATASET_REGION)
print("BIGQUERY_TABLE: ",BIGQUERY_TABLE)

print("VERTEXAI_PROJECT_ID: ",VERTEXAI_PROJECT_ID)
print("VERTEXAI_REGION: ",VERTEXAI_REGION)

print("BUCKET_NAME: ",BUCKET_NAME)
## Each label now prints its own variable (previously BUCKET_NAME and
## VERTEXAI_REGION were echoed under the BUCKET_URI / BUCKET_REGION labels).
print("BUCKET_URI: ",BUCKET_URI)
print("BUCKET_REGION: ",BUCKET_REGION)

## GCS location derived from the bucket name
PIPELINE_ROOT = 'gs://{}/pipeline_root'.format(BUCKET_NAME)

BIGQUERY_PROJECT_ID:  datafusionsbox
BIGQUERY_DATASET:  dataset4ccc
BIGQUERY_DATASET_REGION:  us
BIGQUERY_TABLE:  df_for_model_ccc_with_weights
VERTEXAI_PROJECT_ID:  datafusionsbox
VERTEXAI_REGION:  us-central1
BUCKET_NAME:  gcp-demo-ccc-vertexai
BUCKET_URI:  gcp-demo-ccc-vertexai
BUCKET_REGION:  us-central1


In [22]:
from kfp.v2.dsl import component, pipeline
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath)

In [80]:
@component(
  packages_to_install=["pandas","db-dtypes", "google-cloud-bigquery", "pyarrow"]
)
def stage(bq_projectid: str, bq_dataset: str, bq_table: str, output_dataset: OutputPath('staged_bq_table')):
    """Export a complete BigQuery table to a CSV file.

    Args:
        bq_projectid: Project that owns the table; also used as the BigQuery client project.
        bq_dataset: Dataset that contains the table.
        bq_table: Name of the table to export.
        output_dataset: File path where the CSV is written.
    """
    from google.cloud import bigquery
    import google.auth

    ## authenticate with the default credentials of the runtime
    auth_credentials, auth_project = google.auth.default()
    ## google.auth.default() may return None as project; format it instead of
    ## concatenating so that a missing default project does not raise TypeError
    print(f"Project: {auth_project}")
    client = bigquery.Client(project=bq_projectid, credentials=auth_credentials)

    ## back-quote the fully qualified table name so that identifiers containing
    ## dashes or reserved words are still parsed as a single table path
    query = f"SELECT * FROM `{bq_projectid}.{bq_dataset}.{bq_table}`"
    print(query)

    ## fetch query results as dataframe
    dataframe = client.query(query).to_dataframe()
    print(dataframe.head())

    ## export the result set to the CSV output path; the DataFrame index is
    ## written as the first column (pandas default)
    dataframe.to_csv(output_dataset)

In [27]:
@pipeline(name="wf-kubeflow-bq2gcs")
def pipeline(
    in_bq_projectid: str = 'defaultprojectid',
    in_bq_dataset: str = 'xxxx',
    in_bq_table: str = 'yyyy',
):
    ## Single-step pipeline: the `stage` component exports the given BigQuery table.
    ## NOTE: this function is itself called `pipeline`, so once the cell has run the
    ## name no longer refers to the imported decorator; the decorator has to be
    ## re-imported before another pipeline can be defined.
    staging_task = stage(
        bq_projectid=in_bq_projectid,
        bq_dataset=in_bq_dataset,
        bq_table=in_bq_table,
    )

In [31]:
dag_json_filename = "dag_ccc_ex3b_bq2gcs.json"   ## File the compiled pipeline is written to; the compiler expects the output path to end with ".json".

In [32]:
from kfp.v2 import compiler
## Compile the `pipeline` function into a job spec file.
## The target is `dag_json_filename`: the previously referenced `dag_yaml_filename`
## is not defined by any cell above and raises NameError on a fresh kernel.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path=dag_json_filename
)



In [33]:
## Runtime values for the pipeline parameters, taken from the configuration cell.
PIPELINE_PARAMETERS = dict(
    in_bq_projectid=BIGQUERY_PROJECT_ID,
    in_bq_dataset=BIGQUERY_DATASET,
    in_bq_table=BIGQUERY_TABLE,
)

## Job-level settings passed to aiplatform.PipelineJob below.
LABELS = dict()
ENABLE_CACHING = True

In [34]:
from google.cloud import aiplatform

## Create a Vertex AI pipeline job from the compiled spec and run it.
job = aiplatform.PipelineJob(
    display_name="kfp_pipeline_bq2gcs",
    template_path=dag_json_filename,
    ##pipeline_root=PIPELINE_ROOT,
    ## PIPELINE_PARAMETERS must not contain parameters that the pipeline does not declare
    parameter_values=PIPELINE_PARAMETERS,
    enable_caching=ENABLE_CACHING,
    labels=LABELS,
    project=VERTEXAI_PROJECT_ID,
    location=VERTEXAI_REGION,
)

## The job runs as this (hard-coded) service account.
job.run(service_account="339239659794-compute@developer.gserviceaccount.com")

Creating PipelineJob
PipelineJob created. Resource name: projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-bq2gcs-20230214142528
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-bq2gcs-20230214142528')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/wf-kubeflow-bq2gcs-20230214142528?project=339239659794
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-bq2gcs-20230214142528 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-bq2gcs-20230214142528 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-bq2gcs-20230214142528 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-centra

In [161]:
@component(
  packages_to_install=["pandas","fsspec","gcsfs","scikit-learn"]
)
def preprocess(gcs_pipeline_root: str,
               app_prefix: str,
               user_id_column: str,
               target_column: str,
               weight_column: str,
               excluded_columns: list,
               staged_bq_table: InputPath('staged_bq_table'), 
               staged_training_dataset: OutputPath('staged_training_dataset'), 
               staged_validation_dataset: OutputPath('staged_validation_dataset'), 
               staged_test_dataset: OutputPath('staged_test_dataset')):
    """Drop the excluded columns and split the staged CSV into train/validation/test CSVs.

    Args:
        gcs_pipeline_root: Not used by this step.
        app_prefix: Not used by this step.
        user_id_column: Not used by this step.
        target_column: Name of the label column.
        weight_column: Not used by this step.
        excluded_columns: Columns removed before splitting.
        staged_bq_table: Path of the input CSV; its first column is read as the index.
        staged_training_dataset: Output path of the training CSV.
        staged_validation_dataset: Output path of the validation CSV.
        staged_test_dataset: Output path of the test CSV.
    """
    import pandas as pd
    from sklearn.model_selection import train_test_split

    staged_frame = pd.read_csv(staged_bq_table, index_col=0)

    ## drop excluded columns
    modelling_frame = staged_frame.drop(excluded_columns, axis=1)

    ## separate the label column from the features with one boolean column mask
    is_target = modelling_frame.columns == target_column
    features = modelling_frame.loc[:, ~is_target]
    labels = modelling_frame.loc[:, is_target]
    ## Feature engineering if any, e.g
    ## from sklearn.preprocessing import MinMaxScaler
    ## scaler = MinMaxScaler(feature_range = (0,1))
    ## scaler.fit(features)

    ## hold out 20% for testing, then 20% of the remainder for validation
    features_rest, features_test, labels_rest, labels_test = train_test_split(
        features, labels, test_size=0.2, random_state=101
    )
    features_train, features_val, labels_train, labels_val = train_test_split(
        features_rest, labels_rest, test_size=0.2, random_state=101
    )

    ## re-attach the label column and write each split without the index
    splits = (
        (features_train, labels_train, staged_training_dataset),
        (features_val, labels_val, staged_validation_dataset),
        (features_test, labels_test, staged_test_dataset),
    )
    for split_features, split_labels, destination in splits:
        pd.concat([split_features, split_labels], axis=1).to_csv(destination, index=False)

In [85]:
## Runtime values for the two-step pipeline; the last two entries are new in this section.
PIPELINE_PARAMETERS = dict(
    in_bq_projectid=BIGQUERY_PROJECT_ID,
    in_bq_dataset=BIGQUERY_DATASET,
    in_bq_table=BIGQUERY_TABLE,
    in_pipeline_root=PIPELINE_ROOT,    ### THIS IS NEW HERE
    in_app_prefix=PREFIX,              ### THIS IS NEW HERE
)

## Job-level settings passed to aiplatform.PipelineJob.
LABELS = dict()
ENABLE_CACHING = True

## File the compiled pipeline is written to.
dag_json_filename = "dag_ccc_ex3b_mlops.json"

In [86]:
from kfp.v2.dsl import component, pipeline
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath)

In [87]:
@pipeline(name="wf-kubeflow-mlops")  ##WE CHANGE THE NAME
def pipeline(
    in_bq_projectid: str = 'defaultprojectid',
    in_bq_dataset: str = 'xxxx',
    in_bq_table: str = 'yyyy',
    in_pipeline_root: str = '',    ### NEW PIPELINE PARAMETERS
    in_app_prefix: str = 'demo'    ### NEW PIPELINE PARAMETERS
):
    ## Two-step pipeline: stage the BigQuery table, then split it into
    ## training / validation / test datasets.
    stagingTask = stage(bq_projectid = in_bq_projectid,
                        bq_dataset   = in_bq_dataset,
                        bq_table     = in_bq_table)

    ## column roles of the staged table
    _user_id_column="synerise_client_id"
    _target_column="y_if_trans"
    _weight_column="weight"
    ## the user id and weight columns are dropped before the split
    _excluded_columns = [_user_id_column, _weight_column]

    preprocessTask = preprocess(gcs_pipeline_root = in_pipeline_root,
                                app_prefix = in_app_prefix,
                                user_id_column = _user_id_column,
                                target_column=_target_column,
                                ## was misspelled `wight_column`, which `preprocess` does not accept
                                weight_column=_weight_column,
                                excluded_columns = _excluded_columns,
                                staged_bq_table = stagingTask.outputs["output_dataset"]
                               )

In [48]:
from kfp.v2 import compiler
## Compile the current `pipeline` function into the job spec file named by dag_json_filename.
compiler.Compiler().compile(pipeline_func=pipeline, package_path=dag_json_filename)



In [49]:
from google.cloud import aiplatform

## Create a Vertex AI pipeline job from the compiled spec and run it.
## The template is `dag_json_filename` (the file written by the compile cell);
## the previously referenced `dag_yaml_filename` is not defined by any cell above.
job = aiplatform.PipelineJob(display_name = "kfp_pipeline_mlops",
                             template_path = dag_json_filename,
                             ##pipeline_root = PIPELINE_ROOT,
                             parameter_values = PIPELINE_PARAMETERS, ## Make sure PIPELINE_PARAMETERS collection does not include parameters that are unknown to pipeline
                             enable_caching = ENABLE_CACHING,
                             labels = LABELS,
                             project = VERTEXAI_PROJECT_ID,
                             location = VERTEXAI_REGION)

## The job runs as this (hard-coded) service account.
job.run(service_account="339239659794-compute@developer.gserviceaccount.com")

Creating PipelineJob
PipelineJob created. Resource name: projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-mlops-20230221090954
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-mlops-20230221090954')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/wf-kubeflow-mlops-20230221090954?project=339239659794
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-mlops-20230221090954 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-mlops-20230221090954 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-mlops-20230221090954 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pip

### And so it goes on...
So let's add steps for training, a decision gate and deployment

In [193]:
@component(
  packages_to_install=["pandas","fsspec","gcsfs","scikit-learn", "google-cloud-aiplatform", "keras_tuner"],
    base_image = "us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-8:latest"
)
def train(staged_training_dataset: InputPath('staged_training_dataset'), 
          staged_validation_dataset: InputPath('staged_validation_dataset'), 
          staged_test_dataset: InputPath('staged_test_dataset'),
          vertexai_experiment_name:str, 
          vertexai_region: str, 
          vertexai_projectid: str,
          gcs_pipeline_root: str,
          app_prefix: str, 
          user_id_column: str,
          target_column: str,
          weight_column: str,
          excluded_columns: list,
          output_model: Output[Model]
         ):
    """Tune and train a binary classifier, save it, and log the run to Vertex AI Experiments.

    Args:
        staged_training_dataset: Path of the training CSV.
        staged_validation_dataset: Path of the validation CSV.
        staged_test_dataset: Path of the test CSV used for the final evaluation.
        vertexai_experiment_name: Experiment the run is logged under.
        vertexai_region: Region passed to aiplatform.init.
        vertexai_projectid: Project passed to aiplatform.init.
        gcs_pipeline_root: Not used by this step.
        app_prefix: Prefix of the tuner trial directory; also logged as `trainedby`.
        user_id_column: Not used by this step.
        target_column: Name of the label column in all three CSVs.
        weight_column: Not used by this step.
        excluded_columns: Not used by this step.
        output_model: Output artifact; the trained Keras model is saved to its path.
    """
    import tensorflow as tf
    import keras_tuner
    from google.cloud import aiplatform
    from datetime import datetime
    import pandas as pd

    ## Metrics reported by fit()/evaluate(); the names are the keys used further
    ## down when the evaluation results are logged.
    _METRICS = [
        tf.keras.metrics.TruePositives(name='tp'),
        tf.keras.metrics.FalsePositives(name='fp'),
        tf.keras.metrics.TrueNegatives(name='tn'),
        tf.keras.metrics.FalseNegatives(name='fn'),
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.AUC(name='auc'),
        tf.keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    ]

    ## function to build model
    def build_model(hptune):
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(units=32, activation = "relu"))
        model.add(
            tf.keras.layers.Dense(
                # Define the hyperparameter
                units=32, ##hptune.Int("units", min_value=32, max_value=96, step=32),
                activation="relu" ##hptune.Choice("activation",["relu","tanh"]),
            )
        )
        ## "dropout" is the only hyperparameter that is actually searched
        if hptune.Boolean("dropout"):
            model.add(tf.keras.layers.Dropout(rate=0.25))

        model.add(tf.keras.layers.Dense(1, activation="sigmoid"))
        learning_rate = 1e-4 ##hptune.Float("lr",min_value = 1e-4, max_value=1e-2, sampling="log")

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            loss=tf.keras.losses.BinaryCrossentropy(),
            metrics=_METRICS,
        )
        return model

    training_dataset = pd.read_csv(staged_training_dataset)
    validation_dataset = pd.read_csv(staged_validation_dataset)
    test_dataset = pd.read_csv(staged_test_dataset)

    ## every column except the target is a feature
    feature_columns = [column for column in training_dataset.columns if  column != target_column]
    target_columns = [target_column]

    x_train = training_dataset[feature_columns]
    y_train = training_dataset[target_columns]

    x_val = validation_dataset[feature_columns]
    y_val = validation_dataset[target_columns]

    x_test = test_dataset[feature_columns]
    y_test = test_dataset[target_columns]

    trials_dir=f"{app_prefix}_trials"
    ## Create a Keras Hyperband hyperparameter tuner with a precision objective.
    ## NOTE(review): "precision" is the training-set metric; "val_precision" would
    ## score trials on the validation data - confirm which one is intended.
    tuner =  keras_tuner.Hyperband(
        hypermodel=build_model,
        objective=keras_tuner.Objective("precision", direction="max"),
        max_epochs=2,
        factor=3,
        hyperband_iterations=1,
        seed=None,
        hyperparameters=None,
        tune_new_entries=True,
        allow_new_entries=True,
        directory=trials_dir
    )

    weight_for_0 = 0.5 ##(1 / neg) * (total / 2.0)
    weight_for_1 = 20  ##(1 / pos) * (total / 2.0)

    class_weights = {0: weight_for_0, 1: weight_for_1}
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    tuner.search(x_train, y_train, epochs=2, validation_data=(x_val, y_val) , callbacks=[stop_early], class_weight=class_weights)

    # Get the optimal hyperparameters for the model as determined from the search
    best_hyperparameters=tuner.get_best_hyperparameters()[0]
    hypermodel = tuner.hypermodel.build(best_hyperparameters)
    ## Train the final model with the same class weights that were used during the
    ## search, so the saved model matches the configuration the tuner evaluated.
    hypermodel.fit(x_train, y_train, epochs=2, validation_data=(x_val, y_val), class_weight=class_weights)

    ## return_dict=True keys the results by metric name ('loss', 'tp', ...), so the
    ## logging below does not depend on the order of _METRICS
    results = hypermodel.evaluate(x_test, y_test, return_dict=True)

    #### SAVE MODEL
    print(output_model.path)
    model_path = output_model.path
    hypermodel.save(model_path)

    aiplatform.init(
        project=vertexai_projectid,
        location=vertexai_region,
        experiment=vertexai_experiment_name
    )

    training_params = {
        'training_dataset': staged_training_dataset,
        'validation_dataset': staged_validation_dataset,
        'test_dataset': staged_test_dataset,
        'model_type': 'nn',
        'model_path': model_path,
        'trainedby': app_prefix,
        ##'hp_units': best_hyperparameters.get('units'),
        ##'hp_activation': best_hyperparameters.get('activation'),
        'hp_dropout': best_hyperparameters.get('dropout'),
        ##'hp_lr': best_hyperparameters.get('lr'),
    }

    training_metrics = {
        'model_loss': results['loss'],
        'model_accuracy': results['accuracy'],
        'model_precision': results['precision'],
        'model_recall': results['recall'],
        'model_auc': results['auc'],
        'model_prc': results['prc'],
        'model_tp': results['tp'],
        'model_fp': results['fp'],
        'model_tn': results['tn'],
        'model_fn': results['fn']
    }

    ## confusion-matrix cells are counts; Keras reports them as floats
    true_pos = int(results['tp'])
    false_pos = int(results['fp'])
    true_neg = int(results['tn'])
    false_neg = int(results['fn'])

    run_id = f"run-{datetime.now().strftime('%Y%m%d%H%M%S')}"
    ## Using the run as a context manager ends it even when a logging call raises.
    with aiplatform.start_run(run_id) as run:
        run.log_params(training_params)
        run.log_metrics(training_metrics)

        ## Rows are the ground-truth label, columns the predicted label, both in
        ## `labels` order: [[TP, FN], [FP, TN]].
        run.log_classification_metrics(
            display_name='classification metrics',
            labels=['Positive', 'Negative'],
            matrix=[[true_pos, false_neg], [false_pos, true_neg]],
            fpr=[],
            tpr=[],
            threshold=[],
        )

### Upload model to Vertex AI Model registry and deploy endpoint

In [215]:
@component(
  packages_to_install=["pandas","fsspec","gcsfs","scikit-learn", "google-cloud-aiplatform"]
)
def simpledeploy(hmodel: Input[Model],
          vertexai_experiment_name:str, 
          vertexai_region: str, 
          vertexai_projectid: str,
          app_prefix: str,
          serving_machine_type: str,
          serving_min_replica: int,
          serving_max_replica: int           
         ):
    """Register the trained model in the Vertex AI Model Registry and deploy it to an endpoint.

    Models and endpoints are looked up by the label `experiment_name`. If a model
    with that label exists, the upload is made with it as parent model; if an
    endpoint with that label exists it is reused, otherwise one is created.

    Args:
        hmodel: Model artifact whose path holds the TensorFlow SavedModel.
        vertexai_experiment_name: Label value used to find and tag models and endpoints.
        vertexai_region: Region passed to aiplatform.init.
        vertexai_projectid: Project passed to aiplatform.init.
        app_prefix: Prefix of the model and endpoint display names.
        serving_machine_type: Machine type used for the deployment.
        serving_min_replica: Minimum replica count of the deployment.
        serving_max_replica: Maximum replica count of the deployment.
    """
    from google.cloud import aiplatform

    aiplatform.init(project=vertexai_projectid, location=vertexai_region)

    ## auxiliary variables
    model_name = f"{app_prefix}_model_exc3b_mlops"
    endpoint_name = f"{app_prefix}_endpoint_exc3b_mlops"
    model_path = hmodel.path

    ## look for models already registered under this experiment label
    model_filter_str = f'labels.experiment_name="{vertexai_experiment_name}"'
    print("Model filter string: "+model_filter_str)
    registered_models = aiplatform.Model.list(filter=model_filter_str)

    model_labels = {"experiment_name": vertexai_experiment_name}

    ## one upload call; `parent_model` is only passed when a labelled model exists
    upload_arguments = dict(
        display_name=model_name,
        saved_model_dir=model_path,
        labels=model_labels,
        is_default_version=True,
    )
    if len(registered_models) > 0:
        upload_arguments["parent_model"] = registered_models[0].resource_name
    vertexai_model = aiplatform.Model.upload_tensorflow_saved_model(**upload_arguments)

    ## same story for the endpoint - reuse a labelled one or create it
    endpoint_filter_str = f'labels.experiment_name="{vertexai_experiment_name}"'
    existing_endpoints = aiplatform.Endpoint.list(filter=endpoint_filter_str)

    endpoint_labels = {"experiment_name": vertexai_experiment_name}

    if len(existing_endpoints) == 0:
        endpoint = aiplatform.Endpoint.create(
            display_name=endpoint_name,
            labels=endpoint_labels,
        )
    else:
        endpoint = existing_endpoints[0]
        ## models already deployed on the endpoint are only listed here
        for deployed in endpoint.list_models():
            print(deployed.display_name)

    #### Deploy model to endpoint
    endpoint.deploy(
        model=vertexai_model,
        traffic_percentage=100,
        machine_type=serving_machine_type,
        min_replica_count=serving_min_replica,
        max_replica_count=serving_max_replica,
    )

In [216]:
## Runtime values for the end-to-end pipeline.
PIPELINE_PARAMETERS = dict(
    in_bq_projectid=BIGQUERY_PROJECT_ID,
    in_bq_dataset=BIGQUERY_DATASET,
    in_bq_table=BIGQUERY_TABLE,
    in_pipeline_root=PIPELINE_ROOT,
    in_app_prefix=PREFIX,
    ### NEW PIPELINE PARAMETERS: experiment logging
    in_vertexai_experiment_name="{}-experiments".format(PREFIX),
    in_vertexai_region=VERTEXAI_REGION,
    in_vertexai_projectid=VERTEXAI_PROJECT_ID,
    ### NEW PIPELINE PARAMETERS: serving resources
    in_serving_machine_type="n1-standard-4",
    in_serving_min_replica=1,
    in_serving_max_replica=2,
)

## Job-level settings passed to aiplatform.PipelineJob.
LABELS = dict()
ENABLE_CACHING = True

## File the compiled pipeline is written to.
dag_json_filename = "dag_ccc_ex3b_e2e_mlops.json"

In [217]:
## Display the resolved parameter values as a sanity check before submitting the job.
PIPELINE_PARAMETERS

{'in_bq_projectid': 'datafusionsbox',
 'in_bq_dataset': 'dataset4ccc',
 'in_bq_table': 'df_for_model_ccc_with_weights',
 'in_pipeline_root': 'gs://gcp-demo-ccc-vertexai/pipeline_root',
 'in_app_prefix': 'ccc',
 'in_vertexai_experiment_name': 'ccc-experiments',
 'in_vertexai_region': 'us-central1',
 'in_vertexai_projectid': 'datafusionsbox',
 'in_serving_machine_type': 'n1-standard-4',
 'in_serving_min_replica': 1,
 'in_serving_max_replica': 2}

In [218]:
from kfp.v2.dsl import component, pipeline
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath)

In [219]:
@pipeline(name="wf-kubeflow-e2e-mlops")  ##WE CHANGE THE NAME e2e
def pipeline(
    in_bq_projectid: str,
    in_bq_dataset: str,
    in_bq_table: str,
    in_pipeline_root: str,    
    in_app_prefix: str,
    in_vertexai_experiment_name: str, ### NEW PIPELINE PARAMETERS    This we need because our tasks will use Vertex AI SDK to log experiments
    in_vertexai_region: str,          ### NEW PIPELINE PARAMETERS
    in_vertexai_projectid: str,       ### NEW PIPELINE PARAMETERS
    in_serving_machine_type: str,     ### NEW PIPELINE PARAMETERS    This we need because we need to let Vertex AI know how much compute we want to allocate for serving
    in_serving_min_replica: int,      ### NEW PIPELINE PARAMETERS
    in_serving_max_replica: int       ### NEW PIPELINE PARAMETERS
):
    ## End-to-end pipeline: stage -> preprocess -> train -> simpledeploy.
    ## Each step consumes the outputs of the previous one.
    stagingTask = stage(bq_projectid = in_bq_projectid,
                        bq_dataset   = in_bq_dataset,
                        bq_table     = in_bq_table)

    ## column roles of the staged table
    _user_id_column="synerise_client_id"
    _target_column="y_if_trans"
    _weight_column="weight"
    ## the user id and weight columns are dropped before the split
    _excluded_columns = [_user_id_column, _weight_column]

    preprocessTask = preprocess(gcs_pipeline_root = in_pipeline_root,
                                app_prefix = in_app_prefix,
                                user_id_column = _user_id_column,
                                target_column=_target_column,
                                weight_column=_weight_column,
                                excluded_columns = _excluded_columns,
                                staged_bq_table = stagingTask.outputs["output_dataset"]
                               )

    #### new comes here ---->
    ## train: reads the three datasets produced by preprocess and emits `output_model`
    trainTask = train(staged_training_dataset = preprocessTask.outputs["staged_training_dataset"],
                      staged_validation_dataset = preprocessTask.outputs["staged_validation_dataset"],
                      staged_test_dataset = preprocessTask.outputs["staged_test_dataset"],
                      vertexai_experiment_name = in_vertexai_experiment_name,
                      vertexai_region = in_vertexai_region,
                      vertexai_projectid = in_vertexai_projectid,
                      gcs_pipeline_root = in_pipeline_root,
                      app_prefix = in_app_prefix,
                      user_id_column = _user_id_column,
                      target_column=_target_column,
                      weight_column=_weight_column,
                      excluded_columns = _excluded_columns
                     )

    ## simpledeploy: registers the trained model and deploys it to an endpoint.
    ## The model artifact is passed by keyword like every other component input;
    ## newer KFP releases reject positional component arguments.
    deployTask = simpledeploy(hmodel = trainTask.outputs['output_model'],
                              vertexai_experiment_name = in_vertexai_experiment_name,
                              vertexai_region = in_vertexai_region,
                              vertexai_projectid = in_vertexai_projectid,
                              app_prefix = in_app_prefix,
                              serving_machine_type = in_serving_machine_type,
                              serving_min_replica = in_serving_min_replica,
                              serving_max_replica = in_serving_max_replica
                             )

In [220]:
from kfp.v2 import compiler
## Compile the current `pipeline` function into the job spec file named by dag_json_filename.
compiler.Compiler().compile(pipeline_func=pipeline, package_path=dag_json_filename)

In [221]:
from google.cloud import aiplatform

## Create a Vertex AI pipeline job from the compiled spec and run it.
job = aiplatform.PipelineJob(
    display_name="kfp_pipeline_e2e_mlops",
    template_path=dag_json_filename,
    ##pipeline_root=PIPELINE_ROOT,
    ## PIPELINE_PARAMETERS must not contain parameters that the pipeline does not declare
    parameter_values=PIPELINE_PARAMETERS,
    enable_caching=ENABLE_CACHING,
    labels=LABELS,
    project=VERTEXAI_PROJECT_ID,
    location=VERTEXAI_REGION,
)

## The job runs as this (hard-coded) service account.
job.run(service_account="339239659794-compute@developer.gserviceaccount.com")

Creating PipelineJob
PipelineJob created. Resource name: projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-e2e-mlops-20230221181847
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-e2e-mlops-20230221181847')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/wf-kubeflow-e2e-mlops-20230221181847?project=339239659794
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-e2e-mlops-20230221181847 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-e2e-mlops-20230221181847 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/locations/us-central1/pipelineJobs/wf-kubeflow-e2e-mlops-20230221181847 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/339239659794/l

In [None]:
## Template (not executed): associate a submitted pipeline job with an experiment run.
## Replace 'my-run' / 'my-experiment' and the `...` placeholder with real values first.
my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment')
my_job = aiplatform.PipelineJob(...)
my_job.submit()
## The job is passed by name: `ExperimentRun.log` declares `pipeline_job` as a keyword argument.
my_run.log(pipeline_job=my_job)