In [1]:
!pip3 install --upgrade  google-cloud-aiplatform \
                                 google-cloud-storage \
                                 kfp \
                                 google-cloud-pipeline-components



In [2]:
import random
import string


def generate_uuid(length: int = 8) -> str:
    """Return a random tag made of lowercase letters and digits.

    Despite the name this is not an RFC 4122 UUID; it is a short random
    string drawn with the `random` module, so it is not suitable for
    security-sensitive identifiers.

    Args:
        length: Number of characters to generate. Defaults to 8.

    Returns:
        A string of exactly `length` characters from [a-z0-9].

    Raises:
        ValueError: If `length` is negative.
    """
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length}")
    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choices(alphabet, k=length))


# Random suffix generated once per kernel session; appended to the pipeline
# job's display name further down.
UUID = generate_uuid()

In [3]:
# Print the active gcloud configuration (account, project, default regions).
!gcloud config list

[compute]
region = us-central1
[core]
account = 617832854196-compute@developer.gserviceaccount.com
disable_usage_reporting = True
project = end2end-416809
[dataproc]
region = us-central1

Your active configuration is: [default]


In [4]:
# Google Cloud project used by the gcloud CLI and by the Vertex AI SDK calls below.
PROJECT_ID = "end2end-416809"  # @param {type:"string"}

# Point the gcloud CLI at that project; IPython substitutes {PROJECT_ID}
# into the shell command before running it.
! gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [5]:
# Region used as the default value of the pipeline's `region` parameter.
REGION = "us-central1"  # @param {type: "string"}

In [6]:
# Service account e-mail for this environment.
# NOTE(review): none of the cells shown here reference SERVICE_ACCOUNT —
# confirm whether it should be passed to the pipeline run or removed.
SERVICE_ACCOUNT = "617832854196-compute@developer.gserviceaccount.com"  # @param {type:"string"}

In [7]:
import google.cloud.aiplatform as aip
import kfp

In [9]:
# Cloud Storage bucket used as the SDK staging bucket and as the parent of the pipeline root.
# NOTE(review): no cell shown here creates this bucket — presumably it already exists; confirm.
BUCKET_URI = f"gs://kfp-bucket-{PROJECT_ID}-unique"  # @param {type:"string"}

In [10]:
# GCS folder for this pipeline: the training CSV is copied under it and it is
# passed as the pipeline root when the job is submitted.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/churn-model"

In [11]:
# Initialise the Vertex AI SDK. The location is passed explicitly so the SDK
# follows REGION instead of silently falling back to its default region if
# REGION is ever changed.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

In [12]:
TRAIN_FILE_NAME = "Churn_Modelling.csv"
! gsutil cp gs://churn-data-kfp-bucket/Churn_Modelling.csv {PIPELINE_ROOT}/data/

gcs_csv_path = f"{PIPELINE_ROOT}/data/{TRAIN_FILE_NAME}"

Copying gs://churn-data-kfp-bucket/Churn_Modelling.csv [Content-Type=text/csv]...
/ [1 files][668.8 KiB/668.8 KiB]                                                
Operation completed over 1 objects/668.8 KiB.                                    


In [13]:
@kfp.dsl.pipeline(name="automl-tab-training-v2")
def pipeline(project: str = PROJECT_ID, region: str = REGION):
    """Create a tabular dataset, train an AutoML classifier and deploy it.

    Steps, in dependency order:
      1. Create a Vertex AI tabular dataset from the CSV at `gcs_csv_path`.
      2. Train an AutoML tabular classification model on the "Exited" column.
      3. Create an endpoint.
      4. Deploy the trained model to that endpoint.

    Args:
        project: Google Cloud project ID in which resources are created.
        region: Location passed to the dataset, training and endpoint steps.
    """
    # Imported here (as in the original cell) so the component library is only
    # needed when the pipeline function is actually compiled.
    from google_cloud_pipeline_components.v1.automl.training_job import AutoMLTabularTrainingJobRunOp
    from google_cloud_pipeline_components.v1.dataset import TabularDatasetCreateOp
    from google_cloud_pipeline_components.v1.endpoint import EndpointCreateOp, ModelDeployOp

    # Columns handed to AutoML, grouped by the transformation applied to them.
    numeric_columns = [
        "CustomerId",
        "CreditScore",
        "Age",
        "Tenure",
        "Balance",
        "NumOfProducts",
        "HasCrCard",
        "IsActiveMember",
        "EstimatedSalary",
    ]
    categorical_columns = ["Geography", "Gender"]
    column_transformations = [
        {"numeric": {"column_name": name}} for name in numeric_columns
    ] + [
        {"categorical": {"column_name": name}} for name in categorical_columns
    ]

    # `location=region` is passed to every step that accepts it so the whole
    # pipeline honours the `region` parameter, not just the endpoint.
    dataset_create_op = TabularDatasetCreateOp(
        project=project,
        location=region,
        display_name="churn",
        gcs_source=gcs_csv_path,
    )

    training_op = AutoMLTabularTrainingJobRunOp(
        project=project,
        location=region,
        display_name="train-automl-churn_data",
        optimization_prediction_type="classification",
        optimization_objective="maximize-au-roc",
        column_transformations=column_transformations,
        dataset=dataset_create_op.outputs["dataset"],
        target_column="Exited",
    )

    endpoint_op = EndpointCreateOp(
        project=project,
        location=region,
        display_name="train-automl-churn_data_endpoint",
    )

    # Deploy on a single fixed-size replica (min == max, so no autoscaling).
    _ = ModelDeployOp(
        model=training_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        dedicated_resources_machine_type="n1-standard-4",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )

In [15]:
from kfp import compiler 

# Compile the pipeline function into a YAML spec at the given relative path;
# the job submission cell must load the spec from this same file name.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="tabular_classification_pipeline.yaml"
)

In [16]:
# Display name for this run, made unique by the per-session random suffix.
DISPLAY_NAME = "churn_model_" + UUID

# Submit the spec written by the compile cell. template_path must match the
# package_path given to the compiler ("tabular_classification_pipeline.yaml");
# any other file name either fails on a clean checkout or runs a stale spec
# left over from an earlier session.
job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path="tabular_classification_pipeline.yaml",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,  # do not reuse cached step results from earlier runs
)

# Start the run; the captured output shows the job state being polled while it runs.
job.run()

# ! rm tabular_classification_pipeline.yaml

Creating PipelineJob
PipelineJob created. Resource name: projects/617832854196/locations/us-central1/pipelineJobs/automl-tab-training-v2-20240314131049
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/617832854196/locations/us-central1/pipelineJobs/automl-tab-training-v2-20240314131049')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/automl-tab-training-v2-20240314131049?project=617832854196
PipelineJob projects/617832854196/locations/us-central1/pipelineJobs/automl-tab-training-v2-20240314131049 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/617832854196/locations/us-central1/pipelineJobs/automl-tab-training-v2-20240314131049 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/617832854196/locations/us-central1/pipelineJobs/automl-tab-training-v2-20240314131049 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/61783285