In [None]:
import os
from pathlib import Path
import requests
from datetime import datetime

import kfp

from kfp.v2.dsl import pipeline
from kfp.v2 import compiler

import google.cloud.aiplatform as aiplatform


In [None]:
def _notebook_setting(name: str):
    """Resolve a deployment-specific setting this notebook needs but does not define.

    A value already bound in the notebook namespace (for example by an earlier
    cell) takes precedence, so existing workflows keep working unchanged.
    Otherwise the environment variable with the same name is used.

    Args:
        name: Name of the notebook global / environment variable to look up.

    Returns:
        The value bound in the notebook namespace, or the environment string.

    Raises:
        NameError: If the setting is defined in neither place.
    """
    namespace = globals()
    if name in namespace:
        return namespace[name]
    try:
        return os.environ[name]
    except KeyError:
        # Keep NameError (what a fresh kernel raised before) but make it actionable.
        raise NameError(
            f"{name} is not defined: set it in an earlier cell or export it "
            "as an environment variable before running this notebook."
        ) from None


# Settings referenced below and in the submission cell. Resolving them here lets
# the notebook run top to bottom on a fresh kernel.
PROJECT_ID = _notebook_setting("PROJECT_ID")
CSV_FILE_BUCKET_NAME = _notebook_setting("CSV_FILE_BUCKET_NAME")
SERVICE_ACCOUNT = _notebook_setting("SERVICE_ACCOUNT")

# Reuse cached component results instead of recomputing identical steps across runs.
ENABLE_CACHING = True

PIPELINE_NAME = "my-kfp-on-gcp-demo2-container-based"
# Local file the compiled pipeline spec is written to and later submitted from.
TEMPLATE_PATH = "ml_pipeline_2.json"
# GCS location used as the pipeline root and as the Vertex AI staging bucket.
PIPELINE_ROOT = f"gs://kfp-demo-bucket-{PROJECT_ID}"

# Run identifiers. The submission cell recomputes both right before creating the
# job, so these values only act as initial defaults.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
JOBID = f"training-pipeline-{TIMESTAMP}"

# Arguments passed to the pipeline function; keys must match its parameter names.
PIPELINE_PARAMS = {
    "project_id": PROJECT_ID,
    "original_bucket_id": CSV_FILE_BUCKET_NAME,
    "target_train_filename": "train.csv",
    "target_test_filename": "test.csv",
}

In [None]:
# Build the preprocessing op factory from its YAML component definition.
preprocess_op = kfp.components.load_component_from_file(
    'preprocess/component.yaml'
)

In [None]:
# Pipeline definition: the same preprocessing component is instantiated twice,
# once for the train file and once for the test file.
@pipeline(name=PIPELINE_NAME, pipeline_root=PIPELINE_ROOT)
def tmp_pipe(
    project_id: str,
    original_bucket_id: str,
    target_train_filename: str,
    target_test_filename: str,
):
    # Inputs that are identical for both preprocessing tasks.
    shared_inputs = dict(
        project_id=project_id,
        original_bucket_id=original_bucket_id,
    )

    # Task 1: preprocess the training file.
    train_task = preprocess_op(
        target_filename=target_train_filename,
        **shared_inputs,
    )
    train_task.set_display_name("Preprocess train data")

    # Task 2: preprocess the test file.
    test_task = preprocess_op(
        target_filename=target_test_filename,
        **shared_inputs,
    )
    test_task.set_display_name("Preprocess test data")

In [None]:
# Compile the pipeline function into the spec file at TEMPLATE_PATH.
compiler.Compiler().compile(
    package_path=TEMPLATE_PATH,
    pipeline_func=tmp_pipe,
)

In [None]:
# Initialise the Vertex AI SDK with the target project and staging bucket.
aiplatform.init(
    staging_bucket=PIPELINE_ROOT,
    project=PROJECT_ID,
)

In [None]:
# Recompute the timestamp on every execution of this cell so that each
# submission is created with a new job ID.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
JOBID = "training-pipeline-" + TIMESTAMP

# Describe the job: compiled template, run parameters and caching policy.
pipeline_ = aiplatform.pipeline_jobs.PipelineJob(
    display_name=PIPELINE_NAME,
    job_id=JOBID,
    template_path=TEMPLATE_PATH,
    parameter_values=PIPELINE_PARAMS,
    enable_caching=ENABLE_CACHING,
)

# Submit the job under the configured service account.
pipeline_.submit(service_account=SERVICE_ACCOUNT)