In [None]:
# import required libraries
import kfp
from kfp import dsl
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, HTML, OutputPath, ClassificationMetrics,
                        Metrics, component)
from kfp.v2 import compiler
from datetime import date
from dateutil.relativedelta import relativedelta
import components

from google.cloud.aiplatform import pipeline_jobs
from google_cloud_pipeline_components.v1.batch_predict_job import \
    ModelBatchPredictOp as batch_prediction_op
from google.cloud import storage

from pathlib import Path
import datetime
from datetime import timedelta, date
# ---------------------------------------------------------------------------
# Pipeline configuration constants.
# ---------------------------------------------------------------------------

# BigQuery destination and service naming.
SERVICE_TYPE = 'whsia-churn-v2'
DATASET_ID = 'whsia_churn_dataset_v2'
TABLE_ID = 'bq_whsia_churn_score_v2'
FOLDER_NAME = 'whsia_churn_deploy'
QUERIES_PATH = f"{FOLDER_NAME}/queries/"

# NOTE(review): this assignment only re-binds PROJECT_ID to its own string
# value, so PROJECT_ID must already be defined before this line runs
# (otherwise a NameError is raised). Presumably a templating step or an
# earlier cell supplies it -- confirm.
PROJECT_ID = f"{PROJECT_ID}"
RESOURCE_BUCKET = f"{PROJECT_ID}_bkt_whsia_churn_v2"
FILE_BUCKET = f"{PROJECT_ID}_bkt_whsia_churn_v2"
REGION = "northamerica-northeast1"

# Pipeline identity and storage layout.
STACK_NAME = 'whsia_churn_v2'
SERVING_PIPELINE_NAME_PATH = 'serving_pipeline'
UPLOAD_MODEL_PIPELINE_NAME_PATH = 'upload_model_pipeline'
PIPELINE_NAME = 'whsia-churn-v2-serving-pipeline' # Same name as pulumi.yaml
PIPELINE_DESCRIPTION = 'whsia-churn-v2-serving-pipeline'

UTILS_FILE_PATH = f"{STACK_NAME}/{SERVING_PIPELINE_NAME_PATH}/utils" # Path will be github path
UTILS_FILENAME = 'bq_utils.py'

PROCESSED_SERVING_DATA_TABLENAME = 'processed_serving_data'
INPUT_SERVING_DATA_TABLENAME = 'input_serving_data'

# Query inputs: the query date is yesterday, formatted as YYYY-MM-DD.
QUERY_DATE = (date.today() - relativedelta(days=1)).strftime('%Y-%m-%d')
wHSIA_QUERY_VIEW_NAME = 'whsia_query_path_view'
wHSIA_QUERY_PATH = f"{QUERIES_PATH}whsia_test_qry.txt"
# Fully-qualified table reference in "project.dataset.table" form.
TARGET_TABLE_REF = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

# Download the pipeline component files from the resource bucket into the
# local "components/" directory, mirroring the layout found under `prefix`.
prefix = f'{STACK_NAME}/{SERVING_PIPELINE_NAME_PATH}/components/'
dl_dir = 'components/'

storage_client = storage.Client()
# Use RESOURCE_BUCKET, the bucket constant this file defines; the previous
# spelling "RESOURCES_BUCKET" is not defined anywhere in the visible code.
bucket = storage_client.bucket(RESOURCE_BUCKET)
blobs = bucket.list_blobs(prefix=prefix)  # Get list of files
for blob in blobs:
    # Names ending in "/" have no file name component, so there is nothing
    # to write locally for them.
    if blob.name.endswith("/"):
        continue
    print(blob.name)
    # Listing is restricted to `prefix`, so each name starts with it. Strip
    # only that leading occurrence: splitting on the prefix would return the
    # wrong tail if the same text appeared again deeper in the object name.
    relative_name = blob.name[len(prefix):]
    file_path = f"{dl_dir}{relative_name}"
    # Create the destination directory (including nested ones) before writing.
    Path(file_path).parent.mkdir(parents=True, exist_ok=True)
    blob.download_to_filename(file_path)
    print(file_path)

# import main pipeline components
import components

@dsl.pipeline(
    # A name for the pipeline.
    name="whsia-churn-v2-base-table",
    description='pipeline for whsia churn - v2'
)
def pipeline(
        project_id: str = PROJECT_ID,
        region: str = REGION,
        resource_bucket: str = RESOURCE_BUCKET
):
    """Define the two-step wHSIA churn pipeline: create the view, then process it.

    The pipeline parameters are forwarded to the component ops so that values
    supplied when the job is submitted take effect; their defaults are the
    module-level constants, so an un-parameterized run behaves as before.

    Args:
        project_id: Project passed to both ops.
        region: Region passed to the view-creation op.
        resource_bucket: Bucket passed to the view-creation op.
    """
    # NOTE(review): create_wHSIA_view and wHSIA_processing are not imported by
    # name in the visible code (only `import components` is) -- confirm they
    # are brought into scope before this function is compiled.

    # -------------  create ops ---------------
    create_wHSIA_view_op = create_wHSIA_view(
        view_name=wHSIA_QUERY_VIEW_NAME,
        query_date=QUERY_DATE,
        project_id=project_id,
        dataset_id=DATASET_ID,
        region=region,
        resource_bucket=resource_bucket,
        query_path=wHSIA_QUERY_PATH
    )
    create_wHSIA_view_op.set_memory_limit('16G')
    create_wHSIA_view_op.set_cpu_limit('4')

    wHSIA_processing_op = wHSIA_processing(
        view_name=wHSIA_QUERY_VIEW_NAME,
        project_id=project_id,
        dataset_id=DATASET_ID,
        table_id=TABLE_ID,
        query_date=QUERY_DATE,
        file_bucket=FILE_BUCKET
    )
    wHSIA_processing_op.set_memory_limit('32G')
    wHSIA_processing_op.set_cpu_limit('4')

    # The processing op consumes no output of the view op, so the ordering
    # between the two steps has to be declared explicitly.
    wHSIA_processing_op.after(create_wHSIA_view_op)


In [None]:
from kfp.v2 import compiler
from google.cloud.aiplatform import pipeline_jobs

import json

# Compile the pipeline function into a job spec file on local disk.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(pipeline_func=pipeline, package_path="pipeline.json")

# Describe the pipeline job: it reads the spec compiled above, disables
# caching, and uses the resource bucket as its pipeline root.
job_settings = dict(
    display_name=PIPELINE_NAME,
    template_path="pipeline.json",
    location=REGION,
    enable_caching=False,
    pipeline_root=f"gs://{RESOURCE_BUCKET}",
)
job = pipeline_jobs.PipelineJob(**job_settings)

# Run the job under the project's "bilayer-sa" service account.
job.run(service_account=f"bilayer-sa@{PROJECT_ID}.iam.gserviceaccount.com")