### Import Libraries

In [None]:
# import required libraries
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath, ClassificationMetrics,
                        Metrics, component)
import os
import re
from pathlib import Path

from datetime import date
from datetime import timedelta
from dateutil.relativedelta import relativedelta

import google
from google.oauth2 import credentials
from google.oauth2 import service_account
from google.oauth2.service_account import Credentials
from google.cloud import storage
from google.cloud.aiplatform import pipeline_jobs
from google_cloud_pipeline_components.v1.batch_predict_job import \
    ModelBatchPredictOp as batch_prediction_op


### Parameters

In [None]:
# Tag this notebook cell with "parameters".
# Placeholder values: everything is an empty string except MODEL_ID.

# project / dataset / location
PROJECT_ID = ''
DATASET_ID = ''
REGION = ''

# storage buckets
BUCKET_NAME = ''
RESOURCES_BUCKET = ''
FILE_BUCKET = ''

# model identifier
MODEL_ID = '5090'

In [None]:
# Tag this notebook cell with "parameters".
# Concrete values; the three bucket constants all name the same bucket.

# project / dataset
PROJECT_ID = 'divg-josh-pr-d1cc3a'
DATASET_ID = 'call_to_retention_dataset'

# storage buckets
BUCKET_NAME = 'divg-josh-pr-d1cc3a-default'
RESOURCES_BUCKET = 'divg-josh-pr-d1cc3a-default'
FILE_BUCKET = 'divg-josh-pr-d1cc3a-default'

# model identifier
MODEL_ID = '5090'

### Service Parameters

In [None]:
# Service-level settings: the service label used to build view and pipeline
# names, the BigQuery table id of the targets, and the region.
REGION = "northamerica-northeast1"
SERVICE_TYPE = 'call-to-retention'
TABLE_ID = 'bq_call_to_retention_targets'

### Pulumi Parameters

In [None]:
# Stack name and region
STACK_NAME = 'call_to_retention'
REGION = "northamerica-northeast1"

# Training pipeline (name must match pulumi.yaml)
TRAIN_PIPELINE_NAME_PATH = 'train_pipeline'
TRAIN_PIPELINE_NAME = 'call-to-retention-train-pipeline'
TRAIN_PIPELINE_DESCRIPTION = 'call-to-retention-train-pipeline'

# Prediction pipeline (name must match pulumi.yaml)
PREDICT_PIPELINE_NAME_PATH = 'predict_pipeline'
PREDICT_PIPELINE_NAME = 'call-to-retention-predict-pipeline'
PREDICT_PIPELINE_DESCRIPTION = 'call-to-retention-predict-pipeline'

### Query + Pre-Processing Component Parameters

In [None]:
# Paths, built from the stack name and the training pipeline folder, for the
# training queries and the shared utils module.
TRAIN_QUERIES_PATH = '{}/{}/queries/'.format(STACK_NAME, TRAIN_PIPELINE_NAME_PATH)
TRAIN_UTILS_FILE_PATH = '{}/{}/utils'.format(STACK_NAME, TRAIN_PIPELINE_NAME_PATH)
UTILS_FILENAME = 'utils.py'

# Serving-data table names
INPUT_SERVING_DATA_TABLENAME = 'input_serving_data'
PROCESSED_SERVING_DATA_TABLENAME = 'processed_serving_data'

# Yesterday's date as YYYY-MM-DD, and the fully qualified targets table
QUERY_DATE = (date.today() + relativedelta(days=-1)).strftime('%Y-%m-%d')
TARGET_TABLE_REF = f'{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}'

# Folder holding the query templates for this model id
FOLDER_NAME = f'xgb_tos_cross_sell_{MODEL_ID}_train_deploy'
QUERIES_PATH = f'vertex_pipelines/{FOLDER_NAME}/queries/'

# One query template per input view
ACCOUNT_PROMO_EXPIRY_LIST_QUERY_PATH = f'{QUERIES_PATH}create_input_account_promo_expiry_list_query.txt'
ACCOUNT_CONSL_QUERY_PATH = f'{QUERIES_PATH}create_input_account_consl_query.txt'
ACCOUNT_FFH_BILLING_QUERY_PATH = f'{QUERIES_PATH}create_input_account_ffh_billing_query.txt'
ACCOUNT_FFH_DISCOUNTS_QUERY_PATH = f'{QUERIES_PATH}create_input_account_ffh_discounts_query.txt'
ACCOUNT_HS_USAGE_QUERY_PATH = f'{QUERIES_PATH}create_input_account_hs_usage_query.txt'
ACCOUNT_DEMO_INCOME_QUERY_PATH = f'{QUERIES_PATH}create_input_account_demo_income_query.txt'
ACCOUNT_GPON_COPPER_QUERY_PATH = f'{QUERIES_PATH}create_input_account_gpon_copper_query.txt'
ACCOUNT_PRICE_PLAN_QUERY_PATH = f'{QUERIES_PATH}create_input_account_price_plan_query.txt'
ACCOUNT_CLCKSTRM_TELUS_QUERY_PATH = f'{QUERIES_PATH}create_input_account_clckstrm_telus_query.txt'
ACCOUNT_CALL_HISTORY_QUERY_PATH = f'{QUERIES_PATH}create_input_account_call_history_query.txt'


### Import Pipeline Components

In [None]:
# Download every component file stored under `prefix` in the resources bucket
# into the local `dl_dir` folder, keeping the sub-folder layout, so that the
# `components` package can be imported afterwards.
prefix = f'{STACK_NAME}/{TRAIN_PIPELINE_NAME_PATH}/components/'
dl_dir = 'components/'

storage_client = storage.Client()
bucket = storage_client.bucket(RESOURCES_BUCKET)
blobs = bucket.list_blobs(prefix=prefix)  # objects whose name starts with `prefix`
for blob in blobs:
    # Names ending in "/" are folder placeholders, not files.
    if blob.name.endswith("/"):
        continue
    # Strip only the leading prefix. Splitting on `prefix` would truncate the
    # path if the same text appeared again later in the object name.
    relative_name = blob.name[len(prefix):]
    file_path = f"{dl_dir}{relative_name}"
    # Create the local parent folder(s) before writing the file.
    Path(file_path).parent.mkdir(parents=True, exist_ok=True)
    blob.download_to_filename(file_path)

# import main pipeline components (must come after the download above)
import components


### Date Parameters

In [None]:
# Reference dates for the training and validation feature views.
# Dynamic alternative for scoringDate:
#   date.today() - relativedelta(days=2) - relativedelta(months=30)
scoringDate = date(2022, 6, 1)
# Dynamic alternative for valScoringDate: scoringDate - relativedelta(days=2)
valScoringDate = date(2022, 7, 1)

# training views
PROMO_EXPIRY_LIST_VIEW_NAME = '{}_pipeline_promo_expiry_list_data_training_bi_layer'.format(SERVICE_TYPE)
CONSL_VIEW_NAME = '{}_pipeline_consl_data_training_bi_layer'.format(SERVICE_TYPE)
FFH_BILLING_VIEW_NAME = '{}_pipeline_ffh_billing_data_training_bi_layer'.format(SERVICE_TYPE)
FFH_DISCOUNTS_VIEW_NAME = '{}_pipeline_ffh_discounts_data_training_bi_layer'.format(SERVICE_TYPE)
HS_USAGE_VIEW_NAME = '{}_pipeline_hs_usage_data_training_bi_layer'.format(SERVICE_TYPE)
DEMO_INCOME_VIEW_NAME = '{}_pipeline_demo_income_data_training_bi_layer'.format(SERVICE_TYPE)
GPON_COPPER_VIEW_NAME = '{}_pipeline_gpon_copper_data_training_bi_layer'.format(SERVICE_TYPE)
PRICE_PLAN_VIEW_NAME = '{}_pipeline_price_plan_data_training_bi_layer'.format(SERVICE_TYPE)
CLCKSTRM_TELUS_VIEW_NAME = '{}_pipeline_clckstrm_telus_training_bi_layer'.format(SERVICE_TYPE)
CALL_HISTORY_VIEW_NAME = '{}_pipeline_call_history_data_training_bi_layer'.format(SERVICE_TYPE)

# validation views
PROMO_EXPIRY_LIST_VIEW_VALIDATION_NAME = '{}_pipeline_promo_expiry_list_data_validation_bi_layer'.format(SERVICE_TYPE)
CONSL_VIEW_VALIDATION_NAME = '{}_pipeline_consl_data_validation_bi_layer'.format(SERVICE_TYPE)
FFH_BILLING_VIEW_VALIDATION_NAME = '{}_pipeline_ffh_billing_data_validation_bi_layer'.format(SERVICE_TYPE)
FFH_DISCOUNTS_VIEW_VALIDATION_NAME = '{}_pipeline_ffh_discounts_data_validation_bi_layer'.format(SERVICE_TYPE)
HS_USAGE_VIEW_VALIDATION_NAME = '{}_pipeline_hs_usage_data_validation_bi_layer'.format(SERVICE_TYPE)
DEMO_INCOME_VIEW_VALIDATION_NAME = '{}_pipeline_demo_income_data_validation_bi_layer'.format(SERVICE_TYPE)
GPON_COPPER_VIEW_VALIDATION_NAME = '{}_pipeline_gpon_copper_data_validation_bi_layer'.format(SERVICE_TYPE)
PRICE_PLAN_VIEW_VALIDATION_NAME = '{}_pipeline_price_plan_data_validation_bi_layer'.format(SERVICE_TYPE)
CLCKSTRM_TELUS_VIEW_VALIDATION_NAME = '{}_pipeline_clckstrm_telus_validation_bi_layer'.format(SERVICE_TYPE)
CALL_HISTORY_VIEW_VALIDATION_NAME = '{}_pipeline_call_history_data_validation_bi_layer'.format(SERVICE_TYPE)

# training dates
# Naming convention: *_DASH constants are 'YYYY-MM-DD' strings; SCORE_DATE is
# the compact 'YYYYMMDD' form; *_YEAR / *_MONTH are integers.
SCORE_DATE = scoringDate.strftime('%Y%m%d')  # dynamic alternative: date.today().strftime('%Y%m%d')
SCORE_DATE_DASH = scoringDate.strftime('%Y-%m-%d')
# first day of the month six months before the scoring date
SCORE_DATE_MINUS_6_MOS_DASH = ((scoringDate - relativedelta(months=6)).replace(day=1)).strftime('%Y-%m-%d')
# first day of the scoring month (formatted as a string like the other *_DASH values)
SCORE_DATE_THIS_MONTH_START_DASH = scoringDate.replace(day=1).strftime('%Y-%m-%d')
# last day of the scoring month
SCORE_DATE_THIS_MONTH_END_DASH = (((scoringDate.replace(day=1)) + relativedelta(months=1)).replace(day=1) - timedelta(days=1)).strftime('%Y-%m-%d')
# first and last day of the month before the scoring month
SCORE_DATE_LAST_MONTH_START_DASH = (scoringDate.replace(day=1) - timedelta(days=1)).replace(day=1).strftime('%Y-%m-%d')
SCORE_DATE_LAST_MONTH_END_DASH = ((scoringDate.replace(day=1)) - timedelta(days=1)).strftime('%Y-%m-%d')
# year and month number of the month before the scoring month
SCORE_DATE_LAST_MONTH_YEAR = ((scoringDate.replace(day=1)) - timedelta(days=1)).year
SCORE_DATE_LAST_MONTH_MONTH = ((scoringDate.replace(day=1)) - timedelta(days=1)).month
# first day of the months three and four months after the scoring month
PROMO_EXPIRY_START = (scoringDate.replace(day=1) + relativedelta(months=3)).replace(day=1).strftime('%Y-%m-%d')
PROMO_EXPIRY_END = (scoringDate.replace(day=1) + relativedelta(months=4)).replace(day=1).strftime('%Y-%m-%d')

# validation dates (same derivations as above, from valScoringDate)
SCORE_DATE_VAL = valScoringDate.strftime('%Y%m%d')
SCORE_DATE_VAL_DASH = valScoringDate.strftime('%Y-%m-%d')
SCORE_DATE_VAL_MINUS_6_MOS_DASH = ((valScoringDate - relativedelta(months=6)).replace(day=1)).strftime('%Y-%m-%d')
SCORE_DATE_VAL_THIS_MONTH_START_DASH = valScoringDate.replace(day=1).strftime('%Y-%m-%d')
SCORE_DATE_VAL_THIS_MONTH_END_DASH = (((valScoringDate.replace(day=1)) + relativedelta(months=1)).replace(day=1) - timedelta(days=1)).strftime('%Y-%m-%d')
SCORE_DATE_VAL_LAST_MONTH_START_DASH = (valScoringDate.replace(day=1) - timedelta(days=1)).replace(day=1).strftime('%Y-%m-%d')
SCORE_DATE_VAL_LAST_MONTH_END_DASH = ((valScoringDate.replace(day=1)) - timedelta(days=1)).strftime('%Y-%m-%d')
SCORE_DATE_VAL_LAST_MONTH_YEAR = ((valScoringDate.replace(day=1)) - timedelta(days=1)).year
SCORE_DATE_VAL_LAST_MONTH_MONTH = ((valScoringDate.replace(day=1)) - timedelta(days=1)).month
PROMO_EXPIRY_START_VAL = (valScoringDate.replace(day=1) + relativedelta(months=3)).replace(day=1).strftime('%Y-%m-%d')
PROMO_EXPIRY_END_VAL = (valScoringDate.replace(day=1) + relativedelta(months=4)).replace(day=1).strftime('%Y-%m-%d')

SCORE_DATE_DELTA = 0
SCORE_DATE_VAL_DELTA = 0
TICKET_DATE_WINDOW = 30  # Days of ticket data to be queried

# query paths (one query template per input view, under QUERIES_PATH)
ACCOUNT_PROMO_EXPIRY_LIST_QUERY_PATH = QUERIES_PATH + 'create_input_account_promo_expiry_list_query.txt'
ACCOUNT_CONSL_QUERY_PATH = QUERIES_PATH + 'create_input_account_consl_query.txt'
ACCOUNT_FFH_BILLING_QUERY_PATH = QUERIES_PATH + 'create_input_account_ffh_billing_query.txt'
ACCOUNT_FFH_DISCOUNTS_QUERY_PATH = QUERIES_PATH + 'create_input_account_ffh_discounts_query.txt'
ACCOUNT_HS_USAGE_QUERY_PATH = QUERIES_PATH + 'create_input_account_hs_usage_query.txt'
ACCOUNT_DEMO_INCOME_QUERY_PATH = QUERIES_PATH + 'create_input_account_demo_income_query.txt'
ACCOUNT_GPON_COPPER_QUERY_PATH = QUERIES_PATH + 'create_input_account_gpon_copper_query.txt'
ACCOUNT_PRICE_PLAN_QUERY_PATH = QUERIES_PATH + 'create_input_account_price_plan_query.txt'
ACCOUNT_CLCKSTRM_TELUS_QUERY_PATH = QUERIES_PATH + 'create_input_account_clckstrm_telus_query.txt'
ACCOUNT_CALL_HISTORY_QUERY_PATH = QUERIES_PATH + 'create_input_account_call_history_query.txt'


In [None]:
def main(mapping):
    """Build the training pipeline definition for the SERVICE_TYPE model.

    The pipeline creates ten input views for the training date and ten for the
    validation date, runs one preprocessing step per data set after its views,
    and finally runs the train-and-save step after both preprocessing steps.

    Args:
        mapping: Not read by this function; kept so existing callers that
            pass it keep working.

    Returns:
        The ``@dsl.pipeline``-decorated pipeline function.

    Note:
        The component factories (``create_input_account_*_view``,
        ``preprocess``, ``train_and_save_model``) are referenced by bare name
        and are not defined in this cell; they must be in scope when the
        pipeline function is executed.
    """

    @dsl.pipeline(
        # A name for the pipeline.
        name="{}-xgb-pipeline".format(SERVICE_TYPE),
        description=' pipeline for training {} model'.format(SERVICE_TYPE)
    )
    def pipeline(
            project_id: str = PROJECT_ID,
            region: str = REGION,
            resource_bucket: str = RESOURCES_BUCKET,
            file_bucket: str = FILE_BUCKET
    ):
        # NOTE: the pipeline parameters above are not read in the body; every
        # step below is configured from the module-level constants.

        # Arguments shared by every view-creation step.
        view_common = dict(
            dataset_id=DATASET_ID,
            project_id=PROJECT_ID,
            region=REGION,
            resource_bucket=RESOURCES_BUCKET,
        )

        # ------------- train view ops ---------------
        train_view_ops = [
            # 1. promo expiry list
            create_input_account_promo_expiry_list_view(
                view_name=PROMO_EXPIRY_LIST_VIEW_NAME,
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                promo_expiry_start=PROMO_EXPIRY_START,
                promo_expiry_end=PROMO_EXPIRY_END,
                query_path=ACCOUNT_PROMO_EXPIRY_LIST_QUERY_PATH,
                **view_common
            ),
            # 2. consolidated account
            create_input_account_consl_view(
                view_name=CONSL_VIEW_NAME,
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                query_path=ACCOUNT_CONSL_QUERY_PATH,
                **view_common
            ),
            # 3. ffh billing
            create_input_account_ffh_billing_view(
                v_report_date=SCORE_DATE_DASH,
                v_start_date=SCORE_DATE_MINUS_6_MOS_DASH,
                v_end_date=SCORE_DATE_LAST_MONTH_END_DASH,
                v_bill_year=SCORE_DATE_LAST_MONTH_YEAR,
                v_bill_month=SCORE_DATE_LAST_MONTH_MONTH,
                view_name=FFH_BILLING_VIEW_NAME,
                query_path=ACCOUNT_FFH_BILLING_QUERY_PATH,
                **view_common
            ),
            # 4. ffh discounts
            create_input_account_ffh_discounts_view(
                view_name=FFH_DISCOUNTS_VIEW_NAME,
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                query_path=ACCOUNT_FFH_DISCOUNTS_QUERY_PATH,
                **view_common
            ),
            # 5. hs usage
            create_input_account_hs_usage_view(
                v_report_date=SCORE_DATE_DASH,
                v_start_date=SCORE_DATE_MINUS_6_MOS_DASH,
                v_end_date=SCORE_DATE_LAST_MONTH_END_DASH,
                v_bill_year=SCORE_DATE_LAST_MONTH_YEAR,
                v_bill_month=SCORE_DATE_LAST_MONTH_MONTH,
                view_name=HS_USAGE_VIEW_NAME,
                query_path=ACCOUNT_HS_USAGE_QUERY_PATH,
                **view_common
            ),
            # 6. demo income
            create_input_account_demo_income_view(
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                view_name=DEMO_INCOME_VIEW_NAME,
                query_path=ACCOUNT_DEMO_INCOME_QUERY_PATH,
                **view_common
            ),
            # 7. gpon copper
            create_input_account_gpon_copper_view(
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                view_name=GPON_COPPER_VIEW_NAME,
                query_path=ACCOUNT_GPON_COPPER_QUERY_PATH,
                **view_common
            ),
            # 8. price plan
            create_input_account_price_plan_view(
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                view_name=PRICE_PLAN_VIEW_NAME,
                query_path=ACCOUNT_PRICE_PLAN_QUERY_PATH,
                **view_common
            ),
            # 9. clickstream
            create_input_account_clckstrm_telus_view(
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                view_name=CLCKSTRM_TELUS_VIEW_NAME,
                query_path=ACCOUNT_CLCKSTRM_TELUS_QUERY_PATH,
                **view_common
            ),
            # 10. call history
            create_input_account_call_history_view(
                score_date=SCORE_DATE,
                score_date_delta=SCORE_DATE_DELTA,
                view_name=CALL_HISTORY_VIEW_NAME,
                query_path=ACCOUNT_CALL_HISTORY_QUERY_PATH,
                **view_common
            ),
        ]
        for view_op in train_view_ops:
            view_op.set_memory_limit('16G')
            view_op.set_cpu_limit('4')

        # ----- preprocessing train data --------
        preprocess_train_op = preprocess(
            promo_expiry_list_view=PROMO_EXPIRY_LIST_VIEW_NAME,
            account_consl_view=CONSL_VIEW_NAME,
            account_bill_view=FFH_BILLING_VIEW_NAME,
            account_discounts_view=FFH_DISCOUNTS_VIEW_NAME,
            hs_usage_view=HS_USAGE_VIEW_NAME,
            demo_income_view=DEMO_INCOME_VIEW_NAME,
            gpon_copper_view=GPON_COPPER_VIEW_NAME,
            price_plan_view=PRICE_PLAN_VIEW_NAME,
            clckstrm_telus_view=CLCKSTRM_TELUS_VIEW_NAME,
            call_history_view=CALL_HISTORY_VIEW_NAME,
            save_data_path='gs://{}/{}_train.csv.gz'.format(FILE_BUCKET, SERVICE_TYPE),
            project_id=PROJECT_ID,
            dataset_id=DATASET_ID
        )
        preprocess_train_op.set_memory_limit('128G')
        preprocess_train_op.set_cpu_limit('32')

        # Training preprocessing reads the training views, so it must wait
        # for all of them.
        for view_op in train_view_ops:
            preprocess_train_op.after(view_op)

        # --------------- validation view ops ---------------
        validation_view_ops = [
            # 1. promo expiry list
            create_input_account_promo_expiry_list_view(
                view_name=PROMO_EXPIRY_LIST_VIEW_VALIDATION_NAME,
                # Undashed score date, matching the training call and every
                # other validation view.
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                promo_expiry_start=PROMO_EXPIRY_START_VAL,
                promo_expiry_end=PROMO_EXPIRY_END_VAL,
                query_path=ACCOUNT_PROMO_EXPIRY_LIST_QUERY_PATH,
                **view_common
            ),
            # 2. consolidated account
            create_input_account_consl_view(
                view_name=CONSL_VIEW_VALIDATION_NAME,
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                query_path=ACCOUNT_CONSL_QUERY_PATH,
                **view_common
            ),
            # 3. ffh billing
            create_input_account_ffh_billing_view(
                v_report_date=SCORE_DATE_VAL_DASH,
                v_start_date=SCORE_DATE_VAL_MINUS_6_MOS_DASH,
                v_end_date=SCORE_DATE_VAL_LAST_MONTH_END_DASH,
                v_bill_year=SCORE_DATE_VAL_LAST_MONTH_YEAR,
                v_bill_month=SCORE_DATE_VAL_LAST_MONTH_MONTH,
                view_name=FFH_BILLING_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_FFH_BILLING_QUERY_PATH,
                **view_common
            ),
            # 4. ffh discounts
            create_input_account_ffh_discounts_view(
                view_name=FFH_DISCOUNTS_VIEW_VALIDATION_NAME,
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                query_path=ACCOUNT_FFH_DISCOUNTS_QUERY_PATH,
                **view_common
            ),
            # 5. hs usage
            create_input_account_hs_usage_view(
                v_report_date=SCORE_DATE_VAL_DASH,
                v_start_date=SCORE_DATE_VAL_MINUS_6_MOS_DASH,
                v_end_date=SCORE_DATE_VAL_LAST_MONTH_END_DASH,
                v_bill_year=SCORE_DATE_VAL_LAST_MONTH_YEAR,
                v_bill_month=SCORE_DATE_VAL_LAST_MONTH_MONTH,
                view_name=HS_USAGE_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_HS_USAGE_QUERY_PATH,
                **view_common
            ),
            # 6. demo income
            create_input_account_demo_income_view(
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                view_name=DEMO_INCOME_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_DEMO_INCOME_QUERY_PATH,
                **view_common
            ),
            # 7. gpon copper
            create_input_account_gpon_copper_view(
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                view_name=GPON_COPPER_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_GPON_COPPER_QUERY_PATH,
                **view_common
            ),
            # 8. price plan
            create_input_account_price_plan_view(
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                view_name=PRICE_PLAN_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_PRICE_PLAN_QUERY_PATH,
                **view_common
            ),
            # 9. clickstream
            create_input_account_clckstrm_telus_view(
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                view_name=CLCKSTRM_TELUS_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_CLCKSTRM_TELUS_QUERY_PATH,
                **view_common
            ),
            # 10. call history
            create_input_account_call_history_view(
                score_date=SCORE_DATE_VAL,
                score_date_delta=SCORE_DATE_VAL_DELTA,
                view_name=CALL_HISTORY_VIEW_VALIDATION_NAME,
                query_path=ACCOUNT_CALL_HISTORY_QUERY_PATH,
                **view_common
            ),
        ]
        for view_op in validation_view_ops:
            view_op.set_memory_limit('16G')
            view_op.set_cpu_limit('4')

        # ----- preprocessing validation data --------
        preprocess_validation_op = preprocess(
            promo_expiry_list_view=PROMO_EXPIRY_LIST_VIEW_VALIDATION_NAME,
            account_consl_view=CONSL_VIEW_VALIDATION_NAME,
            account_bill_view=FFH_BILLING_VIEW_VALIDATION_NAME,
            account_discounts_view=FFH_DISCOUNTS_VIEW_VALIDATION_NAME,
            hs_usage_view=HS_USAGE_VIEW_VALIDATION_NAME,
            demo_income_view=DEMO_INCOME_VIEW_VALIDATION_NAME,
            gpon_copper_view=GPON_COPPER_VIEW_VALIDATION_NAME,
            price_plan_view=PRICE_PLAN_VIEW_VALIDATION_NAME,
            clckstrm_telus_view=CLCKSTRM_TELUS_VIEW_VALIDATION_NAME,
            call_history_view=CALL_HISTORY_VIEW_VALIDATION_NAME,
            save_data_path='gs://{}/{}_validation.csv.gz'.format(FILE_BUCKET, SERVICE_TYPE),
            project_id=PROJECT_ID,
            dataset_id=DATASET_ID
        )
        preprocess_validation_op.set_memory_limit('256G')
        preprocess_validation_op.set_cpu_limit('32')

        # Validation preprocessing reads the validation views, so it (not
        # the training step) must wait for them.
        for view_op in validation_view_ops:
            preprocess_validation_op.after(view_op)

        # ----- model training --------
        train_and_save_model_op = train_and_save_model(
            file_bucket=FILE_BUCKET,
            service_type=SERVICE_TYPE,
            score_date_dash=SCORE_DATE_DASH,
            score_date_val_dash=SCORE_DATE_VAL_DASH,
            project_id=PROJECT_ID,
            dataset_id=DATASET_ID,
        )
        train_and_save_model_op.set_memory_limit('256G')
        train_and_save_model_op.set_cpu_limit('32')

        # Training starts only after both data sets have been preprocessed.
        train_and_save_model_op.after(preprocess_train_op)
        train_and_save_model_op.after(preprocess_validation_op)

    return pipeline
