### Import Libraries

In [None]:
# import required libraries
import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath, ClassificationMetrics,
                        Metrics, component)
import os
import re
from pathlib import Path

from datetime import date
from datetime import timedelta
from dateutil.relativedelta import relativedelta

import google
from google.oauth2 import credentials
from google.oauth2 import service_account
from google.oauth2.service_account import Credentials
from google.cloud import storage
from google.cloud.aiplatform import pipeline_jobs
from google_cloud_pipeline_components.v1.batch_predict_job import \
    ModelBatchPredictOp


### YAML Parameters

In [None]:
# Parameters cell (tag this cell with "parameters").
# Every value defaults to an empty string in this cell.
PROJECT_ID = ""
DATASET_ID = ""
RESOURCE_BUCKET = ""
FILE_BUCKET = ""
REGION = ""
MODEL_ID = ""
MODEL_NAME = ""


In [None]:
# Workbench only: hard-coded values for running the notebook interactively.
PROJECT_ID = 'divg-groovyhoon-pr-d2eab4'
DATASET_ID = 'nba_offer_targeting'
REGION = 'northamerica-northeast1'
FILE_BUCKET = 'divg-groovyhoon-pr-d2eab4-default'

### Service Parameters

In [None]:
# Service identifiers: underscore form and hyphenated form of the same name.
SERVICE_TYPE = "nba_offer_targeting_checker"
SERVICE_TYPE_NAME = "nba-offer-targeting-checker"
# Region is (re)assigned here to a fixed value.
REGION = 'northamerica-northeast1'

### Pipeline Parameters

In [None]:
# Names and locations used when compiling and submitting the serving pipeline.
STACK_NAME = "nba_offer_targeting_checker"
SERVING_PIPELINE_NAME_PATH = "nba_offer_targeting_checker_pipeline/serving_pipeline"
# Same name as pulumi.yaml
SERVING_PIPELINE_NAME = "nba-offer-targeting-checker-serving-pipeline"
SERVING_PIPELINE_DESCRIPTION = "nba-offer-targeting-checker-serving-pipeline"
# Pipeline root is the top level of the file bucket.
PIPELINE_ROOT = "gs://{}".format(FILE_BUCKET)
REGION = 'northamerica-northeast1'

### Import Text Files (.sql) (Workbench)

In [None]:
import os
import re

# Directory (relative to the working directory) that holds the .sql checks.
directory = 'queries/'


def _natural_sort_key(name):
    """Return a sort key that orders embedded numbers numerically.

    re.split with a capturing group alternates text / digit chunks, so odd
    positions are always digit runs; converting those to int makes
    'test10.sql' sort after 'test9.sql' instead of after 'test1.sql'.
    """
    chunks = re.split(r'(\d+)', name)
    return [int(chunk) if pos % 2 else chunk for pos, chunk in enumerate(chunks)]


# os.listdir() returns entries in arbitrary order, so sort explicitly;
# otherwise test_N below may be bound to a different file on each run.
sql_filenames = sorted(
    (name for name in os.listdir(directory) if name.endswith('.sql')),
    key=_natural_sort_key,
)

queries = []

for filename in sql_filenames:
    filepath = os.path.join(directory, filename)

    # Read as UTF-8 explicitly, matching how the bucket-based loader decodes.
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()
        queries.append(content)

# Bind the first twelve queries (in sorted order) to individual names.
# Raises IndexError if fewer than twelve .sql files were found.
test_1 = queries[0]
test_2 = queries[1]
test_3 = queries[2]
test_4 = queries[3]
test_5 = queries[4]
test_6 = queries[5]
test_7 = queries[6]
test_8 = queries[7]
test_9 = queries[8]
test_10 = queries[9]
test_11 = queries[10]
test_12 = queries[11]



In [None]:
# Preview cell: substitute a dataset id into the first query via str.format.
# NOTE(review): this lowercase `dataset_id` differs from the DATASET_ID value
# assigned in the parameter cells -- confirm which one is intended.
dataset_id = "nba_offer_targeting_np"

# The formatted string is not assigned anywhere; as the last expression of the
# cell it is only displayed as the cell output.
test_1.format(dataset_id=dataset_id)

### Queries Paths

In [None]:
# Prefix under which the query files live: <stack>/<pipeline path>/queries/
QUERIES_PATH = f'{STACK_NAME}/{SERVING_PIPELINE_NAME_PATH}/queries/'

# One path per check, test1.sql through test12.sql.
TEST_1 = f'{QUERIES_PATH}test1.sql'
TEST_2 = f'{QUERIES_PATH}test2.sql'
TEST_3 = f'{QUERIES_PATH}test3.sql'
TEST_4 = f'{QUERIES_PATH}test4.sql'
TEST_5 = f'{QUERIES_PATH}test5.sql'
TEST_6 = f'{QUERIES_PATH}test6.sql'
TEST_7 = f'{QUERIES_PATH}test7.sql'
TEST_8 = f'{QUERIES_PATH}test8.sql'
TEST_9 = f'{QUERIES_PATH}test9.sql'
TEST_10 = f'{QUERIES_PATH}test10.sql'
TEST_11 = f'{QUERIES_PATH}test11.sql'
TEST_12 = f'{QUERIES_PATH}test12.sql'

### Import Text Files (.sql) (BI Layer)

In [None]:
# Load the .sql checks from the resource bucket in Cloud Storage.
storage_client = storage.Client()
bucket = storage_client.get_bucket(RESOURCE_BUCKET)

# Object names come from the TEST_1..TEST_12 path constants so the loader and
# the declared paths cannot drift apart.
# NOTE(review): the previous loop used range(0) (so it loaded nothing) and an
# object name of 'test_{i}.sql'; confirm the bucket uses 'test<N>.sql'.
query_blob_names = [
    TEST_1, TEST_2, TEST_3, TEST_4, TEST_5, TEST_6,
    TEST_7, TEST_8, TEST_9, TEST_10, TEST_11, TEST_12,
]
n = len(query_blob_names)

queries = []

for blob_name in query_blob_names:
    blob = bucket.get_blob(blob_name)
    # get_blob() returns None for a missing object; fail with the object name
    # instead of an AttributeError on None.
    if blob is None:
        raise FileNotFoundError(
            f'query file not found: gs://{RESOURCE_BUCKET}/{blob_name}'
        )
    content = blob.download_as_bytes().decode('utf-8')
    queries.append(content)

In [None]:
# Bind the first twelve loaded queries to test_1 .. test_12.
# Indexing (rather than slicing) keeps the IndexError when fewer than twelve
# queries are available.
(
    test_1, test_2, test_3, test_4, test_5, test_6,
    test_7, test_8, test_9, test_10, test_11, test_12,
) = [queries[position] for position in range(12)]

### Import Pipeline Components

In [None]:
# # download required component files to local
# prefix = f'{STACK_NAME}/{SERVING_PIPELINE_NAME_PATH}/components/'
# dl_dir = 'components/'

# storage_client = storage.Client()
# bucket = storage_client.bucket(RESOURCE_BUCKET)
# blobs = bucket.list_blobs(prefix=prefix)  # Get list of files
# for blob in blobs: # download each file that starts with "prefix" into "dl_dir"
#     if blob.name.endswith("/"):
#         continue
#     file_split = blob.name.split(prefix)
#     file_path = f"{dl_dir}{file_split[-1]}"
#     directory = "/".join(file_path.split("/")[0:-1])
#     Path(directory).mkdir(parents=True, exist_ok=True)
#     blob.download_to_filename(file_path) 

# import main pipeline components
from components.output_validation import output_validation

### Pipeline

In [None]:
# library imports
from kfp.v2 import compiler
from google.cloud.aiplatform import pipeline_jobs
@dsl.pipeline(
    name=SERVING_PIPELINE_NAME, 
    description=SERVING_PIPELINE_DESCRIPTION
    )
def pipeline(
        project_id: str = PROJECT_ID,
        region: str = REGION,
        resource_bucket: str = RESOURCE_BUCKET,
        file_bucket: str = FILE_BUCKET
    ):
    """Run the first three output-validation checks one after another.

    One `output_validation` task is created per query (test_1, test_2,
    test_3); each task is ordered after the previous one.

    Args:
        project_id: Project id forwarded to every validation task.
        region: Accepted for interface compatibility; not used in the body.
        resource_bucket: Accepted for interface compatibility; not used in the body.
        file_bucket: Accepted for interface compatibility; not used in the body.

    Raises:
        subprocess.CalledProcessError: If `gcloud auth print-access-token` fails.
    """
    import subprocess

    #### this code block is only for a personal workbench 

    # Fetch an access token for the current gcloud user. Only stdout is read,
    # so warnings printed on stderr cannot end up in the token.
    # NOTE: the token is obtained while the pipeline function runs and is
    # passed to the tasks as a plain string argument.
    token_str = subprocess.run(
        ['gcloud', 'auth', 'print-access-token'],
        capture_output=True,
        text=True,
        check=True,
    ).stdout.strip()

    #### the end

    # nba offer targeting checker function, once per query, chained sequentially
    previous_op = None
    for query in (test_1, test_2, test_3):
        validation_op = output_validation(
            project_id=project_id,  # use the pipeline parameter, not the global
            dataset_id=DATASET_ID,
            query=query,
            token=token_str,
        )

        validation_op.set_memory_limit('16G')
        validation_op.set_cpu_limit('4')

        if previous_op is not None:
            validation_op.after(previous_op)
        previous_op = validation_op
    

In [None]:
#     nba offer targeting checker function
#     output_validation_op_4 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_4
#        , token=token[0]) 
    
#     output_validation_op_4.set_memory_limit('16G')
#     output_validation_op_4.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_5 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_5
#        , token=token[0]) 
    
#     output_validation_op_5.set_memory_limit('16G')
#     output_validation_op_5.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_6 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_6
#        , token=token[0]) 
    
#     output_validation_op_6.set_memory_limit('16G')
#     output_validation_op_6.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_7 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_7
#        , token=token[0]) 
    
#     output_validation_op_7.set_memory_limit('16G')
#     output_validation_op_7.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_8 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_8
#        , token=token[0]) 
    
#     output_validation_op_8.set_memory_limit('16G')
#     output_validation_op_8.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_9 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_9
#        , token=token[0]) 
    
#     output_validation_op_9.set_memory_limit('16G')
#     output_validation_op_9.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_10 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_10
#        , token=token[0]) 
    
#     output_validation_op_10.set_memory_limit('16G')
#     output_validation_op_10.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_11 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_11
#        , token=token[0]) 
    
#     output_validation_op_11.set_memory_limit('16G')
#     output_validation_op_11.set_cpu_limit('4')
    
#     #nba offer targeting checker function
#     output_validation_op_12 = output_validation(        
#         project_id=PROJECT_ID
#       , dataset_id=DATASET_ID
#       , query=test_12
#        , token=token[0]) 
    
#     output_validation_op_12.set_memory_limit('16G')
#     output_validation_op_12.set_cpu_limit('4')

### Run the Pipeline Job

In [None]:
# from kfp.v2 import compiler
# from google.cloud.aiplatform import pipeline_jobs
# import json

# compiler.Compiler().compile(
#    pipeline_func=pipeline, package_path="pipeline.json"
# )

# job = pipeline_jobs.PipelineJob(
#                                    display_name=SERVING_PIPELINE_NAME,
#                                    template_path="pipeline.json",
#                                    location=REGION,
#                                    enable_caching=False,
#                                    pipeline_root = PIPELINE_ROOT
#                                 )
# job.run(service_account = f"bilayer-sa@{PROJECT_ID}.iam.gserviceaccount.com")


In [None]:
import google.oauth2.credentials
import json

# IPython shell capture: run gcloud and keep its output lines in `token`.
token = !gcloud auth print-access-token
# Build OAuth2 credentials from the first captured output line.
CREDENTIALS = google.oauth2.credentials.Credentials(token[0])

# Compile the pipeline function into a local job spec file, pipeline.json.
compiler.Compiler().compile(
   pipeline_func=pipeline, package_path="pipeline.json"
)

# Create the pipeline job from the compiled spec, authenticating with the
# credentials built above.
job = pipeline_jobs.PipelineJob(
   display_name=SERVING_PIPELINE_NAME,
   template_path="pipeline.json",
   credentials = CREDENTIALS,
   pipeline_root = PIPELINE_ROOT,
   location=REGION,
   enable_caching=False # I encourage you to enable caching when testing as it will reduce resource use
)

# Submit the job.
job.run()