# 03 - Deploy and Run the Training Pipeline to Vertex AI

The notebook covers the following tasks:

1. Build the container image for the ETL and Training steps.
2. Compile and upload the KFP pipeline to `Vertex AI`.
3. Run the KFP pipeline using `Vertex Pipelines`.
4. Execute the pipeline deployment CI/CD steps using `Cloud Build`.

## Setup

In [None]:
import os
import kfp
from src.kfp_pipelines import runner

# Show the installed KFP SDK version so it is recorded with the notebook output.
print(f"KFP Version: {kfp.__version__}")

In [None]:
# Google Cloud environment -- change these to match your own project.
PROJECT = 'merlin-on-gcp'
REGION = 'us-central1'
BUCKET = 'merlin-on-gcp'
VERTEX_SERVICE_ACCOUNT = 'vertex-sa-mlops@{}.iam.gserviceaccount.com'.format(PROJECT)

# Display names for the datasets, the model and the training pipeline.
# The model name (and therefore the pipeline name) embeds VERSION.
VERSION = 'v1'
MOVIES_DATASET_DISPLAY_NAME = 'movielens25m-movies'
RATINGS_DATASET_DISPLAY_NAME = 'movielens25m-ratings'
MODEL_DISPLAY_NAME = '-'.join(['movielens25m-recommender', VERSION])
PIPELINE_NAME = MODEL_DISPLAY_NAME + '-train-pipeline'

## 1. Build Container Image

In [None]:
# Name and Container Registry URI of the image used by the ETL and training steps.
NVT_IMAGE_NAME = "movielens-nvt0.5-tf2.4"
NVT_IMAGE_URI = "/".join(["gcr.io", PROJECT, NVT_IMAGE_NAME])
print(NVT_IMAGE_URI)

In [None]:
! gcloud builds submit --tag {IMAGE_URI} . --timeout=45m --machine-type=e2-highcpu-8

## 2. Compile and upload the KFP Pipeline 

### Set pipeline configurations

In [None]:
# Export the pipeline settings as environment variables.
# Presumably these are read by `src.kfp_pipelines.config`, which the next cell
# reloads -- confirm against that module.
os.environ.update({
    "PROJECT": PROJECT,
    "REGION": REGION,
    "GCS_LOCATION": f"gs://{BUCKET}/movielens25m",
    "NVT_IMAGE_URI": NVT_IMAGE_URI,
})

In [None]:
from src.kfp_pipelines import config
import importlib
# Re-execute the config module so its values are recomputed in this kernel.
importlib.reload(config)

# List the module's upper-case attributes (its settings) for a quick check.
for name, setting in vars(config).items():
    if not name.isupper():
        continue
    print(f'{name}: {setting}')

### Compile pipeline

In [None]:
# Local file that receives the compiled pipeline definition, named after the pipeline.
pipeline_definition_file = '{}.json'.format(config.PIPELINE_NAME)
runner.compile_pipeline(pipeline_definition_file)

### Upload pipeline to GCS

In [None]:
PIPELINES_STORE = os.path.join(config.GCS_LOCATION, "compilies_pipelines/")

! gsutil cp {pipeline_definition_file} {PIPELINES_STORE}
! gsutil ls {PIPELINES_STORE}

## 3. Run the KFP pipeline using Vertex Pipelines

In [None]:
# Full GCS path of the pipeline definition uploaded in the previous section.
gcs_pipeline_definition_file = os.path.join(PIPELINES_STORE, pipeline_definition_file)

# Empty mapping: no parameter values are passed to the run.
parameter_values = dict()

# Bind the result to `_` so it is not echoed as the cell output.
_ = runner.run_pipeline(gcs_pipeline_definition_file, parameter_values=parameter_values)

## 4. Execute the pipeline deployment CI/CD steps using Cloud Build.

### Build CI/CD container image

In [None]:
# Name (including tag) and Container Registry URI of the image used by the CI/CD steps.
CICD_IMAGE_NAME = 'cicd:latest'
CICD_IMAGE_URI = "/".join(["gcr.io", PROJECT, CICD_IMAGE_NAME])
print(CICD_IMAGE_URI)

In [None]:
! gcloud builds submit --tag $CICD_IMAGE_URI build/. --timeout=15m --machine-type=e2-highcpu-8

### Run CI/CD for pipeline deployment using Cloud Build

In [None]:
REPO_URL = "https://github.com/ksalama/merlin-on-labs.git" # Change to your github repo.
BRANCH = "main"

GCS_LOCATION = f"gs://{BUCKET}/movielens25m"
VERSION = 'latest'
PIPELINE_NAME = f'{MODEL_DISPLAY_NAME}-train-pipeline'
PIPELINES_STORE = os.path.join(GCS_LOCATION, "compiled_pipelines")
NVT_IMAGE_URI = f"gcr.io/{PROJECT}/{NVT_IMAGE_NAME}:{VERSION}"

SUBSTITUTIONS=f"""\
_REPO_URL='{REPO_URL}',\
_BRANCH={BRANCH},\
_CICD_IMAGE_URI={CICD_IMAGE_URI},\
_PROJECT={PROJECT},\
_REGION={REGION},\
_GCS_LOCATION={GCS_LOCATION},\
_MOVIES_DATASET_DISPLAY_NAME={MOVIES_DATASET_DISPLAY_NAME},\
_RATINGS_DATASET_DISPLAY_NAME={RATINGS_DATASET_DISPLAY_NAME},\
_MODEL_DISPLAY_NAME={MODEL_DISPLAY_NAME},\
_NVT_IMAGE_URI={NVT_IMAGE_URI},\
_PIPELINE_NAME={PIPELINE_NAME},\
_PIPELINES_STORE={PIPELINES_STORE}\
"""

! echo $SUBSTITUTIONS

In [None]:
! gcloud builds submit --no-source --timeout=60m --config build/pipeline-deployment.yaml --substitutions {SUBSTITUTIONS} --machine-type=e2-highcpu-8