In [None]:
# Copyright 2024 Forusone(shins777@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Kubeflow pipeline on Vertex AI

This notebook is a simplified version of the official Google Cloud sample [pipelines_intro_kfp](https://colab.sandbox.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/pipelines/pipelines_intro_kfp.ipynb), adapted for an MLOps workshop.  
You can find more pipeline examples in [the official samples repository](https://github.com/GoogleCloudPlatform/vertex-ai-samples/tree/main/notebooks/official/pipelines).

* [Vertex AI Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines)
* [the Kubeflow Pipelines (KFP) SDK](https://www.kubeflow.org/docs/components/pipelines/)
* [Introduction to Vertex AI Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines/introduction)


## Install Vertex AI SDK

In [None]:
# Install the packages
! pip install --upgrade --quiet google-cloud-aiplatform \
                         google-cloud-storage \
                         google-cloud-pipeline-components \
                         kfp

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/345.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m337.9/345.4 kB[0m [31m27.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m337.9/345.4 kB[0m [31m27.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m337.9/345.4 kB[0m [31m27.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m337.9/345.4 kB[0m [31m27.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.4/345.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.6/343.6 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25h

In [None]:
# Report the installed SDK versions from the kernel's own environment.
# Reading the distribution metadata avoids spawning a separate `python3`
# process, which is not guaranteed to be the interpreter running this notebook.
import importlib.metadata

print('KFP SDK version: {}'.format(importlib.metadata.version("kfp")))
print('google_cloud_pipeline_components version: {}'.format(importlib.metadata.version("google-cloud-pipeline-components")))

KFP SDK version: 2.10.1
google_cloud_pipeline_components version: 2.18.0


## Configuration

### Authenticate your notebook environment

In [None]:
import sys
from IPython.display import Markdown, display

# Google Cloud project ID and region used by the cells that follow.
# Replace PROJECT_ID with your own project before running the notebook.
PROJECT_ID="ai-hangsik"
LOCATION="us-central1"

# Interactive authentication is performed only when the google.colab module is
# loaded (i.e. regular Colab). Per the original note, Colab Enterprise in
# Vertex AI does not need this step.
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

# Make PROJECT_ID the active project for the gcloud CLI commands used below.
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


### Initialize Vertex AI SDK

In [None]:
import json
from typing import NamedTuple

from google.cloud import aiplatform
from kfp import compiler, dsl
from kfp.dsl import component
from google.cloud import aiplatform, bigquery

# Initialize the Vertex AI SDK with the project and location configured above.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

In [None]:
# @title Enable Cloud translation API
# The translate component defined later in this notebook calls the Cloud
# Translation API, so the service is enabled here.
!gcloud services enable translate.googleapis.com

### Create a bucket

In [None]:
# Create a bucket.
BUCKET_URI = f"gs://mlops-{PROJECT_ID}-1209"
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://mlops-ai-hangsik-1209/...
ServiceException: 409 A Cloud Storage bucket named 'mlops-ai-hangsik-1209' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


### Specifying a service account to use for a pipeline run

In [None]:
shell_output = ! gcloud projects describe  $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")

SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

print(f"SERVICE_ACCOUNT: {SERVICE_ACCOUNT}")

SERVICE_ACCOUNT: 721521243942-compute@developer.gserviceaccount.com


### Set access for Service account

In [None]:
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

No changes made to gs://mlops-ai-hangsik-1209/
No changes made to gs://mlops-ai-hangsik-1209/


### Define constants

In [None]:
# Regional Vertex AI API service endpoint, derived from LOCATION.
API_ENDPOINT = f"{LOCATION}-aiplatform.googleapis.com"
# Pipeline root directory: the Cloud Storage path passed as `pipeline_root`
# to the pipeline definition and to the pipeline jobs.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline/translate"

## Define Python function-based pipeline components

### Define *get_input* component

In [None]:
@component(base_image="python:3.10")
def get_input(text: str) -> str:
    """Print the incoming text and return it unchanged.

    Args:
        text: The text handed to the pipeline.

    Returns:
        The same text, exposed as this component's output.
    """
    print(text)
    return text

# Write the component definition to a YAML file.
compiler.Compiler().compile(get_input, "get_input.yaml")

# The compiled YAML file can be loaded back as a component, for example:
# from kfp import components
# loaded_component = components.load_component_from_file('./get_input.yaml')

### Define *translation* component

In [None]:
@component(base_image="python:3.10",
           packages_to_install=["google-cloud-translate"])
def translate(
    text: str,
    project: str,
) -> NamedTuple(
    "Outputs",
    [
        ("output_1", str),
        ("output_2", str),
    ],
):
    """Translate Korean text into English and Japanese with Cloud Translation.

    Args:
        text: Source text, sent to the API with the Korean language code.
        project: Google Cloud project ID used to build the API parent path.

    Returns:
        A two-element tuple matching the declared outputs: ``output_1`` is the
        English translation prefixed with "translation 1: " and ``output_2``
        is the Japanese translation prefixed with "translation 2: ".
    """
    # Imported inside the function body: the package is listed in
    # packages_to_install for this component rather than imported by the notebook.
    from google.cloud import translate_v3

    client = translate_v3.TranslationServiceClient()
    parent = f"projects/{project}/locations/global"

    # Both requests translate the same input, so they share one source
    # language code (previously "ko-KR" for one request and "ko" for the other).
    source_language = "ko"

    # Request order is kept: English first, then Japanese.
    translated = []
    for target_language in ("en", "ja"):
        response = client.translate_text(
            contents=[text],
            source_language_code=source_language,
            target_language_code=target_language,
            parent=parent,
            mime_type="text/plain",
        )
        # A single content item was sent, so the first translation is the result.
        translated.append(response.translations[0].translated_text)

    o1 = f"translation 1: {translated[0]}"
    o2 = f"translation 2: {translated[1]}"

    print(f"output 1: {o1}; output 2: {o2}")

    return (o1, o2)

# Write the component definition to a YAML file.
compiler.Compiler().compile(translate, "translate.yaml")


### Define *collect* component

In [None]:
@component(base_image="python:3.10")
def collect(original: str, tran_output_1: str, tran_output_2: str) -> str:
    """Join the original text and its two translations into one summary string.

    The summary is printed and then returned as the component's output.
    """
    summary = f"original: {original}; translation_1: {tran_output_1}; translation_2: {tran_output_2}"
    print(summary)
    return summary

# Write the component definition to a YAML file.
compiler.Compiler().compile(collect, "collect.yaml")


### Define a pipeline that uses the components

In [None]:
@dsl.pipeline(
    name="translation-pipeline",
    description="pipeline to translate and collect",
    pipeline_root=PIPELINE_ROOT,
)
def translation_pipeline(text: str, project: str):
    # Step 1: pass the input text through the get_input component.
    source_task = get_input(text=text)
    source_text = source_task.output

    # Step 2: translate the text; the component exposes two named outputs.
    translate_task = translate(text=source_text, project=project)

    # Step 3: combine the original text with both translations.
    collect(
        original=source_text,
        tran_output_1=translate_task.outputs["output_1"],
        tran_output_2=translate_task.outputs["output_2"],
    )

## Compile the pipeline

In [None]:
# Compile the pipeline function into a pipeline definition file. The run cells
# below load this same file through `template_path`, so the two paths must match.
compiler.Compiler().compile(pipeline_func=translation_pipeline, package_path="translation_pipeline.json")

## Run the pipeline

### Run a pipeline

In [None]:

# Sample sentence used as the pipeline's `text` parameter.
text = "머신러닝에서 파이프라인을 만들기 위한 방법들이 무엇이 있나요 ?"

# Build the job from the compiled pipeline definition and its runtime parameters.
job = aiplatform.PipelineJob(
    display_name="translation_pipeline",
    template_path="translation_pipeline.json",
    parameter_values=dict(text=text, project=PROJECT_ID),
    pipeline_root=PIPELINE_ROOT,
)

# Submit the job so that it runs as the given service account.
job.run(service_account=SERVICE_ACCOUNT)

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250102135630
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250102135630')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/translation-pipeline-20250102135630?project=721521243942
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250102135630 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/721521243942/locations/us-central

### Run the pipeline with caching enabled

In [None]:
# Sample sentence used as the pipeline's `text` parameter.
text = "머신러닝에서 파이프라인을 만들기 위한 방법들이 무엇이 있나요 ?"

# Same job definition as a plain run, with caching explicitly switched on.
job = aiplatform.PipelineJob(
    display_name="translation_pipeline",
    template_path="translation_pipeline.json",
    parameter_values=dict(text=text, project=PROJECT_ID),
    pipeline_root=PIPELINE_ROOT,
    enable_caching=True,
)

# Submit the job so that it runs as the given service account.
job.run(service_account=SERVICE_ACCOUNT)

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250102135831
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250102135831')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/translation-pipeline-20250102135831?project=721521243942
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob run completed. Resource name: projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250102135831
