In [66]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. 

# Kubeflow pipeline on Vertex AI

This notebook is a simplified version of the official Google [pipelines_intro_kfp](https://colab.sandbox.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/pipelines/pipelines_intro_kfp.ipynb) notebook, adapted for an MLOps workshop.  
You can find more pipeline examples in [this repository](https://github.com/GoogleCloudPlatform/vertex-ai-samples/tree/main/notebooks/official/pipelines).

* [Vertex AI Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines)
* [the Kubeflow Pipelines (KFP) SDK](https://www.kubeflow.org/docs/components/pipelines/)
* [Introduction to Vertex AI Pipelines](https://cloud.google.com/vertex-ai/docs/pipelines/introduction)


In [67]:
# Show the notebook's current working directory.
%pwd

'/home/jupyter/mlops_vertexai/02.pipeline/kfp_basic'

## Install Vertex AI SDK

In [68]:
# Install the packages
%pip install --user --quiet google-cloud-aiplatform \
                         google-cloud-storage \
                         google-cloud-pipeline-components \
                         kfp


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [69]:
# Print the installed SDK versions from a separate python3 process
# (not from the running kernel).
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
! python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

KFP SDK version: 2.10.1
google_cloud_pipeline_components version: 2.19.0


## Configuration

### Authenticate your notebook environment

In [70]:
import sys
from IPython.display import Markdown, display

PROJECT_ID="ai-hangsik"
LOCATION="us-central1"

# Authenticate only when running in Colab (detected by the google.colab module
# being loaded). Colab Enterprise in Vertex AI does not need this step.
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

# Set the active gcloud project.
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


### Initialize Vertex AI SDK

In [71]:
import json
from typing import NamedTuple

from google.cloud import aiplatform
from kfp import compiler, dsl
from kfp.dsl import component
from google.cloud import aiplatform, bigquery

# Initialize the Vertex AI SDK with the project and location configured above.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

In [72]:
# @title Enable Cloud translation API
# The translate component in this notebook calls the Cloud Translation API,
# so the service is enabled here via gcloud.
!gcloud services enable translate.googleapis.com

### Create a bucket

In [73]:
# Create a bucket.
BUCKET_URI = f"gs://mlops-0221"
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://mlops-0221/...
ServiceException: 409 A Cloud Storage bucket named 'mlops-0221' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


### Specifying a service account to use for a pipeline run

In [74]:
shell_output = ! gcloud projects describe  $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")

SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print(f"SERVICE_ACCOUNT: {SERVICE_ACCOUNT}")

SERVICE_ACCOUNT: 721521243942-compute@developer.gserviceaccount.com


### Grant access to the service account

In [75]:
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewerroles/logging.logWriter

No changes made to gs://mlops-0221/
No changes made to gs://mlops-0221/


### Define constants

In [76]:
# API service endpoint
API_ENDPOINT = f"{LOCATION}-aiplatform.googleapis.com"
# Pipelne root dir
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline/translate"

## Define Python function-based pipeline components

### Define *get_input* component

In [77]:
@component(base_image="python:3.10")
def get_input(text: str) -> str:
    """Print the incoming text and return it unchanged."""
    message = f"### input: {text}"
    print(message)
    return text


# Write the component definition to a reusable YAML file.
compiler.Compiler().compile(pipeline_func=get_input, package_path="get_input.yaml")

# The YAML file can later be loaded back as a component, for example
# (requires `from kfp import components`):
# loaded_component = components.load_component_from_file('./get_input.yaml')

### Define *translation* component

In [78]:
@component(base_image="python:3.10",
           packages_to_install=["google-cloud-translate"])
def translate(
    text: str,
    project: str,
) -> NamedTuple(
    "Outputs",
    [
        ("output_1", str),
        ("output_2", str),
    ],
):
    """Translate Korean text into English and Japanese with Cloud Translation.

    Args:
        text: Korean source text to translate.
        project: Google Cloud project ID used to build the API parent path.

    Returns:
        A tuple (output_1, output_2) holding the English and the Japanese
        translation, prefixed with "translation 1: " and "translation 2: ".
    """
    # Imported inside the function because the component body runs in its own
    # container, where only packages_to_install are available.
    from google.cloud import translate_v3

    client = translate_v3.TranslationServiceClient()
    parent = f"projects/{project}/locations/global"

    def _translate_to(target_language_code: str) -> str:
        """Translate `text` from Korean into the given target language."""
        response = client.translate_text(
            contents=[text],
            # Both requests use the same source code; the original mixed
            # "ko-KR" and "ko" for the same input text.
            source_language_code="ko",
            target_language_code=target_language_code,
            parent=parent,
            mime_type="text/plain",
        )
        return response.translations[0].translated_text

    o1 = f"translation 1: {_translate_to('en')}"
    o2 = f"translation 2: {_translate_to('ja')}"

    print(f"### output 1: {o1}; output 2: {o2}")

    return (o1, o2)


# Write the component definition to a reusable YAML file.
compiler.Compiler().compile(translate, "translate.yaml")


### Define *collect* component

In [79]:
@component(base_image="python:3.10")
def collect(original: str, tran_output_1: str, tran_output_2: str) -> str:
    """Combine the original text and both translations into one string.

    Args:
        original: The untranslated input text.
        tran_output_1: First translation output.
        tran_output_2: Second translation output.

    Returns:
        A string of the form
        "original: ...; translation_1: ...; translation_2: ...".
    """
    import logging

    logger = logging.getLogger()

    # Log every input under its parameter name (the original labelled the
    # third one "translation_2", inconsistent with "tran_output_1").
    logger.info("### original: %s", original)
    logger.info("### tran_output_1: %s", tran_output_1)
    logger.info("### tran_output_2: %s", tran_output_2)

    return f"original: {original}; translation_1: {tran_output_1}; translation_2: {tran_output_2}"


# Write the component definition to a reusable YAML file.
compiler.Compiler().compile(collect, "collect.yaml")


### Define a pipeline that uses the components

In [80]:
@dsl.pipeline(
    name="translation-pipeline",
    description="pipeline to translate and collect",
    pipeline_root=PIPELINE_ROOT,
)
def translation_pipeline(text: str, project: str):
    # Step 1: pass the raw text through the get_input component.
    source_task = get_input(text=text)

    # Step 2: translate the text; this task exposes two named outputs.
    translation_task = translate(text=source_task.output, project=project)

    # Step 3: merge the original text with both translations.
    collect(
        original=source_task.output,
        tran_output_1=translation_task.outputs["output_1"],
        tran_output_2=translation_task.outputs["output_2"],
    )

## Compile the pipeline

In [81]:
# Compile the pipeline function into the template file used as template_path
# when the PipelineJob is created.
compiler.Compiler().compile(
    pipeline_func=translation_pipeline,
    package_path="translation_pipeline.json",
)

## Run the pipeline

### Run a pipeline

In [None]:

# Korean sample sentence used as the pipeline input.
text = "머신러닝에서 파이프라인을 만들기 위한 방법들이 무엇이 있나요 ?"

# Create the job from the compiled template and submit it under the
# service account selected earlier.
job = aiplatform.PipelineJob(
    template_path="translation_pipeline.json",
    display_name="translation_pipeline",
    pipeline_root=PIPELINE_ROOT,
    parameter_values=dict(text=text, project=PROJECT_ID),
)
job.run(service_account=SERVICE_ACCOUNT)

Creating PipelineJob
PipelineJob created. Resource name: projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250221055257
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250221055257')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/translation-pipeline-20250221055257?project=721521243942
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250221055257 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250221055257 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/translation-pipeline-20250221055257 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/721521243942/locatio

### Enable caching

In [None]:
# Korean sample sentence used as the pipeline input.
text = "머신러닝에서 파이프라인을 만들기 위한 방법들이 무엇이 있나요 ?"

# Same job as before, but with caching explicitly switched on.
job = aiplatform.PipelineJob(
    template_path="translation_pipeline.json",
    display_name="translation_pipeline",
    pipeline_root=PIPELINE_ROOT,
    parameter_values=dict(text=text, project=PROJECT_ID),
    enable_caching=True,
)
job.run(service_account=SERVICE_ACCOUNT)