In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. 

## KFP modularization

In [1]:
# Install the packages
%pip install --user --quiet google-cloud-aiplatform \
                         google-cloud-storage \
                         google-cloud-pipeline-components \
                         kfp


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
! python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

KFP SDK version: 2.10.1
google_cloud_pipeline_components version: 2.19.0


In [3]:
import sys
from IPython.display import Markdown, display

PROJECT_ID="ai-hangsik"
LOCATION="us-central1"

# For only colab user, no need this process for Colab Enterprise in Vertex AI.
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

# set project.
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [4]:
# Standard library
import json
from typing import NamedTuple

# Google Cloud and Kubeflow Pipelines SDKs
from google.cloud import aiplatform, bigquery
from kfp import compiler, dsl
from kfp.dsl import component

# Bind the Vertex AI SDK to the project and region configured earlier.
aiplatform.init(location=LOCATION, project=PROJECT_ID)

In [5]:
# Create a bucket.
BUCKET_URI = f"gs://mlops-poc-0303"
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://mlops-poc-0303/...


In [11]:
# Cloud Storage prefix, inside the bucket, used as the pipeline root.
PIPELINE_ROOT = BUCKET_URI + "/pipeline/"

In [6]:
shell_output = ! gcloud projects describe  $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")

SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print(f"SERVICE_ACCOUNT: {SERVICE_ACCOUNT}")

SERVICE_ACCOUNT: 721521243942-compute@developer.gserviceaccount.com


In [7]:
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewerroles/logging.logWriter

### Modules

In [19]:
# Placeholder `data_prep` component: prints its input with a "### data_prep:" prefix
# and returns it unchanged. The compiled spec is written to data_prep.yaml.
@component(base_image="python:3.10")
def data_prep(text: str) -> str:
    message = f"### data_prep: {text}"
    print(message)
    return text


compiler.Compiler().compile(pipeline_func=data_prep, package_path="data_prep.yaml")

In [20]:
# Placeholder `training` component: prints its input with a "### training:" prefix
# and returns it unchanged. The compiled spec is written to training.yaml.
@component(base_image="python:3.10")
def training(text: str) -> str:
    message = f"### training: {text}"
    print(message)
    return text


compiler.Compiler().compile(pipeline_func=training, package_path="training.yaml")

In [21]:
# Placeholder `serving` component: prints its input with a "### serving:" prefix
# and returns it unchanged. The compiled spec is written to serving.yaml.
@component(base_image="python:3.10")
def serving(text: str) -> str:
    message = f"### serving: {text}"
    print(message)
    return text


compiler.Compiler().compile(pipeline_func=serving, package_path="serving.yaml")

In [22]:
# Placeholder `option` component: prints its input with a "### option:" prefix
# and returns it unchanged. The compiled spec is written to option.yaml.
@component(base_image="python:3.10")
def option(text: str) -> str:
    message = f"### option: {text}"
    print(message)
    return text


compiler.Compiler().compile(pipeline_func=option, package_path="option.yaml")

In [15]:
# NOTE(review): this pipeline definition is disabled (commented out). As written it
# wires `module1` and `module2`, which are not defined in the cells above; the
# components defined there are `data_prep`, `training`, `serving` and `option`.
# Update the step names before re-enabling it.
# @dsl.pipeline(
#     name="module-pipeline",
#     description="pipeline for modularization",
#     pipeline_root=PIPELINE_ROOT,
# )

# def pipeline(text: str,):
    
#     module1_output = module1(text=text)
#     module2_output = module2(text=module1_output.output)

# The function defined in this cell is `pipeline`, so that is the one to compile.
# compiler.Compiler().compile(pipeline_func=pipeline, package_path="module_pipeline.yaml")

In [16]:
# NOTE(review): this submission is disabled (commented out). It reads the
# "module_pipeline.yaml" package, so the pipeline compile step must be re-enabled
# and run first.
# text = "Module pipeline test"

# job = aiplatform.PipelineJob(

#     display_name="module_pipeline",
#     template_path="module_pipeline.yaml",
#     parameter_values = {"text": text,},
#     pipeline_root=PIPELINE_ROOT,
# )

# job.run(service_account = SERVICE_ACCOUNT)

Creating PipelineJob
PipelineJob created. Resource name: projects/721521243942/locations/us-central1/pipelineJobs/module-pipeline-20250305034604
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/721521243942/locations/us-central1/pipelineJobs/module-pipeline-20250305034604')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/module-pipeline-20250305034604?project=721521243942
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/module-pipeline-20250305034604 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/module-pipeline-20250305034604 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/module-pipeline-20250305034604 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/721521243942/locations/us-central1/pipelineJobs/mo