<a href="https://colab.research.google.com/github/silverstar0727/ML-Pipeline-Tutorial/blob/main/vertexai_pipeline_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 설치 및 라이브러리 임포트

In [1]:
# 해당 셀을 실행한 후에 반드시 "런타임 다시시작"을 해주세요
!pip install -q kfp

In [2]:
from typing import NamedTuple
import json 

import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, component, ClassificationMetrics)
from kfp.v2.google.client import AIPlatformClient

## GCP 연결

In [3]:
# Connect to GCP (this import is only available inside a Colab runtime)
from google.colab import auth as google_auth

google_auth.authenticate_user() # sign in with the GCP account you want to use

## 경로변수 설정
아래 항목들을 본인 환경에 맞게 수정해주세요.

* `PROJECT_ID` = `<프로젝트 ID>`
* `REGION` = `<리전>`
* `BUCKET_NAME` = `<bucket 이름>`
* `USER` = `<user 이름>`

In [4]:
from datetime import datetime

# Environment-specific settings: replace these with your own values.
PROJECT_ID = 'mlops-210515'
REGION = "us-central1"
BUCKET_NAME = "gs://pipeline-129332"  # note: holds the full gs:// URI, not a bare name
USER = "JeongMin-Do"

# Wall-clock time at which this cell ran, formatted as YYYYmmddHHMMSS.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

# Per-user location under the bucket, used as pipeline_root by the pipelines below.
PIPELINE_ROOT = f"{BUCKET_NAME}/pipeline_root/{USER}"

## Simple Sum

In [5]:
@component
def sum_(a: int, b: int, c: int) -> int:
    """Add three integers.

    Args:
        a: First addend.
        b: Second addend.
        c: Third addend.

    Returns:
        The sum of a, b and c.
    """
    return a + b + c

@dsl.pipeline(
    name="simple-sum",
    description="A simple sum pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def sum_pipeline(a: int, b: int, c: int):
    """Add constants to each pipeline input, then sum the three partial results.

    Args:
        a: Pipeline input; combined with 1 and 1 in the first task.
        b: Pipeline input; combined with 2 and 2 in the second task.
        c: Pipeline input; combined with 3 and 3 in the third task.

    Returns:
        The task object of the final `sum_` step.
    """
    # Three tasks with no dependency on each other.
    partial_a = sum_(a=a, b=1, c=1)
    partial_b = sum_(a=b, b=2, c=2)
    partial_c = sum_(a=c, b=3, c=3)

    # Fan-in step: consumes the outputs of the three tasks above.
    total = sum_(a=partial_a.output, b=partial_b.output, c=partial_c.output)
    return total

# Compile `sum_pipeline` into a job spec file named "sum.json".
compiler.Compiler().compile(
    pipeline_func = sum_pipeline, 
    package_path = "sum.json"
)

# Client bound to the project and region configured earlier in the notebook.
api_client = AIPlatformClient(
    project_id=PROJECT_ID,
    region=REGION,
)

# Create a run from the compiled spec, supplying values for the pipeline
# parameters a, b and c.
response = api_client.create_run_from_job_spec(
    job_spec_path="sum.json", 
    parameter_values = {"a": 1, "b": 2, "c": 3}
)

## Hello World (I/O)

In [6]:
@component
def hello_world(text: str) -> str:
    """Print ``text`` and hand it back unchanged.

    Args:
        text: The string to print.

    Returns:
        The same string that was passed in.
    """
    print(text)
    return text


@component(packages_to_install=["google-cloud-storage"])
def two_outputs(text: str) -> NamedTuple("Outputs", [("output_one", str), ("output_two", str)]):
    """Build two labelled strings from ``text`` and return them as a pair.

    Args:
        text: The string embedded in both outputs.

    Returns:
        A 2-tuple matching the declared outputs, in order: (output_one, output_two).
    """
    # NOTE(review): `storage` is never referenced in this body; presumably the
    # import only demonstrates that `packages_to_install` made the package
    # importable -- confirm before removing.
    from google.cloud import storage

    first = f"output one from text: {text}"
    second = f"output two from text: {text}"
    print(f"output one: {first}; output_two: {second}")
    return (first, second)


@component
def consumer(text1: str, text2: str, text3: str):
    """Print the three received strings on a single line.

    Args:
        text1: First string to print.
        text2: Second string to print.
        text3: Third string to print.
    """
    message = f"text1: {text1}; text2: {text2}; text3: {text3}"
    print(message)

@dsl.pipeline(
    name="hello-world-v2",
    description="A simple intro pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def intro_pipeline(text: str = "hi there"):
    """Feed ``text`` to two producer tasks and pass their outputs to `consumer`.

    Args:
        text: String handed to both `hello_world` and `two_outputs`.
    """
    # Two producers that both receive the pipeline input.
    echo_task = hello_world(text=text)
    pair_task = two_outputs(text=text)

    # A single-output task exposes `.output`; named outputs are looked up
    # through `.outputs[<name>]`.
    consumer(
        text1=echo_task.output,
        text2=pair_task.outputs["output_one"],
        text3=pair_task.outputs["output_two"],
    )

# Compile `intro_pipeline` into a job spec file named "hw_pipeline_job.json".
compiler.Compiler().compile(
    pipeline_func=intro_pipeline, 
    package_path="hw_pipeline_job.json"
)

# Client bound to the project and region configured earlier in the notebook.
api_client = AIPlatformClient(
    project_id=PROJECT_ID,
    region=REGION,
)

# Create a run from the compiled spec. No parameter values are passed here;
# `intro_pipeline` declares a default for `text`.
response = api_client.create_run_from_job_spec(
    job_spec_path="hw_pipeline_job.json"
)

## Flip Coin(condition)

In [13]:
@component
def args_generator_op() -> str:
    """Return a JSON-encoded list of two records, each with "cats" and "dogs" keys.

    Returns:
        The list serialized with ``json.dumps`` using sorted keys.
    """
    import json

    records = [
        {"cats": "1", "dogs": "2"},
        {"cats": "10", "dogs": "20"},
    ]
    return json.dumps(records, sort_keys=True)


@component
def print_op(msg: str):
    """Print ``msg``.

    Args:
        msg: The message to print.
    """
    print(msg)


@component
def flip_coin_op() -> str:
    """Pick "heads" or "tails" from a single random draw.

    Returns:
        Either the string "heads" or the string "tails".
    """
    import random

    # randint(0, 1) is inclusive on both ends: 0 selects heads, 1 selects tails.
    sides = ("heads", "tails")
    return sides[random.randint(0, 1)]


@dsl.pipeline(
    name="flipcoin",
    pipeline_root=PIPELINE_ROOT,
)
def my_pipeline(
    json_string: str = json.dumps(
        [
            {
                "snakes": "anaconda",
                "lizards": "anole",
                "bunnies": [{"cottontail": "bugs"}, {"cottontail": "thumper"}],
            },
            {
                "snakes": "cobra",
                "lizards": "gecko",
                "bunnies": [{"cottontail": "roger"}],
            },
            {
                "snakes": "boa",
                "lizards": "iguana",
                "bunnies": [
                    {"cottontail": "fluffy"},
                    {"fuzzy_lop": "petunia", "cottontail": "peter"},
                ],
            },
        ],
        sort_keys=True,
    )
):
    """Flip a coin, then loop over generated items and print a field chosen by the flip.

    Args:
        json_string: Text passed to `print_op` once per loop iteration. The
            default is a JSON-encoded list of three sample animal records.
    """
    flip1 = flip_coin_op()

    with dsl.Condition(
        flip1.output != "no-such-result", name="alwaystrue"
    ):  # always true

        args_generator = args_generator_op()
        
        # Loop that runs twice: once for {"cats": "1", "dogs": "2"} and once for {"cats": "10", "dogs": "20"}
        with dsl.ParallelFor(args_generator.output) as item:
            print_op(json_string)
            
            # Exactly one of the two conditions below matches the coin flip:
            # heads prints the item's "cats" value, tails prints its "dogs" value.
            with dsl.Condition(flip1.output == "heads", name="heads"):
                print_op(item.cats)
            
            with dsl.Condition(flip1.output == "tails", name="tails"):
                print_op(item.dogs)

# Disabled example code, kept as a bare string literal so it is never executed:
# further dsl.ParallelFor usages over `json_string`, including a nested loop
# over each item's "bunnies" sub-list.
'''
    with dsl.ParallelFor(json_string) as item:
        with dsl.Condition(item.snakes == "boa", name="snakes"):
            print_op(item.snakes)
            print_op(item.lizards)
            print_op(item.bunnies)

    # it is possible to access sub-items
    with dsl.ParallelFor(json_string) as item:
        with dsl.ParallelFor(item.bunnies) as item_bunnies:
            print_op(item_bunnies.cottontail)
'''

# Compile `my_pipeline` into a job spec file named "flipcoin.json".
compiler.Compiler().compile(
    pipeline_func = my_pipeline, package_path = "flipcoin.json"
)

# Client bound to the project and region configured earlier in the notebook.
api_client = AIPlatformClient(
    project_id=PROJECT_ID,
    region=REGION,
)

# Create a run from the compiled spec; here the pipeline root is also passed
# explicitly to the call.
response = api_client.create_run_from_job_spec(
    job_spec_path="flipcoin.json", pipeline_root=PIPELINE_ROOT
)