<a href="https://colab.research.google.com/github/silverstar0727/ML-Pipeline-Tutorial/blob/main/mnist_simple_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 설치 및 라이브러리 임포트

In [None]:
# 해당 셀을 실행한 후에 반드시 "런타임 다시시작"을 해주세요
!pip install -q kfp

In [1]:
from typing import NamedTuple
import json 

import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, component, ClassificationMetrics)
from kfp.v2.google.client import AIPlatformClient

## GCP 연결

In [2]:
# Connect to GCP: authenticate the current Colab user.
from google.colab import auth as google_auth

google_auth.authenticate_user()

# 경로변수 설정
아래 항목들을 본인 환경에 맞게 수정해주세요.

* PROJECT_ID = `<프로젝트 ID>`
* REGION = `<리전>`
* BUCKET_NAME = `<bucket 이름>` (`gs://` 접두사 포함)
* USER = `<user 이름>`

In [13]:
from datetime import datetime

# Target GCP project and region.
PROJECT_ID = 'mlops-210515'
REGION = "us-central1"

# Moment this cell was executed, rendered as YYYYMMDDHHMMSS.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"
# Cloud Storage bucket, written as a full gs:// URI.
BUCKET_NAME = "gs://pipeline-129332"

USER = "JeongMin-Do"
# Pipeline root layout: <bucket>/pipeline_root/<user>.
PIPELINE_ROOT = f"{BUCKET_NAME}/pipeline_root/{USER}"

# Simple Pipeline

#### download component

In [31]:
# Downloads MNIST and stores it at the component's OutputPath as a NumPy archive.
@component(base_image="tensorflow/tensorflow", output_component_file='download_mnist.yaml')
def download_mnist(output_dir_path: OutputPath("dataset")):
    """Download MNIST and write it to ``output_dir_path`` as a NumPy archive.

    Args:
        output_dir_path: Destination file path for the "dataset" output. The
            archive written there holds the arrays ``x_train``, ``y_train``,
            ``x_test`` and ``y_test``.
    """
    # Imports stay inside the function body, matching the other component.
    import os

    import numpy as np
    import tensorflow as tf

    # Let Keras download into its own cache instead of passing the output path
    # as `path`: that argument is documented as a cache-relative file name, and
    # newer Keras releases reject values that contain a path separator.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    parent_dir = os.path.dirname(output_dir_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Write through an open file handle so NumPy does not append ".npz" to the
    # file name; the artifact must live exactly at `output_dir_path`.
    with open(output_dir_path, "wb") as archive:
        np.savez_compressed(
            archive,
            x_train=x_train,
            y_train=y_train,
            x_test=x_test,
            y_test=y_test,
        )

#### train component

In [32]:
# Reads the dataset saved by the previous step through InputPath, trains a
# small classifier, and saves the model to the path given by OutputPath.
@component(base_image="tensorflow/tensorflow", output_component_file='train_mnist.yaml')
def train_mnist(data_path: InputPath("dataset"), model_output: OutputPath("Model")):
    """Train a dense classifier on the arrays stored at ``data_path`` and save it.

    Args:
        data_path: Path to a NumPy archive with the keys ``x_train``,
            ``y_train``, ``x_test`` and ``y_test``.
        model_output: Destination handed to ``model.save``.
    """
    import numpy as np
    import tensorflow as tf

    # NOTE(review): allow_pickle=True permits unpickling object arrays; only
    # safe while the archive comes from a trusted upstream step.
    with np.load(data_path, allow_pickle=True) as archive:
        x_train = archive['x_train']
        y_train = archive['y_train']
        x_test = archive['x_test']
        y_test = archive['y_test']
    print(x_train.shape)
    print(y_train.shape)

    # Flatten 28x28 inputs -> 128-unit ReLU layer -> 10 raw logits.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
    model.add(tf.keras.layers.Dense(128, activation='relu'))
    model.add(tf.keras.layers.Dense(10))

    # from_logits=True pairs with the final Dense layer having no activation.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=loss_fn,
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )

    # fit() is called with its default settings.
    model.fit(x_train, y_train)

    # evaluate() yields the loss followed by the single compiled metric.
    loss, acc = model.evaluate(x_test, y_test)
    print(f'loss: {loss}, acc: {acc}')

    model.save(model_output)

#### pipeline 정의

In [33]:
# Defines the pipeline: run the download component, then the training component.
@dsl.pipeline(
    name="mnist-simple-1",
    description="A simple mnist pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def tf_mnist_pipeline():
    """Download MNIST, then train on the downloaded dataset."""
    download_task = download_mnist()
    # Feed the download step's output to the trainer as its dataset input.
    train_mnist(download_task.output)

#### compile & run

In [34]:
# Compile the pipeline function to a job spec file, then submit that same file
# as a pipeline run.
PIPELINE_PACKAGE_PATH = "mnist-simple-1.json"

compiler.Compiler().compile(
    pipeline_func=tf_mnist_pipeline,
    package_path=PIPELINE_PACKAGE_PATH,
)

api_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)
response = api_client.create_run_from_job_spec(job_spec_path=PIPELINE_PACKAGE_PATH)