In [10]:
import kfp.dsl as dsl
import kfp
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient
from kfp.v2.dsl import component,Model,Output,Dataset
from typing import List,Union,Tuple,NamedTuple
from kfp.v2.dsl import Input, Model, Output, Dataset, Metrics, ClassificationMetrics, component, Artifact
from typing import *

In [11]:
@component(
    base_image='gcr.io/deeplearning-platform-release/sklearn-cpu:latest'
)
def dump_data_op(
    output_dataset: Output[Dataset]
) -> None:
    """Pickle the scikit-learn wine dataset into the output artifact directory.

    The feature matrix is written to ``x.pkl`` and the target to ``y.pkl``,
    each wrapped in a ``pandas.DataFrame``. The target is reduced to a boolean
    column that is True where the original class label equals 1.

    Args:
        output_dataset: Output artifact; its ``path`` is created as a
            directory and receives the two pickle files.
    """
    # Imports stay inside the body, as in the original component.
    import os
    import pickle

    import pandas as pd
    from sklearn.datasets import load_wine

    features, labels = load_wine(return_X_y=True)
    is_class_one = labels == 1  # binarise: class 1 versus everything else

    target_dir = output_dataset.path
    os.makedirs(target_dir, exist_ok=True)

    # Same write order as before: features first, then labels.
    for filename, payload in (('x.pkl', features), ('y.pkl', is_class_one)):
        with open(os.path.join(target_dir, filename), "wb") as handle:
            pickle.dump(pd.DataFrame(payload), handle)
    
    


In [12]:
# Pipeline configuration. The "XXXX" values look like redacted placeholders
# that must be filled in before running -- TODO confirm.
PROJECT_ID = "XXXX"  # not referenced by any cell visible in this notebook
BUCKET_NAME = "XXXX"  # not referenced by any cell visible in this notebook
# Used as the pipeline name and as the stem of the compiled "<name>.json" file.
PIPELINE_NAME = "pipeline-test-dockernise-models"
# Passed as pipeline_root to both the pipeline decorator and the PipelineJob.
PIPELINE_ROOT = "gs://XXXX"
@dsl.pipeline(
    name=PIPELINE_NAME,
    pipeline_root=PIPELINE_ROOT,
)
def pipeline():
    """Two-step pipeline: dump the dataset, then run the model-build component.

    The model-build step is loaded from ``model_build_component.yaml`` and
    receives the ``output_dataset`` artifact of the dump step together with
    ``n_estimators=2``.
    """
    dump_task = dump_data_op()

    # The component definition is read from a YAML file resolved relative to
    # the current working directory.
    build_model = kfp.components.load_component_from_file('model_build_component.yaml')
    build_model(
        input_dataset_path=dump_task.outputs['output_dataset'],
        n_estimators=2,
    )

In [None]:
from kfp.v2 import compiler  # noqa: F811

# Compile `pipeline` into "<PIPELINE_NAME>.json"; the job-submission cell
# reads the same path as its template.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=pipeline,
    package_path=f"{PIPELINE_NAME}.json",
)

In [None]:
from datetime import datetime

from google.cloud import aiplatform

# Timestamp suffix (local time, second resolution) so each submission gets
# a distinct job id.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Build the job from the JSON spec written by the compile cell.
job = aiplatform.PipelineJob(
    display_name="custom-test-dockernise-models-pipeline",
    template_path=f"{PIPELINE_NAME}.json",
    job_id=f"test-dockernise-models-pipeline-{TIMESTAMP}",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=True,
)
job.submit()
