In [2]:
import kfp
from kfp.v2.compiler import Compiler
from kfp import components as comp
from kfp.v2.google.client import AIPlatformClient
from google.cloud.aiplatform.pipeline_jobs import PipelineJob
from google.cloud.aiplatform import Model

# Notebook configuration.
# Google Cloud project ID.
project_id = "cde-ds-enablement-8k1r"
# Region; passed as `location` when the PipelineJob is created.
region = "europe-west1"
# GCS URI used as the pipeline root, both in the pipeline definition and in the PipelineJob.
pipeline_root_path = "gs://cde-dse-test-artifacts/penguin-model"


def _train_model():
    """Trains our penguin model.

    Reads the ``species``, ``island`` and ``sex`` columns of the public
    penguins table from BigQuery, drops rows with missing values or a ``"."``
    sex marker, fits a one-hot-encoding + random-forest pipeline that predicts
    sex (MALE -> 0, FEMALE -> 1) from species and island, and dumps the fitted
    pipeline with joblib to ``/gcs/cde-dse-test-outputs/model.pkl``.

    Every import lives inside the function body because the function is
    packaged as a self-contained component (see ``create_component_from_func``
    below); module-level imports of the notebook are not available to it.

    Returns:
        None. The only result is the pickled model file.

    Raises:
        KeyError: if the ``CLOUD_ML_PROJECT_ID`` environment variable is not set.
    """

    import os
    from pathlib import Path

    from google.cloud import bigquery

    import joblib
    from sklearn.compose import ColumnTransformer
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.preprocessing import OneHotEncoder
    from sklearn.pipeline import Pipeline

    feature_columns = ["species", "island"]
    target_column = "sex"

    # The project is read from the environment of the running container.
    client = bigquery.Client(project=os.environ["CLOUD_ML_PROJECT_ID"])

    # Fetch only the columns the model uses rather than the whole table.
    data = client.query(
        "SELECT species, island, sex "
        "FROM `bigquery-public-data`.ml_datasets.penguins"
    ).to_dataframe()

    # Drop rows with missing values, then rows whose sex is the "." placeholder.
    train = (
        data[feature_columns + [target_column]]
        .dropna()
        .loc[lambda frame: frame[target_column] != "."]
    )

    x = train[feature_columns]
    y = train[target_column].map({"MALE": 0, "FEMALE": 1}).astype(int)

    model = Pipeline(
        steps=[
            (
                "feature_engineering",
                ColumnTransformer(
                    transformers=[("one_hot", OneHotEncoder(), feature_columns)],
                    remainder="drop",
                ),
            ),
            (
                "model",
                # Fixed seed so repeated pipeline runs train the same forest.
                RandomForestClassifier(random_state=42),
            ),
        ]
    )

    model.fit(x, y)

    # On Vertex AI, /gcs/<bucket>/... is the Cloud Storage FUSE mount, so this
    # write lands in the cde-dse-test-outputs bucket and outlives the container.
    # NOTE(review): the bucket must exist and be writable by the job's service account.
    with Path("/gcs/cde-dse-test-outputs/model.pkl").open("wb") as file_:
        joblib.dump(model, file_)


# Wrap the training function as a pipeline component. The listed packages are
# requested for installation in the component's environment.
# NOTE(review): _train_model imports joblib, which is not listed here; it
# presumably arrives as a scikit-learn dependency — confirm.
# NOTE(review): _train_model does not import google.cloud.storage, so
# "google-cloud-storage" looks unused — confirm before dropping it.
train_model = comp.create_component_from_func(
    func=_train_model,
    packages_to_install=[
        "scikit-learn",
        "google-cloud-bigquery",
        "google-cloud-storage",
        "pandas",
        "pyarrow",
    ],
)
    

@kfp.dsl.pipeline(
    pipeline_root=pipeline_root_path,
    name="penguins",
)
def pipeline():
    """Single-step pipeline that runs the ``train_model`` component."""
    train_model()

    
# One name for the compiled pipeline spec so the compile step and the job
# submission cannot drift apart.
pipeline_package_path = "pipeline.json"

# Compile the pipeline function into a JSON spec on local disk.
Compiler().compile(
    pipeline_func=pipeline,
    package_path=pipeline_package_path,
)

# Create the job from the compiled spec. The project is pinned explicitly so
# the job does not depend on whatever default project the environment has.
job = PipelineJob(
    display_name="My first penguin pipeline",
    enable_caching=False,
    template_path=pipeline_package_path,
    parameter_values={},
    pipeline_root=pipeline_root_path,
    project=project_id,
    location=region,
)

# Run the job as the pipeline service account, derived from `project_id`
# instead of repeating the project ID in a literal.
job.run(service_account=f"cde-dse-test-pipeline@{project_id}.iam.gserviceaccount.com")

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/409590257761/locations/europe-west1/pipelineJobs/penguins-20211027121754
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/409590257761/locations/europe-west1/pipelineJobs/penguins-20211027121754')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west1/pipelines/runs/penguins-20211027121754?project=409590257761
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/409590257761/locations/europe-west1/pipelineJobs/penguins-20211027121754 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/409590257761/locations/europe-west1/pipelineJobs/penguins-20211027121754 curr

In [2]:
%pip install kfp google-cloud-aiplatform

Collecting kfp
  Downloading kfp-1.8.6.tar.gz (266 kB)
     |████████████████████████████████| 266 kB 11.8 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting absl-py<=0.11,>=0.9
  Downloading absl_py-0.11.0-py3-none-any.whl (127 kB)
     |████████████████████████████████| 127 kB 51.4 MB/s            
[?25hCollecting PyYAML<6,>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
     |████████████████████████████████| 636 kB 52.4 MB/s            
Collecting google-api-python-client<2,>=1.7.8
  Downloading google_api_python_client-1.12.8-py2.py3-none-any.whl (61 kB)
     |████████████████████████████████| 61 kB 47 kB/s              
[?25hCollecting google-auth<2,>=1.6.1
  Downloading google_auth-1.35.0-py2.py3-none-any.whl (152 kB)
     |████████████████████████████████| 152 kB 60.9 MB/s            
[?25hCollecting requests-toolbelt<1,>=0.8.0
  Downloading requests_toolbelt-0.9.1-py2.py3-none-any.whl (54 kB)
     |████████████████████