In [None]:
from google.cloud import aiplatform

# Configure a custom-container training job: the training image is started with
# the `penguin-model` command and the BigQuery table id as its argument, and a
# serving container image is attached for the resulting model.
job = aiplatform.CustomContainerTrainingJob(
    display_name="train-penguin-model",
    location="europe-west1",
    staging_bucket="gs://cde-dse-penguin-artifacts",
    container_uri="europe-west1-docker.pkg.dev/cde-ds-enablement-8k1r/vertex/penguin_model",
    command=["penguin-model", "bigquery-public-data.ml_datasets.penguins"],
    model_serving_container_image_uri="europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest",
)

# Bare expression so the notebook shows the job object as the cell output.
job

In [None]:
# Launch the training job under the dedicated pipeline service account and
# name the resulting model "penguin-model".
job.run(
    service_account="pipeline-penguin@cde-ds-enablement-8k1r.iam.gserviceaccount.com",
    model_display_name="penguin-model",
)

In [None]:
# IPython help lookup (development aid): displays the signature and docstring of `job.run`.
job.run?

In [None]:
from typing import Optional, NamedTuple

import kfp
from kfp.v2.google import experimental
import kfp.components as comp
from kfp.v2 import compiler  # noqa: F811
from kfp.v2.dsl import (
    component,
    Input,
    InputPath,
    OutputPath,
    Output,
    Dataset,
    Metrics,
)


@component(
    base_image="python:3.9-slim",
    packages_to_install=["google-cloud-bigquery", "pandas", "pyarrow"],
    output_component_file="query.yaml"
)
def query(
    query: str, output_path: OutputPath("Parquet"), project_id: Optional[str] = None
) -> None:
    """Run a BigQuery SQL query and store the result as a Parquet file.

    Args:
        query: SQL statement to execute.
        output_path: File path the query result is written to in Parquet
            format. Declared as an output path so the pipeline runtime
            supplies the location and exposes it as the `output_path` output.
        project_id: Project passed to the BigQuery client; None leaves the
            project selection to the client.
    """

    # Imported inside the function body: the component runs in its own
    # container, where only the packages listed in `packages_to_install`
    # are available.
    from google.cloud import bigquery

    client = bigquery.Client(project=project_id)
    job = client.query(query)

    # Materialise the full result set in memory, then write it to the output file.
    df = job.to_dataframe()
    df.to_parquet(output_path)


# query = comp.create_component_from_func(
#     _query,
#     base_image="python:3.9-slim",
#     packages_to_install=["google-cloud-bigquery", "pandas", "pyarrow"],
#     output_component_file="query.yaml",
# )


# Container-based training component, defined from an inline YAML spec.
# Interface: one input `train_dataset` (type Parquet) and one output `model`
# (type Model). The container is started as
# `penguin-model <train_dataset path> <model path>`; both paths are filled in
# through the `inputPath` / `outputPath` placeholders.
train_model = comp.load_component_from_text(
    """
name: Train model
description: Trains our model

inputs:
- {name: train_dataset, type: Parquet, description: 'Train dataset'}

outputs:
- {name: model, type: Model, description: 'Output model'}

implementation:
  container:
    image: europe-west1-docker.pkg.dev/cde-ds-enablement-8k1r/vertex/penguin_model
    command: [
      penguin-model, 
      {inputPath: train_dataset},
      {outputPath: model}
    ]
"""
)

@component(
    base_image="python:3.9-slim",
    output_component_file="eval_model.yaml"
)
def eval_model(model_path: InputPath("Model"), metrics: Output[Metrics]) -> NamedTuple(
    'EvalModelOutput',
    [
      ('roc', float)
    ]):
    """Log an evaluation metric for the trained model and return it.

    The model is not loaded yet: the ROC value is a hard-coded placeholder.

    Args:
        model_path: Path to the trained model artifact (currently only printed).
        metrics: Metrics artifact that receives the `roc` value.

    Returns:
        An `EvalModelOutput` named tuple whose `roc` field holds the same
        value that was logged to `metrics`.
    """
    # Imported inside the function body because the component runs in its own container.
    from collections import namedtuple

    print(model_path)

    roc = 0.9  # placeholder until a real evaluation is implemented
    metrics.log_metric("roc", roc)

    # The signature declares a `roc` output, so it has to be returned;
    # previously the function fell through and returned None.
    eval_output = namedtuple("EvalModelOutput", ["roc"])
    return eval_output(roc)
  

@kfp.dsl.pipeline(name="penguin")
def pipeline():
    # Step 1: export the public penguins table through the `query` component.
    penguins_sql = "SELECT * FROM bigquery-public-data.ml_datasets.penguins"
    query_task = query(penguins_sql, project_id="cde-ds-enablement-8k1r")

    # Step 2: train on the exported dataset inside the custom training container.
    # TODO: Figure out how to set constraints in the v2 API.
    # Docs: https://www.kubeflow.org/docs/distributions/gke/pipelines/enable-gpu-and-tpu/
    # .set_gpu_limit(1).add_node_selector_constraint(
    #     "cloud.google.com/gke-accelerator", "nvidia-tesla-k80"
    # )
    train_dataset = query_task.outputs["output_path"]
    train_task = train_model(train_dataset)

    # Step 3: evaluate the model produced by the training step.
    trained_model = train_task.outputs["model"]
    eval_task = eval_model(trained_model)

    # Model upload is not wired in yet; kept for reference.
    # model_upload_op = gcc_aip.ModelUploadOp(
    #     project=project,
    #     display_name=model_display_name,
    #     artifact_uri=WORKING_DIR,
    #     serving_container_image_uri=serving_container_image_uri,
    #     # serving_container_environment_variables={"NOT_USED": "NO_VALUE"},
    # )
    # model_upload_op.after(train_task)


# Compile the pipeline function into the job spec file `pipeline.json`.
compiler.Compiler().compile(package_path="pipeline.json", pipeline_func=pipeline)


In [None]:
from google.cloud.aiplatform.pipeline_jobs import PipelineJob

# Create a pipeline job from the compiled spec, with caching disabled and no
# runtime parameters. Note that this rebinds the notebook-level name `job`.
job = PipelineJob(
    display_name="penguins",
    template_path="pipeline.json",
    pipeline_root="gs://cde-dse-penguin-artifacts/pipelines",
    parameter_values={},
    enable_caching=False,
    location="europe-west1",
)

# Run the pipeline under the dedicated pipeline service account.
job.run(
    service_account="pipeline-penguin@cde-ds-enablement-8k1r.iam.gserviceaccount.com"
)

In [None]:
# NOTE(review): when the notebook is run top to bottom, `job` has been rebound
# to the PipelineJob created in the previous cell, whereas `model_display_name`
# is the argument used with the training job's run() in the earlier cell.
# This looks like a stale duplicate of that earlier cell — confirm whether
# PipelineJob.run accepts `model_display_name`; otherwise remove or retarget it.
job.run(
    model_display_name="penguin-model",
    service_account="pipeline-penguin@cde-ds-enablement-8k1r.iam.gserviceaccount.com"
)

In [None]:
# IPython help lookup (development aid): displays the documentation of `PipelineJob`.
PipelineJob?

In [None]:
# IPython help lookup (development aid): displays the documentation of
# `experimental.run_as_aiplatform_custom_job`.
experimental.run_as_aiplatform_custom_job?