In [1]:
import os

from google.cloud import aiplatform
from google_cloud_pipeline_components import aiplatform as gcc_aip
from kfp import dsl
from kfp.dsl import pipeline
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient

In [2]:
# --- Notebook configuration -------------------------------------------------
# Google Cloud project and region the pipeline is submitted to.
PROJECT_ID = "famous-momentum-338708"
REGION = "us-central1"

# Cloud Storage bucket holding the dataset annotations and pipeline artifacts.
GCBUCKET = "gs://long_cat_classification"

# GCS URIs always use "/" as separator. os.path.join would emit "\" on
# Windows, so the URIs are assembled explicitly instead.
PIPELINE_ROOT = f"{GCBUCKET}/pipeline"

# JSONL import file listing the labelled images (single-label classification).
DATASET_META_PATH = (
    f"{GCBUCKET}/annotations/"
    "export-data-animal_faces-2022-02-11T05:08:16.516040Z/"
    "image_classification_single_label/"
    "animal_faces_icn-3222541037662633984/"
    "data-00001-of-00001.jsonl"
)

# Local path the compiled pipeline spec is written to (a real filesystem
# path, so os.path.join is appropriate here).
PIPELINE_SPEC_PATH = os.path.join("./", "pipeline", "spec", "pipeline.json")

In [3]:
# Decorate via `dsl.pipeline` rather than the bare `pipeline` name: this
# function is itself called `pipeline`, so after the first run the bare name
# points at the function and re-running the cell would fail.
@dsl.pipeline(name="cat-classfication-pipeline-v2")
def pipeline(project: str = PROJECT_ID, region: str = REGION):
    """Create an image dataset, train an AutoML classifier and deploy it.

    Steps, in order: create a Vertex AI image dataset from the JSONL file at
    DATASET_META_PATH, run an AutoML image classification training job on it,
    create an endpoint, and deploy the trained model to that endpoint.

    Args:
        project: Google Cloud project ID the resources are created in.
        region: Location used for the dataset, training job and endpoint.
    """
    dataset_op = gcc_aip.ImageDatasetCreateOp(
        project=project,
        # Keep every resource in the same region; without this the component
        # default location is used regardless of `region`.
        location=region,
        display_name="cat_classification_ds",
        gcs_source=DATASET_META_PATH,
        import_schema_uri=aiplatform.schema.dataset.ioformat.image.single_label_classification,
    )

    # https://googleapis.dev/python/aiplatform/latest/aiplatform.html#google.cloud.aiplatform.AutoMLImageTrainingJob
    training_op = gcc_aip.AutoMLImageTrainingJobRunOp(
        project=project,
        location=region,
        display_name="cat_automl_training",
        prediction_type="classification",
        # "CLOUD" selects a model meant to be served from Google Cloud
        # (see the AutoMLImageTrainingJob docs linked above for alternatives).
        model_type="CLOUD",
        dataset=dataset_op.outputs["dataset"],  # Dataset to train on
        model_display_name="cat_classification_automl",
        # Train / validation / test fractions; they sum to 1.0.
        training_fraction_split=0.7,
        validation_fraction_split=0.15,
        test_fraction_split=0.15,
        # Training budget cap in milli node hours (8000 = 8 node hours).
        budget_milli_node_hours=8000,
    )

    # Create the endpoint, then deploy the trained model to it.
    endpoint_op = gcc_aip.EndpointCreateOp(
        project=project,
        location=region,
        display_name="train_cat_automl",
    )

    gcc_aip.ModelDeployOp(
        model=training_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        # Fixed single replica: no autoscaling.
        automatic_resources_min_replica_count=1,
        automatic_resources_max_replica_count=1,
    )

In [4]:
# Compile the pipeline function to a JSON job spec on local disk.
# The target directory may not exist on a fresh checkout, so create it first;
# otherwise writing the spec file would fail.
spec_dir = os.path.dirname(PIPELINE_SPEC_PATH)
if spec_dir:
    os.makedirs(spec_dir, exist_ok=True)

compiler.Compiler().compile(
    pipeline_func=pipeline, package_path=PIPELINE_SPEC_PATH
)

# Submit the compiled spec as a pipeline run.
api_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)

response = api_client.create_run_from_job_spec(
    PIPELINE_SPEC_PATH,
    pipeline_root=PIPELINE_ROOT,
    # Pass both parameters explicitly so the run uses the current config
    # values rather than defaults baked in when the pipeline was defined.
    parameter_values={"project": PROJECT_ID, "region": REGION},
)

