# Image Classification using Vertex AI Pipeline - Hasan Version

In [157]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)
# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade --quiet {USER_FLAG} google-cloud-aiplatform kfp google-cloud-pipeline-components google-cloud-storage

In [158]:
PROJECT_ID = "[your-project-id]"  

if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

Project ID: infra-sublime-368305


In [159]:
# Make PROJECT_ID the active project of the gcloud CLI for the commands in later cells.
! gcloud config set project $PROJECT_ID 

Updated property [core/project].


In [160]:
REGION = "[your-region]"  # @param {type: "string"}

# An untouched placeholder means "use the default region".
REGION = "us-central1" if REGION == "[your-region]" else REGION

In [161]:
import random
import string


def generate_uuid(length: int = 8) -> str:
    """Return a random identifier made of lowercase letters and digits.

    Despite the name this is not an RFC 4122 UUID; it is a short random
    suffix drawn with ``random.choices`` (characters may repeat).

    Args:
        length: Number of characters to generate (default 8).

    Returns:
        A string of exactly ``length`` characters.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)


# Random suffix shared by the resource names created in this session.
UUID = generate_uuid()

In [162]:

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

import os
import sys

# If on Vertex AI Workbench, then don't execute this code
IS_COLAB = "google.colab" in sys.modules
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        %env GOOGLE_APPLICATION_CREDENTIALS ''

## Create a Cloud Storage bucket

In [163]:
# Cloud Storage bucket used by this notebook, and its gs:// URI.
BUCKET_NAME = "vertexai-image-classification-demo"
BUCKET_URI = "gs://" + BUCKET_NAME

In [164]:
# No usable bucket name configured: derive one from the project id and the
# random session suffix, and rebuild the URI to match.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = "".join([PROJECT_ID, "aip-", UUID])
    BUCKET_URI = f"gs://{BUCKET_NAME}"

In [165]:
!gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_URI

Creating gs://vertexai-image-classification-demo/...
ServiceException: 409 A Cloud Storage bucket named 'vertexai-image-classification-demo' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


In [166]:
# Validate access to the bucket by listing its contents.
! gsutil ls -al $BUCKET_URI

                                 gs://vertexai-image-classification-demo/pipeline_root/


### Service Account

In [167]:
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    if IS_COLAB:
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)
     

Service Account: 433111697035-compute@developer.gserviceaccount.com


### Set service account access for Vertex AI Pipelines

In [168]:
# Grant the service account permission to create and to read objects in the
# bucket (roles/storage.objectCreator and roles/storage.objectViewer).
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

No changes made to gs://vertexai-image-classification-demo/
No changes made to gs://vertexai-image-classification-demo/


### Import libraries and define constants

In [169]:
import os
import typing
from typing import Any, Dict, List, Sequence, Union

import google.cloud.aiplatform as aip
import kfp
from google_cloud_pipeline_components.types.artifact_types import VertexDataset
from kfp.components import create_component_from_func
from kfp.v2 import compiler
from kfp.v2.dsl import component, pipeline, Artifact, ClassificationMetrics, Input, Output, Model, Metrics, Dataset

### Vertex AI Pipelines constants

In [170]:
# Cloud Storage location passed to the pipeline job as its pipeline root.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/flowers"

### Initialize Vertex AI SDK

In [171]:
# Initialize the Vertex AI SDK with the project, staging bucket and region
# chosen in the cells above.
aip.init(project=PROJECT_ID, staging_bucket=BUCKET_URI, location=REGION)

### AutoML image classification model pipeline

In [172]:
# Lightweight component that creates a Vertex AI image dataset
@component(
    base_image="python:3.9-slim",
    packages_to_install=["google-api-core==2.10.2", "google-cloud", "google-cloud-aiplatform", "typing", "kfp"],
)
def create_ds(project: str,
              display_name: str,
              gcs_source: str,
              import_schema_uri: str,
              timeout: int,
              dataset: Output[Dataset]):
    """Create a Vertex AI ``ImageDataset`` and publish its resource name.

    Args:
        project: Project passed to ``aiplatform.init``.
        display_name: Display name given to the new dataset.
        gcs_source: Cloud Storage URI of the import file.
        import_schema_uri: Import schema describing the data in ``gcs_source``.
        timeout: Forwarded to ``ImageDataset.create`` as
            ``create_request_timeout``.
        dataset: Output artifact; its ``uri`` and its ``resourceName`` metadata
            entry are both set to the created dataset's resource name.
    """
    # Imports live inside the function because the body of a lightweight
    # component runs on its own inside the component container.
    from google.cloud import aiplatform
    from google.cloud.aiplatform import datasets

    aiplatform.init(project=project)

    obj_dataset = datasets.ImageDataset.create(
        display_name=display_name,
        gcs_source=gcs_source,
        import_schema_uri=import_schema_uri,
        create_request_timeout=timeout,
    )

    obj_dataset.wait()

    # Look the resource name up once and expose it through both fields.
    resource_name = obj_dataset.gca_resource.name
    dataset.uri = resource_name
    dataset.metadata = {
        'resourceName': resource_name
    }

In [173]:
@kfp.dsl.pipeline(name="automl-image-training-v2")
def pipeline(project: str = PROJECT_ID, region: str = REGION):
    """AutoML image classification pipeline: dataset -> training -> endpoint -> deploy.

    Args:
        project: Project passed to the dataset, training and endpoint steps.
        region: Location passed to the endpoint creation step.
    """
    from google_cloud_pipeline_components import aiplatform as gcc_aip
    from google_cloud_pipeline_components.v1.endpoint import (EndpointCreateOp, ModelDeployOp)

    # Display name shared by the training job, the resulting model and the endpoint.
    job_name = "train-automl-flowers"

    # NOTE(review): the dataset was previously created with the prebuilt
    # gcc_aip.ImageDatasetCreateOp; the custom create_ds component is used
    # instead - presumably to control the create-request timeout; confirm.
    dataset_task = create_ds(
        project=project,
        display_name="flowers",
        gcs_source="gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv",
        import_schema_uri=aip.schema.dataset.ioformat.image.single_label_classification,
        timeout=3600,
    )

    # NOTE(review): `region` is only handed to EndpointCreateOp below; the
    # dataset and training steps receive no location - confirm this is intended.
    training_task = gcc_aip.AutoMLImageTrainingJobRunOp(
        project=project,
        display_name=job_name,
        prediction_type="classification",
        model_type="CLOUD",
        # The custom component emits a generic Dataset artifact, so type
        # checking is bypassed when wiring it into the training op.
        dataset=dataset_task.outputs["dataset"].ignore_type(),
        model_display_name=job_name,
        # 60% / 20% / 20% train / validation / test split.
        training_fraction_split=0.6,
        validation_fraction_split=0.2,
        test_fraction_split=0.2,
        budget_milli_node_hours=8000,
    )

    endpoint_task = EndpointCreateOp(
        project=project,
        location=region,
        display_name=job_name,
    )

    # Deploy the trained model to the new endpoint with exactly one replica.
    ModelDeployOp(
        model=training_task.outputs["model"],
        endpoint=endpoint_task.outputs["endpoint"],
        automatic_resources_min_replica_count=1,
        automatic_resources_max_replica_count=1,
    )

### Compile the pipeline

In [174]:
# Compile the pipeline function into a JSON pipeline spec on local disk.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    # Evaluates to "image_classification_pipeline.json".
    package_path="image classification_pipeline.json".replace(" ", "_"),
)

### Run the pipeline

In [None]:
# Job display name, made unique per session by the random UUID suffix.
DISPLAY_NAME = "flowers_" + UUID

job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    # Evaluates to "image_classification_pipeline.json", the file written by
    # the compile step.
    template_path="image classification_pipeline.json".replace(" ", "_"),
    pipeline_root=PIPELINE_ROOT,
    enable_caching=False,
)

# Submit the job; the captured output shows its state being reported
# repeatedly while it runs.
job.run()

# Remove the local compiled pipeline spec once the run call has returned.
! rm image_classification_pipeline.json

Creating PipelineJob
PipelineJob created. Resource name: projects/433111697035/locations/us-central1/pipelineJobs/automl-image-training-v2-20221229154843
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/433111697035/locations/us-central1/pipelineJobs/automl-image-training-v2-20221229154843')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/automl-image-training-v2-20221229154843?project=433111697035
PipelineJob projects/433111697035/locations/us-central1/pipelineJobs/automl-image-training-v2-20221229154843 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/433111697035/locations/us-central1/pipelineJobs/automl-image-training-v2-20221229154843 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/433111697035/locations/us-central1/pipelineJobs/automl-image-training-v2-20221229154843 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob proje

## Get the predictions

In [None]:
def create_batch_prediction_job_sample(
    project: str,
    location: str,
    model_resource_name: str,
    job_display_name: str,
    gcs_source: Union[str, Sequence[str]],
    gcs_destination: str,
    sync: bool = True,
):
    """Run a batch prediction job for an existing Vertex AI model.

    Args:
        project: Project used to initialize the Vertex AI SDK.
        location: Region used to initialize the Vertex AI SDK.
        model_resource_name: Resource name (or id) of the model to predict with.
        job_display_name: Display name of the batch prediction job.
        gcs_source: One Cloud Storage URI, or a sequence of URIs, holding the
            input instances.
        gcs_destination: Passed to ``batch_predict`` as
            ``gcs_destination_prefix``.
        sync: Forwarded to ``batch_predict``.

    Returns:
        The batch prediction job object, after ``wait()`` has returned.
    """
    # The notebook imports the SDK as `aip`; the bare name `aiplatform` is not
    # defined at notebook scope and raised NameError here.
    aip.init(project=project, location=location)

    my_model = aip.Model(model_resource_name)

    batch_prediction_job = my_model.batch_predict(
        job_display_name=job_display_name,
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination,
        sync=sync,
    )

    # Block until the job has finished, including when sync=False was requested.
    batch_prediction_job.wait()

    print(batch_prediction_job.display_name)
    print(batch_prediction_job.resource_name)
    print(batch_prediction_job.state)
    return batch_prediction_job