In [1]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Pipelines: Iris Dataset Image classification pipelines using google-cloud-pipeline-components and AutoML

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/shivajid/MLOpsCICD/blob/master/notebooks/Irisflowersautomlkubeflowpieline.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Run in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/shivajid/MLOpsCICD/blob/master/notebooks/Irisflowersautomlkubeflowpieline.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/ai/platform/notebooks/deploy-notebook?download_url=https://github.com/shivajid/MLOpsCICD/blob/master/notebooks/Irisflowersautomlkubeflowpieline.ipynb">
      Open in Google Cloud Notebooks
    </a>
  </td>
</table>
<br/><br/><br/>

## Overview

This notebook is part of a 4-day workshop. It shows how you can run a training pipeline using Vertex AI Pipelines.


### Dataset

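The dataset used is the IRIS flowers dataset for flower image classification. The pipeline imports it from `gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv`.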


### Objective
In this tutorial, you create an AutoML image classification model using a pipeline with components from `google_cloud_pipeline_components`. This is just a precursor to the rest of the tutorial. We will use the same code template to run the next steps of automated training.

The steps performed include:

- Create a `Dataset` resource from the image data in Cloud Storage.
- Train an AutoML `Model` resource.
- Create an `Endpoint` resource.
- Deploy the `Model` resource to the `Endpoint` resource.

The components are [documented here](https://google-cloud-pipeline-components.readthedocs.io/en/latest/google_cloud_pipeline_components.aiplatform.html#module-google_cloud_pipeline_components.aiplatform).



### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and [Cloud Storage
pricing](https://cloud.google.com/storage/pricing), and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

### Set up your local development environment

If you are using Colab or Google Cloud Notebook, your environment already meets all the requirements to run this notebook. You can skip this step.

Otherwise, make sure your environment meets this notebook's requirements. You need the following:

- The Google Cloud SDK
- Git
- Python 3
- virtualenv
- Jupyter notebook running in a virtual environment with Python 3

The Google Cloud guide to [Setting up a Python development environment](https://cloud.google.com/python/setup) and the [Jupyter installation guide](https://jupyter.org/install) provide detailed instructions for meeting these requirements. The following steps provide a condensed set of instructions:

1. [Install and initialize the SDK](https://cloud.google.com/sdk/docs/).

2. [Install Python 3](https://cloud.google.com/python/setup#installing_python).

3. [Install virtualenv](https://cloud.google.com/python/setup#installing_and_using_virtualenv) and create a virtual environment that uses Python 3.

4. Activate that environment and run `pip3 install Jupyter` in a terminal shell to install Jupyter.

5. Run `jupyter notebook` on the command line in a terminal shell to launch Jupyter.

6. Open this notebook in the Jupyter Notebook Dashboard.


### Installation

Install the latest version of Vertex SDK for Python

In [1]:
import os

# On a Google Cloud Notebook (detected via this metadata file) pip installs
# use the --user flag; everywhere else no extra flag is passed.
USER_FLAG = "--user" if os.path.exists("/opt/deeplearning/metadata/env_version") else ""

In [3]:
# Run this cells if you have not installed the sdks before.

! pip3 install --upgrade google-cloud-aiplatform $USER_FLAG
! pip3 install $USER kfp google-cloud-pipeline-components==0.1.7 --upgrade

import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

Collecting google-cloud-pipeline-components==0.1.7
  Downloading google_cloud_pipeline_components-0.1.7-py3-none-any.whl (128 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.7/128.7 KB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting google-api-core<2dev,>=1.26.0
  Downloading google_api_core-1.31.5-py2.py3-none-any.whl (93 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.3/93.3 KB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
Collecting google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5
  Downloading google_api_core-2.7.0-py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.7/114.7 KB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading google_api_core-2.6.1-py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.6/114.6 KB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Downloading google_api_core-2.6.0-py2.py3-none-any.whl (11

In [1]:
# Check the installed versions of the KFP SDK and google-cloud-pipeline-components.
# Each check is a `!` shell command that starts its own `python3` process.

! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
! python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

KFP SDK version: 1.8.11
google_cloud_pipeline_components version: 0.1.7


## Before you begin

### GPU runtime

This tutorial does not require a GPU runtime.

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

2. [Make sure that billing is enabled for your project.](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [Enable the Vertex AI APIs, Compute Engine APIs, and Cloud Storage.](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component,storage-component.googleapis.com)

4. [The Google Cloud SDK](https://cloud.google.com/sdk) is already installed in Google Cloud Notebook.

5. Enter your project ID in the cell below. Then run the  cell to make sure the
Cloud SDK uses the right project for all the commands in this notebook.

**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$`.

### Pipeline Code Begins here

In the next steps, we will create a pipeline.

In [2]:
#Import the needed packages
import kfp
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient
from google.cloud import aiplatform
from google.cloud import aiplatform as aip
from google_cloud_pipeline_components import aiplatform as gcc_aip


In [3]:
# Set the project and the staging bucket that you want to use. You can reuse an
# existing bucket or create a new one; see the Google Cloud documentation.
aip.init(
    project="[Your Project Name]",
    staging_bucket="[Your Storage Bucker]",
)

### Build the Pipeline

This creates a very simple pipeline consisting of:
- Creating a Dataset
- Creating a Training Job
- Deploying the Model


In [3]:
PROJECT_ID = "[Your Project Name]"


@kfp.dsl.pipeline(name="[Your Pipeline Name]")
def pipeline(project: str = PROJECT_ID):
    """Create an image dataset, train an AutoML model on it, and deploy the model.

    Args:
        project: Google Cloud project ID passed to every step of the pipeline.
    """
    # Create the dataset
    ds_op = gcc_aip.ImageDatasetCreateOp(
        project=project,
        display_name="flowers",
        gcs_source="gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv",
        import_schema_uri=aip.schema.dataset.ioformat.image.single_label_classification,
    )

    # Train the model
    training_job_run_op = gcc_aip.AutoMLImageTrainingJobRunOp(
        project=project,
        display_name="train-automl-flowers",
        prediction_type="classification",
        model_type="CLOUD",
        base_model=None,
        dataset=ds_op.outputs["dataset"],
        model_display_name="train-automl-flowers",
        training_fraction_split=0.7,
        validation_fraction_split=0.2,
        test_fraction_split=0.1,
        budget_milli_node_hours=8000,
    )

    # 0.1.7 is needed currently to address a bug in the latest container image
    gcc_aip.ModelDeployOp.component_spec.implementation.container.image = (
        "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.1.7"
    )

    # Deploy the model. Pass the `project` pipeline parameter (not the
    # module-level PROJECT_ID) so that overriding `project` for a run also
    # applies to the deploy step, as it does for the two steps above.
    endpoint_op = gcc_aip.ModelDeployOp(
        model=training_job_run_op.outputs["model"],
        project=project,
    )
  

### Compile the pipeline

In [46]:
from kfp.v2 import compiler

# Compile the `pipeline` function into a JSON pipeline definition file;
# spaces in the file name are replaced with underscores.
compiler.Compiler().compile(
    package_path="image classification_pipeline.json".replace(" ", "_"),
    pipeline_func=pipeline,
)

### Create a job config 

In [51]:
from datetime import datetime

# Timestamp suffix used to tell runs apart by display name.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"
DISPLAY_NAME = f"flowers_{TIMESTAMP}"
# NOTE(review): this bucket path is hard-coded; presumably it must be replaced
# with a Cloud Storage location your own project can write to — confirm before running.
PIPELINE_ROOT = "gs://demogct/vipipelines/"

# Build the job from the compiled pipeline definition file.
job = aip.PipelineJob(
    pipeline_root=PIPELINE_ROOT,
    template_path="image classification_pipeline.json".replace(" ", "_"),
    display_name=DISPLAY_NAME,
)


### Execute the Pipeline

In [None]:
# Submit the PipelineJob; the log output below shows the job being created
# and its state being reported while it runs.
job.run()

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/313681173937/locations/us-central1/pipelineJobs/automl-image-training-v2-20211030074450
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/313681173937/locations/us-central1/pipelineJobs/automl-image-training-v2-20211030074450')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/automl-image-training-v2-20211030074450?project=313681173937
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/313681173937/locations/us-central1/pipelineJobs/automl-image-training-v2-20211030074450 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/313681173937/loca

## Cleaning up


