In [None]:
# Copyright 2024 Forusone (shins777@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# AutoML tabular training - Classification
This notebook is a simplified version of the notebook below from the official Google Cloud GitHub repository. You can find more diverse code samples and detailed information at the links.
*  https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/automl/automl-tabular-classification.ipynb
*  https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/automl/


### Install Python packages


In [1]:
! pip3 install --quiet --upgrade google-cloud-aiplatform \
                                 google-cloud-storage

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m41.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.8/131.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h

### Set configuration

#### Authenticate to access GCP

In [2]:
import sys
from IPython.display import Markdown, display

PROJECT_ID="ai-hangsik"
LOCATION="asia-northeast3"

# For only colab user, no need this process for Colab Enterprise in Vertex AI.
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

# set project.
!gcloud config set project {PROJECT_ID}


Updated property [core/project].


#### Initialize Vertex AI SDK

In [5]:
from google.cloud import aiplatform

# Initialize the Vertex AI SDK with the project and region configured above.
aiplatform.init(location=LOCATION, project=PROJECT_ID)

### Data preparation

#### Create a bucket

In [3]:
# Create a bucket.
BUCKET_URI = f"gs://mlops-{PROJECT_ID}-1207"
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://mlops-ai-hangsik-1207/...


### Copy the dataset into the bucket

In [4]:
IMPORT_FILE = "petfinder-tabular-classification.csv"
! gsutil cp gs://cloud-samples-data/ai-platform-unified/datasets/tabular/{IMPORT_FILE} {BUCKET_URI}/data/

gcs_source = f"{BUCKET_URI}/data/{IMPORT_FILE}"

Copying gs://cloud-samples-data/ai-platform-unified/datasets/tabular/petfinder-tabular-classification.csv [Content-Type=text/csv]...
- [1 files][872.8 KiB/872.8 KiB]                                                
Operation completed over 1 objects/872.8 KiB.                                    


### Create a Managed tabular dataset from a CSV

In [6]:
# Create a Vertex AI tabular dataset from the CSV copied into the bucket.
dataset = aiplatform.TabularDataset.create(
    gcs_source=gcs_source,
    display_name="petfinder-tabular-dataset",
)
# `ds` is the name the training cell passes to job.run().
ds = dataset

# Display the dataset's resource name as the cell output.
ds.resource_name

INFO:google.cloud.aiplatform.datasets.dataset:Creating TabularDataset
INFO:google.cloud.aiplatform.datasets.dataset:Create TabularDataset backing LRO: projects/721521243942/locations/asia-northeast3/datasets/5353566096106455040/operations/2516156493863059456
INFO:google.cloud.aiplatform.datasets.dataset:TabularDataset created. Resource name: projects/721521243942/locations/asia-northeast3/datasets/5353566096106455040
INFO:google.cloud.aiplatform.datasets.dataset:To use this TabularDataset in another session:
INFO:google.cloud.aiplatform.datasets.dataset:ds = aiplatform.TabularDataset('projects/721521243942/locations/asia-northeast3/datasets/5353566096106455040')


'projects/721521243942/locations/asia-northeast3/datasets/5353566096106455040'

### Model training

In [7]:
# Feature columns, in order, paired with the transformation applied to each.
feature_kinds = [
    ("Type", "categorical"),
    ("Age", "numeric"),
    ("Breed1", "categorical"),
    ("Color1", "categorical"),
    ("Color2", "categorical"),
    ("MaturitySize", "categorical"),
    ("FurLength", "categorical"),
    ("Vaccinated", "categorical"),
    ("Sterilized", "categorical"),
    ("Health", "categorical"),
    ("Fee", "numeric"),
    ("PhotoAmt", "numeric"),
]

# Define the AutoML classification job with one transformation entry per column.
job = aiplatform.AutoMLTabularTrainingJob(
    display_name="train-petfinder-automl-1",
    optimization_prediction_type="classification",
    column_transformations=[
        {kind: {"column_name": column}} for column, kind in feature_kinds
    ],
)

# Train on the managed dataset to predict "Adopted", using an 80/10/10
# train/validation/test split. This takes about an hour to run.
model = job.run(
    dataset=ds,
    target_column="Adopted",
    model_display_name="adopted-prediction-model",
    training_fraction_split=0.8,
    validation_fraction_split=0.1,
    test_fraction_split=0.1,
    disable_early_stopping=False,
)

INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/asia-northeast3/training/3291437535770705920?project=721521243942
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/721521243942/locations/asia-northeast3/trainingPipelines/3291437535770705920 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/721521243942/locations/asia-northeast3/trainingPipelines/3291437535770705920 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/721521243942/locations/asia-northeast3/trainingPipelines/3291437535770705920 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/721521243942/locations/asia-northeast3/trainingPipelines/3291437535770705920 current state:
PipelineState.PIPELINE_STATE_RUNNI

### Create endpoint

In [8]:

# Deploy the trained model for online prediction on an n1-standard-4 machine;
# the returned endpoint is what the prediction cell calls.
endpoint = model.deploy(machine_type="n1-standard-4")


INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/721521243942/locations/asia-northeast3/endpoints/565153374723375104/operations/5970417408056229888
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/721521243942/locations/asia-northeast3/endpoints/565153374723375104
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/721521243942/locations/asia-northeast3/endpoints/565153374723375104')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/721521243942/locations/asia-northeast3/endpoints/565153374723375104
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/721521243942/locations/asia-northeast3/endpoints/565153374723375104/operations/1524238675934707712
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: proje

### Prediction

In [9]:
# A single pet record to score; every feature value is passed as a string.
sample_pet = dict(
    Type="Cat",
    Age="3",
    Breed1="Tabby",
    Gender="Male",
    Color1="Black",
    Color2="White",
    MaturitySize="Small",
    FurLength="Short",
    Vaccinated="No",
    Sterilized="No",
    Health="Healthy",
    Fee="100",
    PhotoAmt="2",
)

# Send the record to the deployed endpoint as a one-element batch.
prediction = endpoint.predict([sample_pet])

print(prediction)


Prediction(predictions=[{'classes': ['Yes', 'No'], 'scores': [0.640623152256012, 0.3593768179416656]}], deployed_model_id='9110654502821691392', metadata=None, model_version_id='1', model_resource_name='projects/721521243942/locations/asia-northeast3/models/3517315707022868480', explanations=None)


### Undeploy the model

In [None]:
# Optional: uncomment to undeploy only the deployed model that served the
# prediction above (identified by prediction.deployed_model_id).
#endpoint.undeploy(deployed_model_id=prediction.deployed_model_id)


### Cleaning up

In [None]:

# Delete the training job
job.delete()

# Delete the model
model.delete()

# Delete the endpoint
endpoint.delete()

# Warning: Setting this to true will delete everything in your bucket
delete_bucket = False

if delete_bucket:
    ! gsutil -m rm -r $BUCKET_URI