In [1]:
# Install the packages
! pip3 install --quiet --upgrade google-cloud-aiplatform \
                                 google-cloud-storage


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Google Cloud project used throughout this notebook.
PROJECT_ID = "fsi-env2"
# Make it the active project for the gcloud CLI. IPython substitutes
# {PROJECT_ID} with the Python value before handing the line to the shell.
! /opt/app-root/src/google-cloud-sdk/bin/gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [3]:
# Region used both for the Cloud Storage bucket and for Vertex AI resources.
REGION = "us-central1"

In [4]:
# Cloud Storage bucket that holds the training data for this notebook.
# The project ID is embedded in the name because bucket names must be
# globally unique across all Google Cloud projects.
BUCKET_URI = f"gs://mp-automl-data-{PROJECT_ID}-unique"

In [5]:
! /opt/app-root/src/google-cloud-sdk/bin/gsutil mb -l $REGION $BUCKET_URI

Creating gs://mp-automl-data-fsi-env2-unique/...
ServiceException: 409 A Cloud Storage bucket named 'mp-automl-data-fsi-env2-unique' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


## Save the data in Cloud Storage

In [6]:
IMPORT_FILE = "petfinder-tabular-classification.csv"
! /opt/app-root/src/google-cloud-sdk/bin/gsutil cp gs://cloud-samples-data/ai-platform-unified/datasets/tabular/{IMPORT_FILE} {BUCKET_URI}/data/

gcs_source = f"{BUCKET_URI}/data/{IMPORT_FILE}"

Copying gs://cloud-samples-data/ai-platform-unified/datasets/tabular/petfinder-tabular-classification.csv [Content-Type=text/csv]...
/ [1 files][872.8 KiB/872.8 KiB]                                                
Operation completed over 1 objects/872.8 KiB.                                    


## Import the Vertex AI SDK for Python

In [7]:
import os

from google.cloud import aiplatform

# Initialise the Vertex AI SDK with the project and region chosen above, so
# the calls below do not have to pass them explicitly.
aiplatform.init(project=PROJECT_ID, location=REGION)

## Create a Managed Tabular Dataset from a CSV

In [8]:
# Create a managed tabular dataset backed by the CSV copied to Cloud Storage.
dataset = aiplatform.TabularDataset.create(
    gcs_source=gcs_source,
    display_name="petfinder-tabular-dataset",
)
# Short alias for the same dataset object; later cells refer to `ds`.
ds = dataset

# Show the dataset's full resource name as the cell output.
ds.resource_name

Creating TabularDataset
Create TabularDataset backing LRO: projects/536618027487/locations/us-central1/datasets/5178436983545921536/operations/3987784294322929664
TabularDataset created. Resource name: projects/536618027487/locations/us-central1/datasets/5178436983545921536
To use this TabularDataset in another session:
ds = aiplatform.TabularDataset('projects/536618027487/locations/us-central1/datasets/5178436983545921536')


'projects/536618027487/locations/us-central1/datasets/5178436983545921536'

In [9]:
## Launch a training job to create a Model

In [10]:
# (column name, transformation kind) for every feature column used in training,
# listed in the order they are handed to the training job.
column_kinds = [
    ("Type", "categorical"),
    ("Age", "numeric"),
    ("Breed1", "categorical"),
    ("Color1", "categorical"),
    ("Color2", "categorical"),
    ("MaturitySize", "categorical"),
    ("FurLength", "categorical"),
    ("Vaccinated", "categorical"),
    ("Sterilized", "categorical"),
    ("Health", "categorical"),
    ("Fee", "numeric"),
    ("PhotoAmt", "numeric"),
]

# Expand the table into the structure the SDK expects:
# [{"categorical": {"column_name": "Type"}}, {"numeric": {"column_name": "Age"}}, ...]
column_transformations = [
    {kind: {"column_name": column}} for column, kind in column_kinds
]

# Define the AutoML classification job.
job = aiplatform.AutoMLTabularTrainingJob(
    display_name="train-petfinder-automl-1",
    optimization_prediction_type="classification",
    column_transformations=column_transformations,
)

# 80 / 10 / 10 split between training, validation and test data.
split_fractions = {
    "training_fraction_split": 0.8,
    "validation_fraction_split": 0.1,
    "test_fraction_split": 0.1,
}

# This will take around an hour to run
model = job.run(
    dataset=ds,
    target_column="Adopted",
    model_display_name="adopted-prediction-model",
    disable_early_stopping=False,
    **split_fractions,
)

View Training:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/6943761794531852288?project=536618027487
AutoMLTabularTrainingJob projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288 current state:
PipelineState.PIPELINE_STATE_PENDING
AutoMLTabularTrainingJob projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288 current state:
PipelineState.PIPELINE_STATE_PENDING
AutoMLTabularTrainingJob projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288 current state:
PipelineState.PIPELINE_STATE_PENDING
AutoMLTabularTrainingJob projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288 current state:
PipelineState.PIPELINE_STATE_PENDING
AutoMLTabularTrainingJob projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288 current state:
PipelineState.PIPELINE_STATE_PENDING
AutoMLTabularTrainingJob projects/536618027487/locations/us-central1/trai

## Deploy your model

In [11]:
# Machine type that serves the deployed model.
DEPLOY_MACHINE_TYPE = "n1-standard-4"

# Deploy the trained model; the returned endpoint is used for online prediction.
endpoint = model.deploy(machine_type=DEPLOY_MACHINE_TYPE)

Creating Endpoint
Create Endpoint backing LRO: projects/536618027487/locations/us-central1/endpoints/4428448007612006400/operations/6486578400071778304
Endpoint created. Resource name: projects/536618027487/locations/us-central1/endpoints/4428448007612006400
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/536618027487/locations/us-central1/endpoints/4428448007612006400')
Deploying model to Endpoint : projects/536618027487/locations/us-central1/endpoints/4428448007612006400
Deploy Endpoint model backing LRO: projects/536618027487/locations/us-central1/endpoints/4428448007612006400/operations/4185238990485454848
Endpoint model deployed. Resource name: projects/536618027487/locations/us-central1/endpoints/4428448007612006400


## Predict on your endpoint

In [12]:
# A single pet to score. Every value, including the numeric ones, is sent as a string.
instance = {
    "Type": "Cat",
    "Age": "3",
    "Breed1": "Tabby",
    "Gender": "Male",
    "Color1": "Black",
    "Color2": "White",
    "MaturitySize": "Small",
    "FurLength": "Short",
    "Vaccinated": "No",
    "Sterilized": "No",
    "Health": "Healthy",
    "Fee": "100",
    "PhotoAmt": "2",
}

# The endpoint takes a list of instances; here the list holds just one.
prediction = endpoint.predict([instance])
print(prediction)

Prediction(predictions=[{'classes': ['Yes', 'No'], 'scores': [0.6323009133338928, 0.3676990568637848]}], deployed_model_id='2933261727418023936', model_version_id='1', model_resource_name='projects/536618027487/locations/us-central1/models/2323019579862810624', explanations=None)


## Undeploy the model

In [14]:
# Undeploy the deployed model that answered the prediction request above.
deployed_model_id = prediction.deployed_model_id
endpoint.undeploy(deployed_model_id=deployed_model_id)

Undeploying Endpoint model: projects/536618027487/locations/us-central1/endpoints/4428448007612006400
Undeploy Endpoint model backing LRO: projects/536618027487/locations/us-central1/endpoints/4428448007612006400/operations/6765238627015327744
Endpoint model undeployed. Resource name: projects/536618027487/locations/us-central1/endpoints/4428448007612006400


## Cleaning Up

In [16]:
# Warning: Setting this to true will delete everything in your bucket
delete_bucket = True

# Delete the training job
job.delete()

# Delete the model
model.delete()

# Delete the endpoint
endpoint.delete()



if delete_bucket or os.getenv("IS_TESTING"):
    ! gsutil -m rm -r $BUCKET_URI

Deleting AutoMLTabularTrainingJob : projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288
Delete AutoMLTabularTrainingJob  backing LRO: projects/536618027487/locations/us-central1/operations/620076935468285952
AutoMLTabularTrainingJob deleted. . Resource name: projects/536618027487/locations/us-central1/trainingPipelines/6943761794531852288
Deleting Model : projects/536618027487/locations/us-central1/models/2323019579862810624
Delete Model  backing LRO: projects/536618027487/locations/us-central1/operations/4846705185755496448
Model deleted. . Resource name: projects/536618027487/locations/us-central1/models/2323019579862810624
Deleting Endpoint : projects/536618027487/locations/us-central1/endpoints/4428448007612006400
Delete Endpoint  backing LRO: projects/536618027487/locations/us-central1/operations/686505029972000768
Endpoint deleted. . Resource name: projects/536618027487/locations/us-central1/endpoints/4428448007612006400
/usr/bin/sh: line 1: gsutil: 