In [1]:
from google.cloud import aiplatform

In [3]:
aiplatform.init(
    # your Google Cloud Project ID or number
    # environment default used is not set
    project='tii-sac-platform-sandbox-alpha',

    # the Vertex AI region you will use
    # defaults to us-central1
    location='europe-west4',

    # Google Cloud Storage bucket in same region as location
    # used to stage artifacts
    staging_bucket='gs://vertexai_staging_bucket',

    # custom google.auth.credentials.Credentials
    # environment default creds used if not set
    # credentials=my_credentials,

    # customer managed encryption key resource name
    # will be applied to all Vertex AI resources if set
    # encryption_spec_key_name=my_encryption_key_name,

    # the name of the experiment to use to track
    # logged metrics and parameters
    experiment='titanic-classifier',

    # description of the experiment above
    experiment_description='VertexAI Demo for IIT'
)

# Select federated dataset from VertexAI to use
#### For autoML the dataset needs to have 1000 rows minimum

In [36]:
dataset = aiplatform.TabularDataset(dataset_name="6639106295209132032")

# Run AutoML Job

In [43]:
from datetime import datetime
JOB_PREFIX="wine-quality-classifier"
JOB_NAME=f"{JOB_PREFIX}-{datetime.now()}-autoML"
print(JOB_NAME)

wine-quality-classifier-2021-12-31 10:41:39.228760-autoML


In [44]:
job = aiplatform.AutoMLTabularTrainingJob(
  display_name=JOB_NAME,
  optimization_prediction_type="classification",
  optimization_objective="minimize-log-loss",
  column_transformations=[
        {"numeric": {"column_name": "alcohol"}},
        {"numeric": {"column_name": "chlorides"}},
        {"numeric": {"column_name": "citric acid"}},
        {"numeric": {"column_name": "density"}},
        {"numeric": {"column_name": "fixed acidity"}},
        {"numeric": {"column_name": "free sulfur dioxide"}},
        {"numeric": {"column_name": "pH"}},
        {"categorical": {"column_name": "quality"}},
        {"numeric": {"column_name": "residual sugar"}},
        {"numeric": {"column_name": "sulphates"}},
        {"numeric": {"column_name": "total sulfur dioxide"}},
        {"numeric": {"column_name": "volatile acidity"}},
    ],
)

  column_specs, column_transformations


In [45]:
model = job.run(
    dataset=dataset,
    target_column="quality",
    training_fraction_split=0.6,
    validation_fraction_split=0.2,
    test_fraction_split=0.2,
    budget_milli_node_hours=1,
    model_display_name="wine-quality-automl-model",
    disable_early_stopping=False,
)

INFO:google.cloud.aiplatform.training_jobs:View Training:
https://console.cloud.google.com/ai/platform/locations/europe-west4/training/8746274150353469440?project=427665163432
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/427665163432/locations/europe-west4/trainingPipelines/8746274150353469440 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/427665163432/locations/europe-west4/trainingPipelines/8746274150353469440 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/427665163432/locations/europe-west4/trainingPipelines/8746274150353469440 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.training_jobs:AutoMLTabularTrainingJob projects/427665163432/locations/europe-west4/trainingPipelines/8746274150353469440 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.

RuntimeError: Training failed with:
code: 3
message: "Invalid column names: ,free sulfur dioxide,volatile acidity,total sulfur dioxide,fixed acidity,residual sugar,citric acid"


# Deploy your autoML model as API

In [None]:
endpoint = model.deploy(
    machine_type="n1-standard-4",
)

# Predict with the endpoint using the test data

In [26]:
import pandas as pd

In [31]:
test = pd.read_csv('winequality-white.csv')
test.head()

Unnamed: 0.1,Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [None]:
single_feature_vector = test.iloc[0].to_dict(orient='records')
single_feature_vector

In [None]:
prediction = endpoint.predict(
    [
        single_feature_vector
    ]
)

print(prediction)

# UnDeploy your model (destroy the API)

In [None]:
endpoint.undeploy(deployed_model_id=prediction.deployed_model_id)