In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Quick start with Model Garden - HeAR

<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2Fgoogle-health%2Fhear%2Fmaster%2Fnotebooks%2Fquick_start_with_model_garden.ipynb">
      <img alt="Google Cloud Colab Enterprise logo" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" width="32px"><br> Run in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/google-health/hear/blob/master/notebooks/quick_start_with_model_garden.ipynb">
      <img alt="GitHub logo" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" width="32px"><br> View on GitHub
    </a>
  </td>
</tr></tbody></table>

## Overview

This notebook demonstrates how to use HeAR in Vertex AI to generate embeddings from audio clips of health-related sounds using two methods for getting predictions:

* **Online predictions** are synchronous requests that are made to the endpoint deployed from Model Garden and are served with low latency. Online predictions are useful if the embeddings are being used in production. The cost for online prediction is based on the time a virtual machine spends waiting in an active state (an endpoint with a deployed model) to handle prediction requests.

* **Batch predictions** are asynchronous requests that are run on a set number of audio clips specified in a single job. They are made directly to an uploaded model and do not use an endpoint deployed from Model Garden. Batch predictions are useful if you want to generate embeddings for a large number of clips for use in training and don't require low latency. The cost for batch prediction is based on the time a virtual machine spends running your prediction job.

Vertex AI makes it easy to serve your model and make it accessible to the world. Learn more about [Vertex AI](https://cloud.google.com/vertex-ai/docs/start/introduction-unified-platform).

### Objectives

- Deploy HeAR to a Vertex AI Endpoint and get online predictions.
- Upload HeAR to Vertex AI Model Registry and get batch predictions.

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing), [Cloud Storage pricing](https://cloud.google.com/storage/pricing), and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Before you begin

In [None]:
# @title Import packages and define common functions

import base64
import datetime
import importlib
import io
import json
import os
import uuid

import numpy as np
from google.cloud import aiplatform, storage
from IPython.display import Audio, display
from scipy import signal
from scipy.io import wavfile

# Fetch the vertex-ai-samples repository once; the clone is skipped when the
# directory already exists (e.g. when this cell is re-run).
if not os.path.isdir("vertex-ai-samples"):
    ! git clone https://github.com/GoogleCloudPlatform/vertex-ai-samples.git

# The repository and `community-content` directory names contain hyphens, so
# they cannot be used in a regular `import` statement. Load the helper module
# from its dotted path string instead.
common_util = importlib.import_module(
    "vertex-ai-samples.community-content.vertex_model_garden.model_oss.notebook_util.common_util"
)

# Registries of the Vertex AI resources used by this notebook, keyed by a short
# label. The clean-up cell iterates over both to delete the resources.
models, endpoints = {}, {}

# Clip format used throughout this notebook for HeAR prediction inputs.
SAMPLE_RATE = 16000  # Sampling rate in Hz (samples per second)
CLIP_DURATION = 2  # Length of one clip in seconds
CLIP_LENGTH = SAMPLE_RATE * CLIP_DURATION  # Number of samples in one clip


def resample_audio_and_convert_to_mono(
    audio_array: np.ndarray,
    sampling_rate: float,
    new_sampling_rate: float = SAMPLE_RATE,
) -> np.ndarray:
    """Downmixes audio to a single channel and resamples it to a new rate.

    Args:
      audio_array: Audio samples. An array with more than one dimension is
        averaged over axis 1, i.e. it is treated as `(samples, channels)`.
      sampling_rate: Sampling rate of `audio_array`.
      new_sampling_rate: Sampling rate of the returned audio.

    Returns:
      The mono audio resampled with `scipy.signal.resample`. Its length is the
      input length scaled by `new_sampling_rate / sampling_rate` and rounded
      to an integer.
    """
    # One-dimensional input is already mono; otherwise average the channels.
    mono = audio_array if audio_array.ndim <= 1 else np.mean(audio_array, axis=1)

    # Scale the sample count by the ratio of the two rates.
    rate_ratio = new_sampling_rate / sampling_rate
    target_sample_count = int(round(mono.shape[0] * rate_ratio))

    return signal.resample(mono, target_sample_count)

In [None]:
# @title Set up Google Cloud environment

# @markdown #### Prerequisites

# @markdown Make sure that:

# @markdown 1. [Billing is enabled](https://cloud.google.com/billing/docs/how-to/modify-project) for your project.

# @markdown 2. Either the Compute Engine API is enabled or you have the [Service Usage Admin](https://cloud.google.com/iam/docs/understanding-roles#serviceusage.serviceUsageAdmin) (`roles/serviceusage.serviceUsageAdmin`) role to enable the API.

# @markdown 3. You have the [Storage Admin](https://cloud.google.com/iam/docs/understanding-roles#storage.admin) (`roles/storage.admin`) role to create and use Cloud Storage buckets.

# @markdown ---

# @markdown This section:

# @markdown 1. Sets the default Google Cloud project and region, enables the Compute Engine API (if not already enabled), and initializes the Vertex AI API.

# @markdown 2. Sets up a Cloud Storage bucket for storing prediction artifacts.
# @markdown - A new bucket will automatically be created for you.
# @markdown - [Optional] To use an existing bucket, specify the `gs://` bucket URI. The specified Cloud Storage bucket should be located in the same region as where the notebook was launched. Note that a multi-region bucket (e.g. "us") is not considered a match for a single region (e.g. "us-central1") covered by the multi-region range.

# Get the default project ID.
# Raises KeyError if the variable is not set.
# NOTE(review): presumably provided by the Colab Enterprise runtime; confirm
# before running this notebook in another environment.
PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]

# Get the default region for launching jobs.
# Same caveat as above: a missing variable raises KeyError.
REGION = os.environ["GOOGLE_CLOUD_REGION"]

# Enable the Compute Engine API, if not already.
print("Enabling Compute Engine API.")
! gcloud services enable compute.googleapis.com

# Initialize Vertex AI API.
# The project and location given here become the defaults for the `aiplatform`
# calls made in the rest of the notebook.
print("Initializing Vertex AI API.")
aiplatform.init(project=PROJECT_ID, location=REGION)

BUCKET_URI = ""  # @param {type:"string", placeholder:"[Optional] Cloud Storage bucket URI"}

# Cloud Storage bucket for storing prediction artifacts.
# A unique bucket will be created for the purpose of this notebook. If you
# prefer using your own GCS bucket, change the value of BUCKET_URI above.
if BUCKET_URI is None or BUCKET_URI.strip() == "":
    now = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    BUCKET_URI = f"gs://{PROJECT_ID}-tmp-{now}-{str(uuid.uuid4())[:4]}"
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    ! gcloud storage buckets create --location {REGION} {BUCKET_URI}
else:
    assert BUCKET_URI.startswith("gs://"), "BUCKET_URI must start with `gs://`."
    BUCKET_NAME = "/".join(BUCKET_URI.split("/")[:3])
    shell_output = ! gcloud storage buckets describe {BUCKET_NAME} | grep "location:" | sed "s/location://"
    bucket_region = shell_output[0].strip().lower()
    if bucket_region != REGION:
        raise ValueError(
            f"Bucket region {bucket_region} is different from notebook region {REGION}"
        )
print(f"Using this Cloud Storage Bucket: {BUCKET_URI}")

In [None]:
# @title Prepare sample cough audio

# @markdown The expected prediction input for HeAR is 2 second clips of audio sampled at 16kHz. This section prepares a clip using a sample cough audio file from Wikimedia Commons and stores it in the following formats to be used for prediction in the next sections:

# @markdown 1. WAV audio file (`sample_cough.wav` in the Cloud Storage bucket `BUCKET_URI`)

# @markdown 2. WAV bytes (in memory)

# @markdown 3. Audio array (in memory)

# @markdown Click "Show Code" to see more details.

# Attribution: Ashe Kirk, CC0, via Wikimedia Commons
audio_file_url = (
    "https://upload.wikimedia.org/wikipedia/commons/b/be/Woman_coughing_three_times.wav"
)

# `-nc` (no-clobber) skips the download when the file already exists locally,
# so re-running this cell does not fetch it again.
! wget -nc {audio_file_url}
filename = os.path.basename(audio_file_url)
# `wavfile.read` returns the file's sampling rate together with its samples.
with open(filename, "rb") as f:
    original_sampling_rate, audio_array = wavfile.read(f)
# Downmix to mono and resample to SAMPLE_RATE (the function's default target).
audio_array = resample_audio_and_convert_to_mono(audio_array, original_sampling_rate)
# This index corresponds to the start of a health acoustic event
# (e.g. cough in this case) and was determined by hand. In practice,
# you would need a detector.
start = 0
clip = audio_array[start : start + CLIP_LENGTH]
# Pad clip with zeros if less than the required CLIP_LENGTH
if len(clip) < CLIP_LENGTH:
    clip = np.pad(clip, (0, CLIP_LENGTH - len(clip)))
# Render an inline audio player so the prepared clip can be checked by ear.
display(Audio(clip, rate=SAMPLE_RATE))

# Save the audio data to a BytesIO object (in-memory file)
bytes_io = io.BytesIO()
wavfile.write(bytes_io, SAMPLE_RATE, clip)

# Sample cough audio prepared as a 2 second clip sampled at 16kHz and stored in
# the formats below to be used for prediction in the next sections
SAMPLE_AUDIO_ARRAY = clip.tolist()  # Plain list, so it can be JSON-serialized
SAMPLE_WAV_BYTES = bytes_io.getvalue()  # Complete WAV file content as bytes
SAMPLE_GCS_URI = f"{BUCKET_URI}/sample_cough.wav"

# Upload the audio file to Cloud Storage
storage_client = storage.Client()
# Build the blob handle directly from the `gs://` URI of the target object.
blob = storage.blob.Blob.from_string(SAMPLE_GCS_URI, client=storage_client)
blob.upload_from_string(SAMPLE_WAV_BYTES)

## Get online predictions

In [None]:
# @title Import deployed model

# @markdown To get [online predictions](https://cloud.google.com/vertex-ai/docs/predictions/get-online-predictions), you will need a HeAR [Vertex AI Endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment) that has been deployed from Model Garden. If you have not already done so, go to the [HeAR model card](https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/hear) in Model Garden and click "Deploy" to deploy the model.

# @markdown This section gets the Vertex AI Endpoint resource that you deployed from Model Garden to use for online predictions.

# @markdown Fill in the endpoint ID and region below. You can find your deployed endpoint on the [Vertex AI online prediction page](https://console.cloud.google.com/vertex-ai/online-prediction/endpoints).

ENDPOINT_ID = ""  # @param {type: "string", placeholder:"e.g. 123456789"}
ENDPOINT_REGION = ""  # @param {type: "string", placeholder:"e.g. us-central1"}

# Clean up the form input and fail early with an actionable message when the
# endpoint ID was left blank, instead of surfacing an SDK or API error.
ENDPOINT_ID = ENDPOINT_ID.strip()
if not ENDPOINT_ID:
    raise ValueError(
        "ENDPOINT_ID is empty. Deploy HeAR from Model Garden and enter the ID "
        "of the resulting endpoint above."
    )

# A blank region falls back to the notebook's default region.
ENDPOINT_REGION = ENDPOINT_REGION.strip() or REGION

# Handle to the existing endpoint; this does not create or deploy anything.
endpoints["endpoint"] = aiplatform.Endpoint(
    endpoint_name=ENDPOINT_ID,
    project=PROJECT_ID,
    location=ENDPOINT_REGION,
)

### Predict

You can send [online prediction](https://cloud.google.com/vertex-ai/docs/predictions/get-online-predictions) requests to the endpoint with audio clips of health-related sounds (2 seconds sampled at 16kHz) to generate embeddings.

The following examples demonstrate using HeAR to generate embeddings from:

* A WAV audio file stored in [Cloud Storage](https://cloud.google.com/storage/docs)
* Base64-encoded WAV bytes
* Array of 32000 floats

In [None]:
# @title #### Generate an embedding from an audio file in Cloud Storage

# @markdown This section shows an example of generating an embedding using a sample audio file stored in Cloud Storage.

# @markdown The prediction request instance contains the following fields:
# @markdown - `gcs_uri`: `gs://` URI specifying the location of a WAV audio file stored in Cloud Storage
# @markdown - `bearer_token`: Bearer token used to access data in Cloud Storage (optional for public buckets)

# @markdown You can specify `GCS_URI` below to use your own data.

# @markdown Click "Show Code" to see more details.

GCS_URI = ""  # @param {type:"string", placeholder:"Cloud Storage file URI (leave blank to use sample data)"}

if not GCS_URI:
    GCS_URI = SAMPLE_GCS_URI

bearer_token = ! gcloud auth print-access-token
bearer_token = bearer_token[0]

instances = [
    {
        "gcs_uri": GCS_URI,
        "bearer_token": bearer_token,
    },
]

response = endpoints["endpoint"].predict(instances=instances)
predictions = response.predictions

embedding_vector = np.array(predictions[0]["embedding"]).flatten()
print("Size of embedding vector:", len(embedding_vector))

In [None]:
# @title #### Generate an embedding from WAV bytes

# @markdown This section shows an example of generating an embedding from sample WAV bytes.

# @markdown The prediction request instance contains the following field:
# @markdown - `input_bytes`: Base-64 encoded WAV bytes

# @markdown Click "Show Code" to see more details.

# Base64 turns the raw WAV bytes into text that can be sent in the JSON request.
encoded_wav = base64.b64encode(SAMPLE_WAV_BYTES).decode("utf-8")
instances = [{"input_bytes": encoded_wav}]

response = endpoints["endpoint"].predict(instances=instances)
predictions = response.predictions

# Flatten the first (and only) returned embedding to 1-D and report its size.
first_embedding = predictions[0]["embedding"]
embedding_vector = np.array(first_embedding).flatten()
print("Size of embedding vector:", embedding_vector.size)

In [None]:
# @title #### Generate an embedding from an audio array

# @markdown This section shows an example of generating an embedding from a sample audio array.

# @markdown The prediction request instance contains the following field:
# @markdown - `input_array`: Array of 32000 floats

# @markdown Click "Show Code" to see more details.

# The clip is sent as a plain list of samples under the `input_array` field.
array_instance = {"input_array": SAMPLE_AUDIO_ARRAY}
instances = [array_instance]

response = endpoints["endpoint"].predict(instances=instances)
predictions = response.predictions

# Flatten the first (and only) returned embedding to 1-D and report its size.
first_embedding = predictions[0]["embedding"]
embedding_vector = np.array(first_embedding).flatten()
print("Size of embedding vector:", embedding_vector.size)

## Get batch predictions

In [None]:
# @title Upload model to Vertex AI Model Registry

# @markdown To get [batch predictions](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions), you must first upload the prebuilt HeAR model to [Vertex AI Model Registry](https://cloud.google.com/vertex-ai/docs/model-registry/introduction). Batch prediction requests are made directly to a model in Model Registry without deploying to an endpoint.

# Identifier used as the prefix of model and job display names.
MODEL_ID = "hear"
# Cloud Storage location of the prebuilt HeAR model artifacts.
MODEL_ARTIFACT_URI = "gs://vertex-model-garden-restricted-us/hear"

# The pre-built serving docker image.
SERVE_DOCKER_URI = "us-docker.pkg.dev/deeplearning-platform-release/vertex-model-garden/health-ai-hear.cpu.1-0.ubuntu2004.py312.tf218:20250311-1800-rc0"


def upload_model(model_name: str, artifact_uri: str) -> aiplatform.Model:
    """Uploads a model to Vertex AI Model Registry with the HeAR serving image.

    Args:
      model_name: Display name for the uploaded model.
      artifact_uri: Cloud Storage URI of the model artifacts.

    Returns:
      The `aiplatform.Model` returned by `aiplatform.Model.upload`.
    """
    # Serving container settings: image, listening port and HTTP routes.
    serving_container_settings = {
        "serving_container_image_uri": SERVE_DOCKER_URI,
        "serving_container_ports": [8080],
        "serving_container_predict_route": "/predict",
        "serving_container_health_route": "/health",
    }
    return aiplatform.Model.upload(
        display_name=model_name,
        artifact_uri=artifact_uri,
        **serving_container_settings,
    )


# Register the model under a datetime-stamped display name and keep the handle
# for the batch prediction and clean-up cells.
hear_model_display_name = common_util.get_job_name_with_datetime(prefix=MODEL_ID)
models["model"] = upload_model(
    model_name=hear_model_display_name,
    artifact_uri=MODEL_ARTIFACT_URI,
)

In [None]:
# @title Set the service account for batch prediction

# @markdown This section gets the [Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) which will be used to run the batch prediction jobs.

# @markdown Make sure that you have the [Service Account User](https://cloud.google.com/iam/docs/understanding-roles#iam.serviceAccountUser) (`roles/iam.serviceAccountUser`) role on either the project or the Compute Engine default service account.

# Service account used for running the prediction container.
# Gets the Compute Engine default service account. If you prefer using your own
# custom service account, change the value of SERVICE_ACCOUNT below.
shell_output = ! gcloud projects describe $PROJECT_ID
project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"
print("Using this service account:", SERVICE_ACCOUNT)

### Predict

You can send [batch prediction requests](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions#request_a_batch_prediction) to the model using a [JSON Lines](https://jsonlines.org/) file to specify a list of input instances with audio clips of health-related sounds (2 seconds sampled at 16kHz) to generate embeddings. For more details on configuring batch prediction jobs, see how to [format your input data](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions#input_data_requirements) and [choose compute settings](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions#choose_machine_type_and_replica_count).

The following examples demonstrate using HeAR to generate embeddings in batch from:

* WAV audio files stored in [Cloud Storage](https://cloud.google.com/storage/docs)
* Base64-encoded WAV bytes
* Arrays of 32000 floats

In [None]:
# @title #### Generate embeddings in batch from audio files in Cloud Storage

# @markdown This section shows an example of generating embeddings in batch using sample audio files stored in Cloud Storage.

# @markdown Each line in the input JSON Lines file is a prediction request instance that contains the following field:
# @markdown - `gcs_uri`: `gs://` URI specifying the location of a WAV audio file stored in Cloud Storage

# @markdown You can specify `GCS_URIS` below to use your own data.

# @markdown **Note:** The custom service account used to launch the batch prediction job must have permission to read the data from Cloud Storage.

# @markdown Click "Show Code" to see more details.

# Comma-separated list of Cloud Storage URIs
GCS_URIS = ""  # @param {type:"string", placeholder:"Comma-separated list of Cloud Storage file URIs (leave blank to use sample data)"}

if not GCS_URIS:
    gcs_uris_list = [SAMPLE_GCS_URI, SAMPLE_GCS_URI]
    # Grant the custom service account permission to read data from Cloud Storage BUCKET_URI
    ! gcloud storage buckets add-iam-policy-binding {BUCKET_URI} \
        --member=serviceAccount:{SERVICE_ACCOUNT} \
        --role=roles/storage.objectViewer
else:
    gcs_uris_list = GCS_URIS.split(",")

batch_predict_instances = [{"gcs_uri": uri} for uri in gcs_uris_list]

# Write instances to JSON Lines file
os.makedirs("batch_predict_input", exist_ok=True)
instances_filename = "gcs_instances.jsonl"
with open(f"batch_predict_input/{instances_filename}", "w") as f:
   for line in batch_predict_instances:
       json_str = json.dumps(line)
       f.write(json_str)
       f.write("\n")

# Copy the file to Cloud Storage
batch_predict_prefix = f"batch-predict-{MODEL_ID}"
! gcloud storage cp ./batch_predict_input/{instances_filename} {BUCKET_URI}/{batch_predict_prefix}/input/{instances_filename}

batch_predict_job_name = common_util.get_job_name_with_datetime(prefix=f"batch-predict-{MODEL_ID}")

gcs_batch_predict_job = models["model"].batch_predict(
    job_display_name=batch_predict_job_name,
    gcs_source=os.path.join(BUCKET_URI, batch_predict_prefix, f"input/{instances_filename}"),
    gcs_destination_prefix=os.path.join(BUCKET_URI, batch_predict_prefix, "output"),
    machine_type="n1-standard-4",
    service_account=SERVICE_ACCOUNT,
)

gcs_batch_predict_job.wait()

print(gcs_batch_predict_job.display_name)
print(gcs_batch_predict_job.resource_name)
print(gcs_batch_predict_job.state)

In [None]:
# @title #### Generate embeddings in batch from WAV bytes

# @markdown This section shows an example of generating embeddings in batch from sample WAV bytes.

# @markdown Each line in the input JSON Lines file is a prediction request instance that contains the following field:
# @markdown - `input_bytes`: Base-64 encoded WAV bytes

# @markdown Click "Show Code" to see more details.

batch_predict_instances = [
    {"input_bytes": base64.b64encode(SAMPLE_WAV_BYTES).decode("utf-8")},
    {"input_bytes": base64.b64encode(SAMPLE_WAV_BYTES).decode("utf-8")},
]

# Write instances to JSON Lines file
os.makedirs("batch_predict_input", exist_ok=True)
instances_filename = "bytes_instances.jsonl"
with open(f"batch_predict_input/{instances_filename}", "w") as f:
    for line in batch_predict_instances:
        json_str = json.dumps(line)
        f.write(json_str)
        f.write("\n")

# Copy the file to Cloud Storage
batch_predict_prefix = f"batch-predict-{MODEL_ID}"
! gcloud storage cp ./batch_predict_input/{instances_filename} {BUCKET_URI}/{batch_predict_prefix}/input/{instances_filename}

batch_predict_job_name = common_util.get_job_name_with_datetime(
    prefix=f"batch-predict-{MODEL_ID}"
)

bytes_batch_predict_job = models["model"].batch_predict(
    job_display_name=batch_predict_job_name,
    gcs_source=os.path.join(
        BUCKET_URI, batch_predict_prefix, f"input/{instances_filename}"
    ),
    gcs_destination_prefix=os.path.join(BUCKET_URI, batch_predict_prefix, "output"),
    machine_type="n1-standard-4",
    service_account=SERVICE_ACCOUNT,
)

bytes_batch_predict_job.wait()

print(bytes_batch_predict_job.display_name)
print(bytes_batch_predict_job.resource_name)
print(bytes_batch_predict_job.state)

In [None]:
# @title #### Generate embeddings in batch from audio arrays

# @markdown This section shows an example of generating embeddings in batch from sample audio arrays.

# @markdown Each line in the input JSON Lines file is a prediction request instance that contains the following field:
# @markdown - `input_array`: Array of 32000 floats

# @markdown Click "Show Code" to see more details.

batch_predict_instances = [
    {"input_array": SAMPLE_AUDIO_ARRAY},
    {"input_array": SAMPLE_AUDIO_ARRAY},
]

# Write instances to JSON Lines file
os.makedirs("batch_predict_input", exist_ok=True)
instances_filename = "bytes_instances.jsonl"
with open(f"batch_predict_input/{instances_filename}", "w") as f:
    for line in batch_predict_instances:
        json_str = json.dumps(line)
        f.write(json_str)
        f.write("\n")

# Copy the file to Cloud Storage
batch_predict_prefix = f"batch-predict-{MODEL_ID}"
! gcloud storage cp ./batch_predict_input/{instances_filename} {BUCKET_URI}/{batch_predict_prefix}/input/{instances_filename}

batch_predict_job_name = common_util.get_job_name_with_datetime(
    prefix=f"batch-predict-{MODEL_ID}"
)

array_batch_predict_job = models["model"].batch_predict(
    job_display_name=batch_predict_job_name,
    gcs_source=os.path.join(
        BUCKET_URI, batch_predict_prefix, f"input/{instances_filename}"
    ),
    gcs_destination_prefix=os.path.join(BUCKET_URI, batch_predict_prefix, "output"),
    machine_type="n1-standard-4",
    service_account=SERVICE_ACCOUNT,
)

array_batch_predict_job.wait()

print(array_batch_predict_job.display_name)
print(array_batch_predict_job.resource_name)
print(array_batch_predict_job.state)

In [None]:
# @title #### Get prediction results

# @markdown This section shows an example of [retrieving batch prediction results](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions#retrieve_batch_prediction_results) from the JSON Lines file(s) in the output Cloud Storage location.

# @markdown Click "Show Code" to see more details.


def download_gcs_files_as_json(gcs_files_prefix):
    """Reads JSON Lines blobs from the notebook bucket into a list.

    Args:
      gcs_files_prefix: Object-name prefix (relative to the bucket named by
        `BUCKET_NAME`) selecting the blobs to read.

    Returns:
      A list with one parsed JSON value per line, across all matching blobs in
      listing order.
    """
    gcs_client = storage.Client()
    results_bucket = storage.bucket.Bucket.from_string(BUCKET_NAME, gcs_client)

    records = []
    for results_blob in results_bucket.list_blobs(prefix=gcs_files_prefix):
        # Stream each blob as text and parse it line by line.
        with results_blob.open("r") as reader:
            records.extend(json.loads(row) for row in reader)
    return records


# Get results from the first batch prediction job (with Cloud Storage inputs)
# You can replace this variable to get results from another batch prediction job
batch_predict_job = gcs_batch_predict_job
batch_predict_output_dir = batch_predict_job.output_info.gcs_output_directory

# Blob listing takes a prefix relative to the bucket, so drop the leading
# "gs://<bucket>/" part of the job's output directory first.
output_dir_in_bucket = batch_predict_output_dir.replace(f"{BUCKET_NAME}/", "")
batch_predict_output_files_prefix = os.path.join(
    output_dir_in_bucket, "prediction.results"
)
batch_predict_results = download_gcs_files_as_json(batch_predict_output_files_prefix)

# Display first two batch prediction results
for result_index, result in enumerate(batch_predict_results[:2]):
    embedding_vector = np.array(result["prediction"]["embedding"]).flatten()
    print(f"Size of embedding vector {result_index}:", len(embedding_vector))

## Next steps

Explore the other [notebooks](https://github.com/google-health/hear/blob/master/notebooks) to learn what else you can do with the model.

## Clean up resources

In [None]:
# @markdown  Delete the models and endpoints used in this notebook to release the resources
# @markdown  and avoid incurring unnecessary ongoing charges.

# Undeploy model and delete endpoint.
for endpoint in endpoints.values():
    endpoint.delete(force=True)

# Delete models.
for model in models.values():
    model.delete()

delete_bucket = False  # @param {type:"boolean"}
if delete_bucket:
    ! gsutil -m rm -r $BUCKET_NAME