In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Model Registry on Vertex AI
* [Get started with Vertex AI Model Registry](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/model_registry/get_started_with_model_registry.ipynb)
* [ResNet V2 pretained model](https://tfhub.dev/google/imagenet/resnet_v2_101/classification/5).

In [1]:
# @title Install Vertex AI SDK for Python and other required packages
! pip3 install --upgrade --quiet --user google-cloud-aiplatform \
                                        tensorflow \
                                        tensorflow-hub

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.4/6.9 MB[0m [31m13.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/6.9 MB[0m [31m42.2 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.9/6.9 MB[0m [31m65.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.9/6.9 MB[0m [31m65.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[0m

In [2]:

# @title Define constants
PROJECT_ID = "ai-hangsik"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}
BUCKET_URI = "gs://sllm_0107"  # @param {type:"string"}
ARTIFACT_URI="gs://sllm_0107/llama3.1_3b_inst" # @param {type:"string"}
MODEL_DISPLAY_NAME = "meta-llama-3b-it"  # @param {type:"string"}
CONTAINER_URI = "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-3.ubuntu2204.py311" # @param {type:"string"}

In [3]:
# @title GCP Authentication

# Use OAuth to access the GCP environment.
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user(project_id=PROJECT_ID)

In [4]:
# @title Create a bucket.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

Creating gs://sllm_0107/...
ServiceException: 409 A Cloud Storage bucket named 'sllm_0107' already exists. Try another name. Bucket names must be globally unique across all Google Cloud projects, including those outside of your organization.


In [5]:
# @title Import libraries
import os

import google.cloud.aiplatform as aiplatform
import tensorflow as tf
import tensorflow_hub as hub

In [6]:
# @title Initialize Vertex AI SDK for Python

aiplatform.init(project=PROJECT_ID, staging_bucket=BUCKET_URI)

### Upload version 1 of the TensorFlow Hub model to a Vertex AI model resource

Next, upload the first version of the model (`MODEL_DIR_V1`) as a model resource in the Vertex AI Model Registry, with the additional following parameters:

- `is_default_version`: Whether this is the default version for the model resource.
- `version_ailiases`: User defined list of alternative alias names for the model version, such as `production`.
- `version_description`: User description of the model version.

When the first model version is created in the Vertex AI Model Registry, the property `version_id` is automatically set to 1.

In [7]:
model_v1 = aiplatform.Model.upload(
    display_name=f"{MODEL_DISPLAY_NAME}",
    artifact_uri=ARTIFACT_URI,
    serving_container_image_uri=CONTAINER_URI,
    is_default_version=True,
    version_aliases=["v1"],
    version_description="This is the first version of the model",
)

print(model_v1)

INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/721521243942/locations/us-central1/models/6056977560464523264/operations/3072726977740800000
INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/721521243942/locations/us-central1/models/6056977560464523264@1
INFO:google.cloud.aiplatform.models:To use this Model in another session:
INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/721521243942/locations/us-central1/models/6056977560464523264@1')


<google.cloud.aiplatform.models.Model object at 0x78459457f010> 
resource name: projects/721521243942/locations/us-central1/models/6056977560464523264


### Upload version 2 of the TensorFlow Hub model to a Vertex AI model resource

Next, upload the second version of the model (`MODEL_DIR_V1`) as a model resource in the Vertex AI Model Registry, with the additional following parameters:

- `parent_model`: The existing model resource for which to add this model as the next model version.
- `is_default_version`: Whether this is the default version for the model resource. In this example, you change from the default from the first version to the second version of the model.
- `version_ailiases`: User defined list of alternative alias names for the model version, such as `production`.
- `version_description`: User description of the model version.

When a subsequent model version is created in the Vertex AI Model Registry, the property `version_id` is automatically incremented. In this example, it's set to 2 (2nd version).

In [8]:
model_v2 = aiplatform.Model.upload(
    display_name=f"{MODEL_DISPLAY_NAME}",
    artifact_uri=ARTIFACT_URI,
    serving_container_image_uri=CONTAINER_URI,
    parent_model=model_v1.resource_name,
    is_default_version=True,
    version_aliases=["v2"],
    version_description="This is the second version of the model",
)

print(model_v2)



INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/721521243942/locations/us-central1/models/6056977560464523264/operations/3496065342713626624
INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/721521243942/locations/us-central1/models/6056977560464523264@2
INFO:google.cloud.aiplatform.models:To use this Model in another session:
INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/721521243942/locations/us-central1/models/6056977560464523264@2')


<google.cloud.aiplatform.models.Model object at 0x78459457fcd0> 
resource name: projects/721521243942/locations/us-central1/models/6056977560464523264


In [9]:
# @title Get all versions of the parent model
versions = model_v1.versioning_registry.list_versions()
for version in versions:
    print(version)

INFO:google.cloud.aiplatform.models:Getting versions for projects/721521243942/locations/us-central1/models/6056977560464523264


VersionInfo(version_id='1', version_create_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 18, 4, 463483, tzinfo=datetime.timezone.utc), version_update_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 19, 28, 421785, tzinfo=datetime.timezone.utc), model_display_name='meta-llama-3b-it', model_resource_name='projects/721521243942/locations/us-central1/models/6056977560464523264', version_aliases=['v1'], version_description='This is the first version of the model')
VersionInfo(version_id='2', version_create_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 19, 20, 338000, tzinfo=datetime.timezone.utc), version_update_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 19, 28, 421785, tzinfo=datetime.timezone.utc), model_display_name='meta-llama-3b-it', model_resource_name='projects/721521243942/locations/us-central1/models/6056977560464523264', version_aliases=['v2', 'default'], version_description='This is the second version of the model')


### Get all versions of a non-parent model

You repeat the same, but this time you use a non-parent model. As you can see, you get all the versions regardless if the model you specified is the parent or non-parent model.

In [10]:
# @title Get all versions of a non-parent model
versions = model_v2.versioning_registry.list_versions()
for version in versions:
    print(version)

INFO:google.cloud.aiplatform.models:Getting versions for projects/721521243942/locations/us-central1/models/6056977560464523264


VersionInfo(version_id='1', version_create_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 18, 4, 463483, tzinfo=datetime.timezone.utc), version_update_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 19, 28, 421785, tzinfo=datetime.timezone.utc), model_display_name='meta-llama-3b-it', model_resource_name='projects/721521243942/locations/us-central1/models/6056977560464523264', version_aliases=['v1'], version_description='This is the first version of the model')
VersionInfo(version_id='2', version_create_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 19, 20, 338000, tzinfo=datetime.timezone.utc), version_update_time=DatetimeWithNanoseconds(2025, 1, 5, 23, 19, 28, 421785, tzinfo=datetime.timezone.utc), model_display_name='meta-llama-3b-it', model_resource_name='projects/721521243942/locations/us-central1/models/6056977560464523264', version_aliases=['v2', 'default'], version_description='This is the second version of the model')


### Listing a model resource

The remaining Vertex AI SDK methods relating to a model resource automatically use the default version of the model resource.

Next, use the `list()` method with a filter to get the model resources you created above. In this example, there are two versions. Version 2 is set as the default version, so list() only returns the information on version 2 (default version).

In [17]:
models = aiplatform.Model.list(filter=f"display_name={MODEL_DISPLAY_NAME}")
print("Number of models:", len(models))
print("Version ID:", models[0].version_id)

for model in models:
    print(f"Model : {model}")

model = models[0]

Number of models: 2
Version ID: 2
Model : <google.cloud.aiplatform.models.Model object at 0x7845947042e0> 
resource name: projects/721521243942/locations/us-central1/models/6056977560464523264
Model : <google.cloud.aiplatform.models.Model object at 0x784594707550> 
resource name: projects/721521243942/locations/us-central1/models/3023803211430494208


### Change the default model version

Next, change which version of the model resource is the default model version using the `versioning_registry.add_version_aliases()` method, with the following parameters:

- `version`: Which version, specified by the version_id, this operation applies to.
- `new_aliases`: The additional aliases to assign to the specified version.

In this example, you refer to the special alias `default` to change this model (version 1) as the default.

Next, use the `list()` method which now returns version 1 instead of version 2 of the model.

In [16]:
model_v2.versioning_registry.add_version_aliases(new_aliases=["default"], version="2")

models = aiplatform.Model.list(filter=f"display_name={MODEL_DISPLAY_NAME}")
print("Number of models:", len(models))
print("Version ID:", models[0].version_id)

for model in models:
    print(f"Model : {model}")

model = models[0]

INFO:google.cloud.aiplatform.models:Merging version aliases for projects/721521243942/locations/us-central1/models/6056977560464523264
INFO:google.cloud.aiplatform.models:Completed merging version aliases for projects/721521243942/locations/us-central1/models/6056977560464523264


Number of models: 2
Version ID: 2
Model : <google.cloud.aiplatform.models.Model object at 0x78459484fd30> 
resource name: projects/721521243942/locations/us-central1/models/6056977560464523264
Model : <google.cloud.aiplatform.models.Model object at 0x78459484d4b0> 
resource name: projects/721521243942/locations/us-central1/models/3023803211430494208


## Creating an endpoint resource

You create an endpoint resource using the `Endpoint.create()` method. At a minimum, you specify the display name for the endpoint. Optionally, you can specify the project and location (region); otherwise the settings are inherited by the values you set when you initialized the Vertex AI SDK with the `init()` method.

In this example, the following parameters are specified:

- `display_name`: A human readable name for the endpoint resource.
- `project`: Your project ID.
- `location`: Your region.
- `labels`: (optional) User defined metadata for the endpoint in the form of key/value pairs.

This method returns an endpoint object.

Learn more about [Vertex AI endpoints](https://cloud.google.com/vertex-ai/docs/predictions/deploy-model-api).

In [13]:
endpoint = aiplatform.Endpoint.create(
    display_name=f"{MODEL_DISPLAY_NAME}_endpoint",
    project=PROJECT_ID,
    location=LOCATION,
    labels={"model_name": "llama3_1"},
)

print(endpoint)

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/721521243942/locations/us-central1/endpoints/7626539205207785472/operations/7910718877443555328
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/721521243942/locations/us-central1/endpoints/7626539205207785472
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/721521243942/locations/us-central1/endpoints/7626539205207785472')


<google.cloud.aiplatform.models.Endpoint object at 0x78459457d030> 
resource name: projects/721521243942/locations/us-central1/endpoints/7626539205207785472


## Deploying model resources to an endpoint resource.

You can deploy one of more Vertex AI model resource instances to the same endpoint. Each Vertex AI model resource that is deployed has its own deployment container for the serving binary.

*Note:* For this example, you specified the deployment container for the TFHub model in the previous step of uploading the model artifacts to a Vertex AI model resource.

### Deploying a single endpoint resource

In the next example, you deploy the default version of the Vertex AI model resource to a Vertex AI endpoint resource. The Vertex AI model resource already has defined for it the deployment container image. To deploy, you specify the following additional configuration settings:

- The machine type.
- The (if any) type and number of GPUs.
- Static, manual or auto-scaling of VM instances.

In this example, you deploy the model with the minimal amount of specified parameters, as follows:

- `model`: The model resource.
- `deployed_model_displayed_name`: The human readable name for the deployed model instance.
- `machine_type`: The machine type for each VM instance.

Do to the requirements to provision the resource, this may take upto a few minutes.

In [14]:
# @title Deploy model

# https://cloud.google.com/vertex-ai/docs/general/deployment

response = endpoint.deploy(
    model=model,
    deployed_model_display_name=MODEL_DISPLAY_NAME,
    machine_type="g2-standard-4",
    accelerator_type="NVIDIA_L4",
    accelerator_count=1,

)

print(endpoint)

INFO:google.cloud.aiplatform.models:Deploying Model projects/721521243942/locations/us-central1/models/6056977560464523264 to Endpoint : projects/721521243942/locations/us-central1/endpoints/7626539205207785472
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/721521243942/locations/us-central1/endpoints/7626539205207785472/operations/500045690605404160
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/721521243942/locations/us-central1/endpoints/7626539205207785472


<google.cloud.aiplatform.models.Endpoint object at 0x78459457d030> 
resource name: projects/721521243942/locations/us-central1/endpoints/7626539205207785472


#### Get information on the deployed model

You can get the deployment settings of the deployed model from the endpoint resource configuration data `gca_resource.deployed_models`. In this example, only one model is deployed -- hence the reference to the subscript `[0]`.

In [None]:
endpoint.gca_resource.deployed_models[0]

### Undeploy model resource from endpoint resource

When a model resource is deployed to an endpoint resource, the deployed model resource instance is assigned an ID -- commonly referred to as the deployed model ID.

You can undeploy a specific model resource instance with the `undeploy()` method, with the following parameters:

- `deployed_model_id`: The ID assigned to the deployed model.

In [None]:
deployed_model_id = endpoint.gca_resource.deployed_models[0].id
print(deployed_model_id)

endpoint.undeploy(deployed_model_id)

### Deleting a model version

To delete a version of a model from the Vertex AI Model Registry, you use the `versioning_registry.delete_version()` method, with the following parameters:

- `version`: Can be either the `version_id` or an alias from `version_alias`.

*Note:* You cannot delete the default model version.

In [None]:
# Deletes a specific model version, as long as it isn't the default version.
model.versioning_registry.delete_version("2")

versions = model.versioning_registry.list_versions()
for version in versions:
    print(version)

### Deleting a model resource

When you delete a model resource, all the associated versions are deleted.

In [None]:
model.delete()

### Specifying model versions for Vertex AI Training

When training models using the Vertex AI Training service, you can specify the trained model to be a parent model in the Vertex AI Model Registry, or a version of the parent model, with the following additional parameters to the `run()` method:

- `parent_model`: The existing model resource for which to add this model as the next model version.
- `is_default_version`: Whether this is the default version for the model resource.
- `model_version_ailiases`: User defined list of alternative alias names for the model version, such as `production`.
- `model_version_description`: User description of the model version.

Below is an example usage:

```
# This job uploads a new, non-default version of the my-training-job model
job = aiplatform.CustomTrainingJob(
   display_name="my-training-job",
   script_path="training_script.py",
   container_uri="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-2:latest",
   requirements=["gcsfs==0.7.1"],
   model_serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-2:latest",
)

model = job.run(
   my_dataset,
   replica_count=1,
   machine_type="n1-standard-4",
   accelerator_type='NVIDIA_TESLA_T4',
   accelerator_count=1,
   model_id='my-model'
   model_version_aliases=["myalias"],
   model_version_description="My description",
   is_default_version=False,
)

```


## Cleaning up

To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud
project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.

Otherwise, you can delete the individual resources you created in this tutorial:

In [None]:
# Set this to true only if you'd like to delete your bucket
delete_bucket = False

if delete_bucket:
    ! gsutil rm -rf {BUCKET_URI}

# Undeploy the models and delete the endpoint resources
try:
    endpoint.undeploy_all()
    endpoint.delete()
except Exception as e:
    print(e)