In [1]:
from google.cloud import aiplatform
import json

# Input Parameters

In [2]:
import os

import dotenv
dotenv.load_dotenv()

# Deployment coordinates are read from the process environment (after the
# dotenv.load_dotenv() call earlier in this cell). Indexing os.environ raises
# KeyError immediately if a variable is missing.
LOCATION = os.environ['LOCATION']
PROJECT = os.environ['PROJECT']
REPO_NAME = os.environ['REPO_NAME']
IMAGE_NAME = os.environ['IMAGE_NAME']

# Image reference layout: <location>-docker.pkg.dev/<project>/<repo>/<image>:latest
MODEL_IMAGE_URI = "/".join(
    (f"{LOCATION}-docker.pkg.dev", PROJECT, REPO_NAME, f"{IMAGE_NAME}:latest")
)
MODEL_IMAGE_URI  # last expression of the cell, so the resolved URI is echoed

'us-central1-docker.pkg.dev/pg-cvs-sandbox/recsys-servers/scann_index:latest'

# Configure authorization credentials for Artifact Registry

In [3]:
!gcloud auth configure-docker --quiet $LOCATION-docker.pkg.dev


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


To take a quick anonymous survey, run:
  $ gcloud survey



# Create Artifact Registry Repository

Note: This only needs to be run once.

In [4]:
!gcloud artifacts repositories create $REPO_NAME \
    --repository-format=docker \
    --location=$LOCATION

[1;31mERROR:[0m (gcloud.artifacts.repositories.create) ALREADY_EXISTS: the repository already exists


# Build and upload container

In [83]:
!gcloud builds submit --config cloudbuild.yaml \
    --region=$LOCATION \
    --substitutions="_IMAGE_NAME=$MODEL_IMAGE_URI" 

Creating temporary tarball archive of 42 file(s) totalling 1.9 MiB before compression.
Uploading tarball of [.] to [gs://pg-cvs-sandbox_cloudbuild/source/1697651266.013933-3883d8dcd8b2426390fcb75bb47f7f4d.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/pg-cvs-sandbox/locations/us-central1/builds/206ba670-1fd3-498c-848a-31e46a694d93].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds;region=us-central1/206ba670-1fd3-498c-848a-31e46a694d93?project=939813598428 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "206ba670-1fd3-498c-848a-31e46a694d93"

FETCHSOURCE
Fetching storage object: gs://pg-cvs-sandbox_cloudbuild/source/1697651266.013933-3883d8dcd8b2426390fcb75bb47f7f4d.tgz#1697651266597046
Copying gs://pg-cvs-sandbox_cloudbuild/source/1697651266.013933-3883d8dcd8b2426390fcb75bb47f7f4d.tgz#1697651266597046...
/ [1 files][  1.1 MiB/  1.1 MiB]                                                
Operation c

# Test evaluation on LocalEndpoint

## Create Local Model

In [3]:
from google.cloud.aiplatform.prediction import LocalModel

# Describe the serving container for local testing: which image to run and
# which HTTP routes are used for health checks and predictions.
local_model_kwargs = dict(
    serving_container_image_uri=MODEL_IMAGE_URI,
    serving_container_health_route='/health',
    serving_container_predict_route='/predict',
)
local_model = LocalModel(**local_model_kwargs)

## "Deploy" model to local endpoint

In [4]:
# Create a local endpoint for the model; gpu_count=0 requests no GPUs.
local_endpoint = local_model.deploy_to_local_endpoint(gpu_count=0)

## Start serving endpoint and run health check

In [5]:
# Start serving on the local endpoint. It stays up until local_endpoint.stop()
# is called in a later cell.
local_endpoint.serve()

# Query the health route and show both the response object and its raw body.
health_check_response = local_endpoint.run_health_check()
print(health_check_response, health_check_response.content)

<Response [200]> b'OK'


## Test prediction on local endpoint

In [6]:
from IPython.display import display, HTML

# Read the sample request body. The explicit UTF-8 encoding avoids depending
# on the platform's default text encoding.
with open('request.json', 'r', encoding='utf-8') as f:
    request = f.read()

# Send the request after the file has been closed, so the handle is not held
# open for the duration of the HTTP call.
prediction_res = local_endpoint.predict(
    request=request, headers={"Content-Type": "application/json"}
)

if prediction_res.status_code == 200:
    # Successful responses are pretty-printed as JSON.
    prediction = prediction_res.json()
    print(json.dumps(prediction, indent=2))
else:
    # Any other status: render the response body as HTML in the notebook.
    display(HTML(prediction_res.text))

{
  "predictions": [
    {
      "movie_scores": [
        9.653619766235352,
        7.183511734008789,
        7.077798843383789,
        6.192534446716309,
        6.092672348022461,
        5.85479736328125,
        5.565959930419922,
        5.52648401260376,
        5.500728607177734,
        5.386192321777344
      ],
      "movie_titles": [
        "Winnie the Pooh and the Blustery Day (1968)",
        "Alien (1979)",
        "Deconstructing Harry (1997)",
        "Patton (1970)",
        "To Wong Foo, Thanks for Everything! Julie Newmar (1995)",
        "Of Love and Shadows (1994)",
        "Miracle on 34th Street (1994)",
        "Mask, The (1994)",
        "Bed of Roses (1996)",
        "Fly Away Home (1996)"
      ]
    },
    {
      "movie_scores": [
        7.910170078277588,
        7.693970203399658,
        7.138026714324951,
        6.736512660980225,
        6.736512660980225,
        6.564693927764893,
        6.489426612854004,
        5.966603755950928,
        5

## Stop serving local endpoint

In [7]:
# Stop the local endpoint that was started with local_endpoint.serve().
local_endpoint.stop()

# Deploy prediction container to Model Registry

In [8]:
# Register the serving container in the Vertex AI Model Registry.
#
# Look up existing models by display name first. When exactly one model named
# IMAGE_NAME exists, the image is uploaded as a new version of it; otherwise a
# new model entry is created. Errors from the lookup or the upload propagate
# instead of being swallowed and silently retried as an unrelated new model.
matching_models = aiplatform.Model.list(
    filter=f'display_name="{IMAGE_NAME}"',
    project=PROJECT,
    location=LOCATION,
)

if len(matching_models) > 1:
    print(
        f"Found {len(matching_models)} models named {IMAGE_NAME!r}; "
        "uploading as a new model instead of versioning one of them"
    )

# Only version an existing model when the match is unambiguous.
parent_model_name = (
    matching_models[0].resource_name if len(matching_models) == 1 else None
)

model = aiplatform.Model.upload(
    # Set the display name explicitly so the lookup above can find this model
    # the next time the cell runs.
    display_name=IMAGE_NAME,
    parent_model=parent_model_name,
    serving_container_image_uri=MODEL_IMAGE_URI,
    serving_container_health_route="/health",
    serving_container_predict_route="/predict",
    project=PROJECT,
    location=LOCATION,
    sync=True,
)

# Create Vertex Endpoint

In [9]:
# Reuse the Vertex endpoint named "<IMAGE_NAME>_endpoint" when exactly one
# exists; otherwise create it. The match is checked explicitly rather than via
# a bare `except`, so genuine API failures (auth, quota, connectivity) surface
# instead of being mistaken for "endpoint not found".
existing_endpoints = aiplatform.Endpoint.list(
    filter=f'display_name="{IMAGE_NAME}_endpoint"',
    project=PROJECT,
    location=LOCATION,
)

if len(existing_endpoints) == 1:
    [endpoint] = existing_endpoints
    print("Endpoint already exists")
else:
    if existing_endpoints:
        # Ambiguous match: keep the original behaviour of creating a fresh
        # endpoint, but make the duplication visible.
        print(
            f"Found {len(existing_endpoints)} endpoints named "
            f"{IMAGE_NAME}_endpoint; creating another one"
        )
    endpoint = aiplatform.Endpoint.create(
        display_name=f"{IMAGE_NAME}_endpoint",
        project=PROJECT,
        location=LOCATION,
        sync=True,
    )
    print("Endpoint created:", endpoint.resource_name)

Endpoint already exists


# Deploy Model to Vertex Endpoint

In [10]:
# Deploy the uploaded model onto the endpoint with a single fixed replica
# (min == max == 1) and route all traffic to it. sync=True blocks until the
# call returns.
# NOTE(review): the returned object is later used via .predict(instances=...,
# parameters=...), so it presumably is the Endpoint rather than a
# deployed-model record; confirm against the SDK.
deployed_model = model.deploy(
    endpoint=endpoint,
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=1,
    traffic_percentage=100,
    sync=True,
)

# Test Prediction on Endpoint

In [16]:
import time
import numpy as np

# The wall-clock timer starts before the payload is built, so payload
# construction is part of the measured time.
start_time = time.time()

# 2,000 instances: each carries a random user id (as a string, drawn from
# [1, 10_000)) and a single excluded title.
random_instances = [
    {
        "user_id": str(np.random.randint(1, 10_000)),
        "exclusions": ["Fargo (1996)"],
    }
    for _ in range(2_000)
]

test_prediction = deployed_model.predict(
    instances=random_instances,
    parameters={"k": 5},
)

end_time = time.time()

print("Elapsed seconds:", end_time - start_time)

Elapsed seconds: 2.4627251625061035


In [15]:
# Show at most the first ten predictions to keep the cell output short.
prediction_num_display = min(len(test_prediction.predictions), 10)
shown_predictions = test_prediction.predictions[:prediction_num_display]

print(json.dumps(shown_predictions, indent=2))

[
  {
    "movie_scores": [
      0.37508824467659,
      0.27032870054245,
      0.2434602379798889,
      0.2323040068149567,
      0.2177023887634277
    ],
    "movie_titles": [
      "Devil's Own, The (1997)",
      "Contact (1997)",
      "Caro Diario (Dear Diary) (1994)",
      "As Good As It Gets (1997)",
      "George of the Jungle (1997)"
    ]
  },
  {
    "movie_titles": [
      "Devil's Own, The (1997)",
      "Contact (1997)",
      "Caro Diario (Dear Diary) (1994)",
      "As Good As It Gets (1997)",
      "George of the Jungle (1997)"
    ],
    "movie_scores": [
      0.37508824467659,
      0.27032870054245,
      0.2434602379798889,
      0.2323040068149567,
      0.2177023887634277
    ]
  },
  {
    "movie_scores": [
      0.37508824467659,
      0.27032870054245,
      0.2434602379798889,
      0.2323040068149567,
      0.2177023887634277
    ],
    "movie_titles": [
      "Devil's Own, The (1997)",
      "Contact (1997)",
      "Caro Diario (Dear Diary) (1994)",
