In [None]:
from google.cloud import aiplatform
import json

# Input Parameters

In [None]:
import os

import dotenv
# Load settings via python-dotenv so the os.environ lookups below can see them.
dotenv.load_dotenv()

# Required deployment settings; a missing variable raises KeyError right here.
LOCATION = os.environ["LOCATION"]
PROJECT = os.environ["PROJECT"]
REPO_NAME = os.environ["REPO_NAME"]
IMAGE_NAME = os.environ["IMAGE_NAME"]

# Full Artifact Registry address of the serving image, using the "latest" tag.
_registry_host = f"{LOCATION}-docker.pkg.dev"
MODEL_IMAGE_URI = f"{_registry_host}/{PROJECT}/{REPO_NAME}/{IMAGE_NAME}:latest"

# Bare expression so the notebook displays the resolved URI.
MODEL_IMAGE_URI

# Configure authorization credentials for Artifact Registry

In [None]:
# Configure Docker credentials (via gcloud) for the regional Artifact Registry host.
# $LOCATION is filled in from the notebook variable of the same name; --quiet
# suppresses the interactive confirmation prompt.
!gcloud auth configure-docker --quiet $LOCATION-docker.pkg.dev

# Create Artifact Registry Repository

Note: This only needs to be run once.

In [None]:
# Create a Docker-format Artifact Registry repository named $REPO_NAME in $LOCATION.
# NOTE(review): presumably this errors if the repository already exists, so skip
# this cell on subsequent runs — confirm.
!gcloud artifacts repositories create $REPO_NAME \
    --repository-format=docker \
    --location=$LOCATION

# Build and upload container

In [None]:
# Submit the build defined in cloudbuild.yaml to Cloud Build in region $LOCATION.
# The full image URI is passed as the _IMAGE_NAME substitution; how it is used
# depends on cloudbuild.yaml (not visible here) — presumably as the tag to build
# and push. TODO confirm.
!gcloud builds submit --config cloudbuild.yaml \
    --region=$LOCATION \
    --substitutions="_IMAGE_NAME=$MODEL_IMAGE_URI" 

# Test evaluation on LocalEndpoint

## Create Local Model

In [None]:
from google.cloud.aiplatform.prediction import LocalModel

# Describe the serving container as a LocalModel so it can be exercised locally
# before it is deployed. The health and predict routes are the HTTP paths
# declared for the container.
# NOTE(review): these must match the routes the server inside the image
# actually exposes — confirm against the container code.
local_model = LocalModel(
    serving_container_image_uri=MODEL_IMAGE_URI,
    serving_container_health_route='/health',
    serving_container_predict_route='/predict',
)

## "Deploy" model to local endpoint

In [None]:
# Create a local endpoint object for the container; gpu_count=0 requests no GPUs.
# Serving is started separately with local_endpoint.serve().
local_endpoint = local_model.deploy_to_local_endpoint(
    gpu_count=0,
)

## Start serving endpoint and run health check

In [None]:
# Start the local endpoint; it keeps serving until local_endpoint.stop() is called.
local_endpoint.serve()

# Query the health route and print both the response object and its raw body.
health_check_response = local_endpoint.run_health_check()
print(health_check_response, health_check_response.content)

## Test prediction on local endpoint

In [None]:
from IPython.display import display, HTML

# Read the sample request payload; the file is closed before the request is sent.
with open('request.json', 'r') as request_file:
    request = request_file.read()

# Send the raw JSON text to the locally served container.
prediction_res = local_endpoint.predict(
    request=request,
    headers={"Content-Type": "application/json"},
)

# Anything other than HTTP 200 is rendered as HTML so the error body is readable;
# a successful reply is pretty-printed as JSON.
if prediction_res.status_code != 200:
    display(HTML(prediction_res.text))
else:
    prediction = prediction_res.json()
    print(json.dumps(prediction, indent=2))

## Stop serving local endpoint

In [None]:
# Stop the local endpoint that was started with local_endpoint.serve().
local_endpoint.stop()

# Deploy prediction container to Model Registry

In [None]:
# Register the serving container in the Vertex AI Model Registry.
#
# If a model with display name IMAGE_NAME already exists, the image is uploaded
# as a new version of it. Otherwise a new model is created *with* that display
# name, so that later runs of this cell can find it and add versions to it.
#
# Errors from the lookup or the upload are deliberately not caught: a failed
# upload must surface instead of being silently retried as a brand-new model.
existing_models = aiplatform.Model.list(
    filter=f'display_name="{IMAGE_NAME}"',
    order_by="create_time desc",
    project=PROJECT,
    location=LOCATION,
)

# If several models share the display name, use the most recently created one.
parent_model = existing_models[0] if existing_models else None

# Arguments shared by both the "new version" and "new model" uploads.
upload_kwargs = {
    "serving_container_image_uri": MODEL_IMAGE_URI,
    "serving_container_health_route": "/health",
    "serving_container_predict_route": "/predict",
    "project": PROJECT,
    "location": LOCATION,
    "sync": True,
}

if parent_model is not None:
    # Upload as a new version under the existing registry entry.
    upload_kwargs["parent_model"] = parent_model.resource_name
else:
    # First upload: set the display name that the lookup above filters on.
    upload_kwargs["display_name"] = IMAGE_NAME

model = aiplatform.Model.upload(**upload_kwargs)

# Create Vertex Endpoint

In [None]:
# Get or create the Vertex AI endpoint named "<IMAGE_NAME>_endpoint".
#
# The lookup result is checked explicitly instead of relying on a bare
# `except:`, so authentication/network errors (or a keyboard interrupt) are
# raised rather than being mistaken for "no endpoint exists yet".
endpoint_display_name = f"{IMAGE_NAME}_endpoint"

existing_endpoints = aiplatform.Endpoint.list(
    filter=f'display_name="{endpoint_display_name}"',
    order_by="create_time desc",
    project=PROJECT,
    location=LOCATION,
)

if existing_endpoints:
    # Reuse the most recently created match rather than creating a duplicate.
    endpoint = existing_endpoints[0]
    print("Endpoint already exists")
else:
    endpoint = aiplatform.Endpoint.create(
        display_name=endpoint_display_name,
        project=PROJECT,
        location=LOCATION,
        sync=True,
    )
    print("Endpoint created:", endpoint.resource_name)

# Deploy Model to Vertex Endpoint

In [None]:
# Serving configuration: route all traffic to this model on a single,
# fixed-size replica (min == max, so no autoscaling range).
deploy_settings = {
    "traffic_percentage": 100,
    "machine_type": "n1-standard-4",
    "min_replica_count": 1,
    "max_replica_count": 1,
}

# The returned object is what the prediction cell below calls .predict() on.
deployed_model = model.deploy(endpoint=endpoint, sync=True, **deploy_settings)

# Test Prediction on Endpoint

In [None]:
import time
import numpy as np

# Time a single online prediction request carrying a batch of random users.
#
# Changes from the naive version:
#   * the payload is built *before* the timer starts, so only the request
#     itself is measured;
#   * time.perf_counter() is used because it is a monotonic clock intended for
#     measuring durations (time.time() can jump if the system clock changes);
#   * the random user ids come from a seeded generator so reruns send the same
#     payload.
NUM_TEST_INSTANCES = 2_000
RANDOM_SEED = 42

rng = np.random.default_rng(RANDOM_SEED)
# User ids are drawn from 1..9999 (the upper bound is exclusive).
user_ids = rng.integers(1, 10_000, size=NUM_TEST_INSTANCES).tolist()

instances = [
    {
        "user_id": str(user_id),
        "exclusions": ["Fargo (1996)"],
    }
    for user_id in user_ids
]
parameters = {
    "k": 5
}

start_time = time.perf_counter()

test_prediction = deployed_model.predict(
    instances=instances,
    parameters=parameters,
)

end_time = time.perf_counter()

print("Elapsed seconds:", end_time - start_time)

In [None]:
# Preview at most the first ten predictions; slicing already clamps to the
# number of predictions available, so no explicit min() is needed.
predictions_preview = test_prediction.predictions[:10]
prediction_num_display = len(predictions_preview)

print(json.dumps(predictions_preview, indent=2))