In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Build a Custom Prediction Routine (CPR) Model Server and Handler

1. Model Server
    * HTTP server that hosts the model.
    * Responsible for setting up routes/ports/etc.
2. Request Handler
    * Responsible for webserver aspects of handling a request, such as deserializing the request body, serializing the response, setting response headers, etc.
    * In this example, we subclass the SDK's default handler, google.cloud.aiplatform.prediction.handler.PredictionHandler, with a custom CprHandler that accepts CSV request bodies.
3. Predictor
    * Responsible for the ML logic for processing a prediction request.

In [1]:
# Show the working directory; the relative paths used below resolve against it.
!pwd

/home/jupyter/llmOps_vertexAI/cpr_handler


### Configuration

In [3]:
! pip install --upgrade --quiet  google-cloud-aiplatform \
                                 google-cloud-storage

In [4]:
# Google Cloud project, region and Cloud Storage bucket used by the cells below.
PROJECT_ID = "ai-hangsik"
LOCATION = "us-central1"
BUCKET_URI = "gs://sllm_0116"

In [5]:
import os
import sys
from google.cloud import aiplatform

# Initialise the Vertex AI SDK with the project, region and staging bucket
# configured above.
aiplatform.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=BUCKET_URI,
)

In [6]:
# Names of the resources this notebook reads or creates.
MODEL_ARTIFACT_DIR = "cpr-handler-model"  # folder under BUCKET_URI holding the model artifact
REPOSITORY = "cpr-handler-prediction"  # Artifact Registry repository name
IMAGE = "cpr-handler-server"  # serving container image name
MODEL_DISPLAY_NAME = "cpr-handler-model"  # display name of the uploaded model
USER_SRC_DIR = "app"  # local directory that receives the CPR source files

# %%writefile does not create parent directories, so make sure the source
# directory exists before the cells below write into it.
os.makedirs(USER_SRC_DIR, exist_ok=True)

### CPR source code.

In [70]:
%%writefile $USER_SRC_DIR/predictor.py

import numpy as np
import pickle

from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils

from sklearn.datasets import load_iris

class CprPredictor(Predictor):
    """Predictor that serves a pickled model and answers with iris class names."""

    def __init__(self):
        # Both attributes are populated by load(); defining them here means
        # they always exist on the instance.
        self._model = None
        self._class_names = None

    def load(self, artifacts_uri: str):
        """Downloads the model artifacts and loads the pickled model.

        Args:
            artifacts_uri: Location of the model artifacts; passed unchanged to
                prediction_utils.download_model_artifacts.
        """
        print("Load start!!!")
        prediction_utils.download_model_artifacts(artifacts_uri)

        # model.pkl is opened relative to the working directory, so the
        # download above is expected to have placed it there.
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # artifacts_uri at a trusted location.
        with open("model.pkl", "rb") as model_f:
            self._model = pickle.load(model_f)

        # Class indices returned by the model are mapped to these names.
        self._class_names = load_iris().target_names

        print(f"Model : {self._model}")

    def predict(self, instances):
        """Performs prediction.

        Args:
            instances: Array-like of inputs; converted with np.asarray and
                handed to the loaded model's predict method.

        Returns:
            A dict with a single "predictions" key holding one class name per
            model output.
        """
        inputs = np.asarray(instances)

        print(f"Inputs: {inputs}")
        outputs = self._model.predict(inputs)

        return {"predictions": [self._class_names[class_num] for class_num in outputs]}

Overwriting app/predictor.py


In [71]:
%%writefile $USER_SRC_DIR/handler.py

import csv
from io import StringIO
import json
from fastapi import Response
from google.cloud.aiplatform.prediction.handler import PredictionHandler

class CprHandler(PredictionHandler):
    """Prediction handler that accepts CSV request bodies and answers with JSON."""

    async def handle(self, request):
        """Handles a prediction request.

        Args:
            request: Incoming request; its raw body is parsed as CSV rows of
                numbers.

        Returns:
            A Response whose content is the JSON-encoded prediction results.
        """
        request_body = await request.body()

        print(f"request_body : {request_body}")

        prediction_instances = self._convert_csv_to_list(request_body)
        prediction_results = self._predictor.postprocess(
            self._predictor.predict(self._predictor.preprocess(prediction_instances))
        )

        print(f"prediction_results : {prediction_results}")

        # The body is JSON, so declare it in the Content-Type header.
        return Response(
            content=json.dumps(prediction_results),
            media_type="application/json",
        )

    def _convert_csv_to_list(self, data):
        r"""Converts UTF-8 encoded CSV bytes to a list of rows of floats.

        Blank lines are skipped so that a trailing or stray empty line does
        not add an empty row to the result.

        Example input:
          b"1.1,2.2,3.3,4.4\n2.3,3.4,4.5,5.6\n"

        Example output:
            [ [1.1, 2.2, 3.3, 4.4],[2.3, 3.4, 4.5, 5.6],]
        """
        text = data.decode("utf-8")
        # QUOTE_NONNUMERIC makes the reader convert every unquoted field to
        # float; csv.reader yields [] for a blank line, which is dropped here.
        res = [
            row
            for row in csv.reader(StringIO(text), quoting=csv.QUOTE_NONNUMERIC)
            if row
        ]
        print(f"res : {res}")

        return res

Overwriting app/handler.py


In [72]:
%%writefile $USER_SRC_DIR/requirements.txt
numpy
scikit-learn
google-cloud-storage

Overwriting app/requirements.txt


### Build and serve CPR model

In [73]:

import os

from google.cloud.aiplatform.prediction import LocalModel
from app.handler import CprHandler  # Custom handler class
from app.predictor import CprPredictor  # Custom predictor class

# https://cloud.google.com/python/docs/reference/aiplatform/1.19.1/google.cloud.aiplatform.prediction.LocalModel#google_cloud_aiplatform_prediction_LocalModel_build_cpr_model

# Build the CPR model from the sources in USER_SRC_DIR; the second argument is
# the Artifact Registry image URI that the resulting container spec reports.
local_model = LocalModel.build_cpr_model(
    USER_SRC_DIR,
    f"{LOCATION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=CprPredictor,  # custom predictor class.
    handler=CprHandler,  # custom handler class.
    requirements_path=os.path.join(USER_SRC_DIR, "requirements.txt"),
)

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [74]:
# Inspect the serving container spec (image URI, predict and health routes).
local_model.get_serving_container_spec()

image_uri: "us-central1-docker.pkg.dev/ai-hangsik/cpr-handler-prediction/cpr-handler-server"
predict_route: "/predict"
health_route: "/health"

In [81]:
import json

# Create a local endpoint for the built model, reading the model artifacts
# from the Cloud Storage folder {BUCKET_URI}/{MODEL_ARTIFACT_DIR}.
# API reference for deploy_to_local_endpoint:
# https://cloud.google.com/python/docs/reference/aiplatform/1.19.1/google.cloud.aiplatform.prediction.LocalModel#google_cloud_aiplatform_prediction_LocalModel_deploy_to_local_endpoint
local_endpoint = local_model.deploy_to_local_endpoint(
    artifact_uri=f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}",
    container_ready_timeout=600,  # presumably seconds; confirm in the SDK docs
)

In [85]:
# Start the local endpoint, then display the status of its container.
local_endpoint.serve()
local_endpoint.get_container_status()

'running'

In [87]:
# Call the health route of the local endpoint and display the HTTP response.
health_check_response = local_endpoint.run_health_check()
health_check_response

<Response [200]>

  #### Test local endpoint  

In [23]:
# CSV file holding the test instances sent to the local and deployed endpoints.
INPUT_FILE = "instances.csv"

In [24]:
%%writefile $INPUT_FILE
6.7,3.1,4.7,1.5
4.6,3.1,1.5,0.2

Overwriting instances.csv


In [91]:
# Send the CSV instances to the local endpoint, print the raw response body and
# then the container logs.
# The payload is CSV, so it is labelled as such; the custom handler reads the
# raw request body and does not inspect this header.
predict_response = local_endpoint.predict(
    request_file=INPUT_FILE,
    headers={"Content-Type": "text/csv"},
)
print(predict_response.content)
local_endpoint.print_container_logs()


b'{"predictions": ["virginica", "virginica"]}'


In [86]:
# Print the container logs; show_all=True presumably includes lines that were
# already printed by earlier calls (confirm against the SDK docs).
local_endpoint.print_container_logs(show_all=True)

In [78]:
# Troubleshooting aid: as the method name says, logs are printed only when the
# container is not running.
local_endpoint.print_container_logs_if_container_is_not_running(show_all=True)

In [None]:
# Stop the local endpoint once local testing is done, then display the
# container status.
local_endpoint.stop()
local_endpoint.get_container_status()

### Deploy local model on Vertex AI

#### Push local model to artifact repository

In [93]:
# Enable the Artifact Registry API, which hosts the serving image.
!gcloud services enable artifactregistry.googleapis.com

In [94]:
!gcloud artifacts repositories create {REPOSITORY} \
    --repository-format=docker \
    --location=$LOCATION

Create request issued for: [cpr-handler-prediction]
Waiting for operation [projects/ai-hangsik/locations/us-central1/operations/469
62fe3-70c5-4bdf-a4a6-cbdda0f906be] to complete...done.                         
Created repository [cpr-handler-prediction].


In [95]:
# Register gcloud as the Docker credential helper for the regional registry host.
!gcloud auth configure-docker {LOCATION}-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


In [96]:
# Push the serving image of the local model to its image URI in the registry.
local_model.push_image()

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


#### Upload and deploy model on Vertex AI

In [98]:
# Register the local model in Vertex AI, pointing it at the model artifacts
# stored under BUCKET_URI/MODEL_ARTIFACT_DIR.
model = aiplatform.Model.upload(
    local_model=local_model,
    display_name=MODEL_DISPLAY_NAME,
    artifact_uri=f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}",
)

Creating Model
Create Model backing LRO: projects/721521243942/locations/us-central1/models/8566845947328331776/operations/4049362683554693120
Model created. Resource name: projects/721521243942/locations/us-central1/models/8566845947328331776@1
To use this Model in another session:
model = aiplatform.Model('projects/721521243942/locations/us-central1/models/8566845947328331776@1')


In [99]:
# Create a new endpoint and deploy the uploaded model to it.
# NOTE: despite its name, `deployed_model` refers to the endpoint; the cells
# below read the endpoint resource name and endpoint ID from it.
deployed_model = model.deploy(
    endpoint=aiplatform.Endpoint.create(display_name="cpr-handler-model-endpoint"),
    machine_type="n1-standard-4"
)

Creating Endpoint
Create Endpoint backing LRO: projects/721521243942/locations/us-central1/endpoints/6570251578542915584/operations/403698785198276608
Endpoint created. Resource name: projects/721521243942/locations/us-central1/endpoints/6570251578542915584
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/721521243942/locations/us-central1/endpoints/6570251578542915584')
Deploying model to Endpoint : projects/721521243942/locations/us-central1/endpoints/6570251578542915584
Deploy Endpoint model backing LRO: projects/721521243942/locations/us-central1/endpoints/6570251578542915584/operations/6265133700220977152
Endpoint model deployed. Resource name: projects/721521243942/locations/us-central1/endpoints/6570251578542915584


### Test model deployed on Vertex AI

#### Python Test

In [102]:
# Full resource name of the endpoint (projects/.../locations/.../endpoints/...).
ENDPOINT_RESOURCE_NAME = deployed_model.resource_name
ENDPOINT_RESOURCE_NAME

'projects/721521243942/locations/us-central1/endpoints/6570251578542915584'

In [104]:
from google.api import httpbody_pb2
from google.cloud import aiplatform_v1

# Read the CSV test instances and wrap them in an HttpBody tagged as text/csv.
with open(INPUT_FILE) as csv_file:
    csv_payload = csv_file.read().encode("utf-8")

http_body = httpbody_pb2.HttpBody(data=csv_payload, content_type="text/csv")

# Address the request to the deployed endpoint.
request = aiplatform_v1.RawPredictRequest(
    endpoint=ENDPOINT_RESOURCE_NAME,
    http_body=http_body,
)

# The prediction client talks to the regional API host.
prediction_client = aiplatform_v1.PredictionServiceClient(
    client_options={"api_endpoint": f"{LOCATION}-aiplatform.googleapis.com"}
)

prediction_client.raw_predict(request=request)
     

data: "{\"predictions\": [\"virginica\", \"virginica\"]}"

#### HTTP curl test

In [106]:
# Short numeric ID of the endpoint, used by the curl and gcloud tests below.
ENDPOINT_ID = deployed_model.name
ENDPOINT_ID

'6570251578542915584'

In [108]:
! curl \
-H "Authorization: Bearer $(gcloud auth print-access-token)" \
-H "Content-Type: text/csv" \
--data-binary @instances.csv \
https://{LOCATION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{LOCATION}/endpoints/{ENDPOINT_ID}:rawPredict


{"predictions": ["virginica", "virginica"]}

#### gcloud CLI test

In [113]:
# Send the same CSV file to the deployed endpoint through the gcloud CLI.
!gcloud ai endpoints raw-predict $ENDPOINT_ID \
  --region=$LOCATION \
  --http-headers=Content-Type=text/csv \
  --request=@$INPUT_FILE

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
{"predictions": ["virginica", "virginica"]}