In [None]:
# Copyright 2024 Forusone
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Customer Container for prediction

### Configuration

In [290]:
! pip install --upgrade --quiet  google-cloud-aiplatform \
                                 google-cloud-storage

In [392]:
PROJECT_ID = "ai-hangsik" 
LOCATION = "us-central1" 
BUCKET_URI = f"gs://sllm_0116" 

MODEL_ARTIFACT_DIR = "custom-container-prediction"
REPOSITORY = "custom-container-repo"
IMAGE = "sklearn-fastapi-server"
MODEL_DISPLAY_NAME = "sklearn-fastapi-model"

SRC_DIR = "app"

In [292]:
import os
import sys
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)

### Build customer container

In [293]:
!pwd
!ls -al

/home/jupyter/llmOps_vertexAI/custom_container
total 148
drwxr-xr-x 4 jupyter jupyter  4096 Feb  5 07:23 .
drwxr-xr-x 6 jupyter jupyter  4096 Feb  3 09:54 ..
drwxr-xr-x 2 jupyter jupyter  4096 Feb  5 05:10 .ipynb_checkpoints
-rw-r--r-- 1 jupyter jupyter 34441 Feb  5 07:23 1.model_build.ipynb
-rw-r--r-- 1 jupyter jupyter 54405 Feb  5 07:23 2.custom_container.ipynb
-rw-r--r-- 1 jupyter jupyter 25964 Feb  3 09:42 3.deploy_model_vertexai.ipynb
-rw-r--r-- 1 jupyter jupyter   220 Feb  5 07:14 Dockerfile
drwxr-xr-x 2 jupyter jupyter  4096 Feb  5 07:21 app
-rw-r--r-- 1 jupyter jupyter    88 Feb  5 07:16 instances.json
-rw-r--r-- 1 jupyter jupyter    80 Feb  5 06:23 requirements.txt


In [393]:
%mkdir $SRC_DIR

mkdir: cannot create directory ‘app’: File exists


In [295]:
# Copy model to GCS
!gsutil cp app/model.pkl {BUCKET_URI}/{MODEL_ARTIFACT_DIR}/

Copying file://app/model.pkl [Content-Type=application/octet-stream]...
/ [1 files][  2.6 KiB/  2.6 KiB]                                                
Operation completed over 1 objects/2.6 KiB.                                      


#### Write application for HTTP Webserver using FastAPI()

In [394]:
%%writefile $SRC_DIR/main.py
from fastapi import FastAPI, Request

import joblib
import json
import numpy as np
import pickle
import os

from google.cloud import storage
from sklearn.datasets import load_iris

app = FastAPI()
gcs_client = storage.Client()

print(f"---------[ Environment ] --------------")
print(f"PORT:{os.environ['AIP_HTTP_PORT']}")
print(f"AIP_STORAGE_URI:{os.environ['AIP_STORAGE_URI']}")
print(f"AIP_HEALTH_ROUTE:{os.environ['AIP_HEALTH_ROUTE']}")
print(f"AIP_PREDICT_ROUTE:{os.environ['AIP_PREDICT_ROUTE']}")

# Download model file from GCS
with open("model.pkl", 'wb') as model_f:
    gcs_client.download_blob_to_file(
        f"{os.environ['AIP_STORAGE_URI']}/model.pkl", model_f
    )

# Load model file stored in local was downloaded from GCS
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

_class_names = load_iris().target_names
_model = model

@app.get(os.environ['AIP_HEALTH_ROUTE'], status_code=200)
def health():
    print(f"Health Check : OK !!")

    return {"status":"OK"}

@app.post(os.environ['AIP_PREDICT_ROUTE'])
async def predict(request: Request):
    body = await request.json()
    print(f"Prediction request body : {body}")

    instances = body["instances"]
    inputs = np.asarray(instances)
    outputs = _model.predict(inputs)

    return {"predictions": [_class_names[class_num] for class_num in outputs]}


Writing app/main.py


In [395]:
# %%writefile app/prestart.sh
# #!/bin/bash
# export PORT=$AIP_HTTP_PORT
# export WORKER_THREADS=$WORKER_THREADS

Overwriting app/prestart.sh


In [518]:
%%writefile app/entrypoint.sh
#!/bin/bash

export HOST=$AIP_HTTP_HOST
export PORT=$AIP_HTTP_PORT
export WORKER_THREADS=$WORKER_THREADS

uvicorn app.main:app --host=$HOST --port=$PORT --workers=$WORKER_THREADS


Overwriting app/entrypoint.sh


In [519]:
!chmod +x app/entrypoint.sh 

#### Build a Docker file.

In [520]:
%%writefile Dockerfile

FROM python:3.9

COPY ./app /app
COPY requirements.txt requirements.txt

RUN pip install -r requirements.txt

ENTRYPOINT ["/app/entrypoint.sh"]


Overwriting Dockerfile


In [521]:
%%writefile requirements.txt
numpy
scikit-learn
google-cloud-storage
uvicorn[standard]
gunicorn
fastapi[all]

Overwriting requirements.txt


In [522]:
! docker build --tag="{LOCATION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}" .

Sending build context to Docker daemon  200.2kB
Step 1/5 : FROM python:3.9
 ---> bb95474bc3b1
Step 2/5 : COPY ./app /app
 ---> 86b72602eafc
Step 3/5 : COPY requirements.txt requirements.txt
 ---> 716e7399690a
Step 4/5 : RUN pip install -r requirements.txt
 ---> Running in cd50d3142d63
Collecting numpy
  Downloading numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 19.5/19.5 MB 93.2 MB/s eta 0:00:00
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.5/13.5 MB 110.4 MB/s eta 0:00:00
Collecting google-cloud-storage
  Downloading google_cloud_storage-3.0.0-py2.py3-none-any.whl (173 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 173.9/173.9 kB 38.8 MB/s eta 0:00:00
Collecting uvicorn[standard]
  Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 62.3/

In [532]:
! docker images

REPOSITORY                                                                                                                TAG       IMAGE ID       CREATED             SIZE
us-central1-docker.pkg.dev/ai-hangsik/custom-inference-gpu/tgi-release                                                    latest    c926fe3a73f7   57 minutes ago      15.2GB
us-central1-docker.pkg.dev/ai-hangsik/custom-inference-gpu/vllm-release                                                   latest    545a407ea139   About an hour ago   26.4GB
us-central1-docker.pkg.dev/ai-hangsik/custom-container-repo/sklearn-fastapi-server                                        latest    28e1a768aebd   5 hours ago         1.42GB
<none>                                                                                                                    <none>    bcf670dcddec   5 hours ago         1.42GB
<none>                                                                                                                    <none>    

In [524]:
!docker stop local-iris

local-iris


In [525]:
!docker rm local-iris

local-iris


In [526]:
! docker run -d -p 80:8080 \
        --name=local-iris \
        --env AIP_HTTP_HOST=0.0.0.0 \
        --env AIP_HTTP_PORT=8080 \
        --env AIP_HEALTH_ROUTE=/health \
        --env AIP_PREDICT_ROUTE=/predict \
        --env AIP_STORAGE_URI={BUCKET_URI}/{MODEL_ARTIFACT_DIR} \
        --env WORKER_THREADS=4 \
        "{LOCATION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}"

bddc260c95e5bd52e64e15bfe098a040fa3effc340c72afce5fcaf7846161a2f


In [528]:
! docker ps -a

CONTAINER ID   IMAGE                                                                                COMMAND                  CREATED             STATUS                           PORTS                                   NAMES
bddc260c95e5   us-central1-docker.pkg.dev/ai-hangsik/custom-container-repo/sklearn-fastapi-server   "/app/entrypoint.sh"     6 seconds ago       Up 4 seconds                     0.0.0.0:80->8080/tcp, :::80->8080/tcp   local-iris
4addae9004b4   ddf5e5afc02a                                                                         "/bin/sh -c /app/pre…"   56 minutes ago      Exited (126) 56 minutes ago                                              xenodochial_dijkstra
31b3d6584e15   ddf5e5afc02a                                                                         "/bin/sh -c app/pres…"   About an hour ago   Exited (126) About an hour ago                                           stupefied_gates
291d1c5fb19b   59c85a25256b                                               

In [529]:
!docker logs bddc260c95e5

INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
INFO:     Started parent process [7]
INFO:     Started server process [11]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Started server process [12]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Started server process [10]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Started server process [9]
INFO:     Waiting for application startup.
INFO:     Application startup complete.


#### Test Health check and prediction

In [530]:
! curl localhost/health

{"status":"OK"}

In [531]:
%%writefile instances.json
{
    "instances": [
        [6.7, 3.1, 4.7, 1.5],
        [4.6, 3.1, 1.5, 0.2]
    ]
}

Overwriting instances.json


In [429]:
! curl -X POST \
  -d @instances.json \
  -H "Content-Type: application/json; charset=utf-8" \
  localhost/predict

{"predictions":["versicolor","setosa"]}

#### Manage Docker images

In [280]:
!docker stop local-iris

local-iris


In [281]:
!docker rm local-iris

local-iris


In [540]:
!docker images

REPOSITORY                                                                                                                TAG       IMAGE ID       CREATED        SIZE
us-central1-docker.pkg.dev/ai-hangsik/custom-inference-gpu/tgi-release                                                    latest    c926fe3a73f7   2 hours ago    15.2GB
us-central1-docker.pkg.dev/ai-hangsik/custom-container-repo/sklearn-fastapi-server                                        latest    28e1a768aebd   5 hours ago    1.42GB
us-central1-docker.pkg.dev/ai-hangsik/cpr-handler-prediction/cpr-handler-server                                           latest    56365fd66f7a   39 hours ago   1.53GB
us-central1-docker.pkg.dev/ai-hangsik/custom-inference-gpu/tgi-release                                                    <none>    8969912fc0db   4 days ago     15.2GB
us-docker.pkg.dev/deeplearning-platform-release/gcr.io/huggingface-text-generation-inference-cu124.2-4.ubuntu2204.py311   latest    9e59d29d5e8a   3 weeks ag

In [533]:
!docker rmi -f 545a407ea139

Untagged: us-central1-docker.pkg.dev/ai-hangsik/custom-inference-gpu/vllm-release:latest
Deleted: sha256:545a407ea1395f7352425d573c219739c5381d2113fe2d9fc9d61ffde4629818
Deleted: sha256:d7aeb408dc70f1ba872e24180cd9307362b7294d5394bb43ab30f37ccc7a2859
Deleted: sha256:bde324ddea3605282cc1a50957457b763b3b449246dd37c54bb99a63688cd817


In [539]:
!docker rmi -f $(docker images | grep "^<none>" | awk "{print $3}")

"docker rmi" requires at least 1 argument.
See 'docker rmi --help'.

Usage:  docker rmi [OPTIONS] IMAGE [IMAGE...]

Remove one or more images
