# Hugging Face Model Deployment to Vertex AI

This notebook guides you through deploying a Hugging Face model to Vertex AI. It covers:
1. Setting up your environment
2. Downloading and packaging your model
3. Building and pushing the Docker container
4. Deploying to Vertex AI

It uses environment variables that should be set from your infrastructure configuration provivded under `models/config/`.

Required environment variables:
```
GCP_PROJECT_ID            # Your GCP project ID
GCP_REGION               # Your GCP region
GCP_ARTIFACT_REGISTRY    # Artifact Registry repository
MODEL_ARTIFACTS_BUCKET   # GCS bucket for model artifacts
VERTEX_AI_ENDPOINT       # Vertex AI endpoint name
```

## Prerequisites
- Google Cloud SDK installed and configured
- Docker installed and running
- Required Python packages installed

In [None]:
# Install required packages
!pip install google-cloud-aiplatform huggingface-hub transformers torch google-cloud-storage

## 1. Configuration
Set the project root path:

In [2]:
import os
from pathlib import Path
import json
from typing import Dict
import sys

def setup_project_path():
    """Add project root to Python path by searching for .git directory"""
    current_path = Path.cwd()
    
    # Search up the directory tree for .git folder or pyproject.toml
    root_indicators = ['.git', 'pyproject.toml']
    
    while current_path != current_path.parent:
        if any((current_path / indicator).exists() for indicator in root_indicators):
            sys.path.append(str(current_path))
            return current_path
        current_path = current_path.parent
    
    raise RuntimeError(
        "Could not find project root. "
        "Please run this notebook from within the project directory."
    )

# Setup path
project_root = setup_project_path()
print(f"Project root detected at: {project_root}")

Project root detected at: /home/steffen/sign-language-translator


Set your project configuration and model details below:

In [3]:
import os
from pathlib import Path
import json
from typing import Dict
from models.vertex_ai import get_config

# Model-specific configuration that might change between deployments
model_config = {
    'model_id': 'openai/whisper-small',  # Hugging Face model ID
    'image_name': 'untrained-predictor',           # Docker image name
    'model_version': 'v1',                  # Model version
    'hf_task': 'text-to-speech',  # Hugging Face task type
}

# Load environment configuration
try:
    vertex_ai_config = get_config("dev") # Get vertex ai configuration for 'dev' environment
    
    # Access config properties directly
    config = {
        'environment': vertex_ai_config.environment,
        'project_id': vertex_ai_config.project_id,
        'region': vertex_ai_config.region,
        'artifact_registry_repo': vertex_ai_config.environment + '-' + vertex_ai_config.endpoint_name + '-repo',
        'artifacts_bucket': vertex_ai_config.project_id + '-' + vertex_ai_config.environment + '-' + vertex_ai_config.endpoint_name + '-artifacts',
        'endpoint_name': vertex_ai_config.endpoint_name,
        **model_config  # Add model-specific config
    }
    
    print("Configuration loaded successfully!")
    print("\nEnvironment settings:")
    print(json.dumps(config, indent=2))
except ValueError as e:
    print(f"Error: {e}")
    print("\nPlease set the required environment variables before continuing.")

Environment variables loaded from .env
Loading configuration from /home/steffen/sign-language-translator/models/vertex_ai/config/dev.yaml
Configuration loaded successfully!

Environment settings:
{
  "environment": "dev",
  "project_id": "sign-lang-translator-20241029",
  "region": "europe-west3",
  "artifact_registry_repo": "dev-vertex-ai-repo",
  "artifacts_bucket": "sign-lang-translator-20241029-dev-vertex-ai-artifacts",
  "endpoint_name": "vertex-ai",
  "model_id": "openai/whisper-small",
  "image_name": "untrained-predictor",
  "model_version": "v1",
  "hf_task": "text-to-speech"
}


## 2. Download and Package Model
Download the model from Hugging Face and package it for Vertex AI:

In [3]:
import os
from pathlib import Path
import tarfile
from transformers import AutoModel, AutoTokenizer
from huggingface_hub import login
from google.cloud import storage

def download_and_package_model(model_id: str, model_dir: str = './types/huggingface/model', skip_download: bool = False):
    """Download and package model, or use existing tar.gz if available"""
    tar_path = './types/huggingface/model.tar.gz'
    
    # If tar.gz exists and skip_download is False, use existing file
    if os.path.exists(tar_path) and not skip_download:
        print(f"Using existing {tar_path}")
        return tar_path
    
    # Otherwise, download and package the model
    os.makedirs(model_dir, exist_ok=True)
    
    print(f"Downloading model {model_id}...")
    model = AutoModel.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    
    print(f"Saving to {model_dir}")
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    
    print("Creating tar.gz archive...")
    with tarfile.open(tar_path, "w:gz") as tar:
        tar.add(model_dir, arcname=".")
    
    return tar_path


def upload_to_gcs(file_path: str, bucket_name: str, model_version: str):
    from google.api_core import retry
    destination_blob_name = f"{model_version}/model.tar.gz"
    
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    
    print(f"Uploading to gs://{bucket_name}/{destination_blob_name}")
    
    # Configure retry with longer timeout
    retry_config = retry.Retry(
        initial=1.0,  # Initial delay in seconds
        maximum=60.0,  # Maximum delay between retries
        multiplier=2.0,  # Multiplier applied to delay between retries
        deadline=600.0  # Total timeout in seconds (10 minutes)
    )
    
    blob.upload_from_filename(
        file_path,
        retry=retry_config,
        timeout=600  # 10 minute timeout
    )
    
    return f"gs://{bucket_name}/{destination_blob_name}"

# Execute
if 'HUGGINGFACE_TOKEN' in os.environ:
    login(os.environ['HUGGINGFACE_TOKEN'])

tar_path = download_and_package_model(config['model_id'], skip_download=True)
artifacts_uri = upload_to_gcs(
    tar_path, 
    config['artifacts_bucket'],
    config['model_version']
)
print(f"\nModel artifacts uploaded to: {artifacts_uri}")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/steffen/.cache/huggingface/token
Login successful
Downloading model openai/whisper-small...


2024-11-28 23:52:01.587346: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Saving to ./types/huggingface/model


Non-default generation parameters: {'max_length': 448, 'suppress_tokens': [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], 'begin_suppress_tokens': [220, 50257]}


Creating tar.gz archive...
Uploading to gs://sign-lang-translator-20241029-dev-vertex-ai-artifacts/v1/model.tar.gz

Model artifacts uploaded to: gs://sign-lang-translator-20241029-dev-vertex-ai-artifacts/v1/model.tar.gz


## 3. Build and Push Docker Container
Build the custom prediction routine container and push it to Artifact Registry:

In [6]:
import subprocess

def build_and_push_image(config):
    """Build and push the Docker image to Artifact Registry"""
    image_uri = f"{config['region']}-docker.pkg.dev/{config['project_id']}/{config['artifact_registry_repo']}/{config['image_name']}:latest"
    
    # Configure Docker authentication for Artifact Registry
    print("Configuring Docker authentication...")
    subprocess.run([
        "gcloud", "auth", "configure-docker",
        f"{config['region']}-docker.pkg.dev"
    ], check=True)
    
    # Get paths relative to the project root
    docker_path = project_root / "models" / "types" / "huggingface" / "docker"

    
    # Build command
    build_cmd = [
        "docker", "build",
        "-t", image_uri,
        "--build-arg", f"HF_TASK={config['hf_task']}",
        "--platform=linux/amd64",
        "-f", str(docker_path / "Dockerfile"),
        str(docker_path)
    ]
    
    print("Building Docker image...")
    subprocess.run(build_cmd, check=True)
    
    print("\nPushing Docker image...")
    subprocess.run(["docker", "push", image_uri], check=True)
    
    return image_uri

# Execute
container_image_uri = build_and_push_image(config)
print(f"\nContainer image available at: {container_image_uri}")

Configuring Docker authentication...



{
  "credHelpers": {
    "europe-west3-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: europe-west3-docker.pkg.dev
gcloud credential helpers already registered correctly.


Building Docker image...


#0 building with "default" instance using docker driver

#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 483B done
#1 DONE 0.0s

#2 [internal] load metadata for docker.io/alvarobartt/torch-gpu:py310-cu12.3-torch-2.2.0
#2 DONE 0.9s

#3 [internal] load .dockerignore
#3 transferring context: 2B done
#3 DONE 0.1s

#4 [1/5] FROM docker.io/alvarobartt/torch-gpu:py310-cu12.3-torch-2.2.0@sha256:1d47d9917362fbfdf5e713a7408f81bdbfd8afb40518a8229c5a953aff991507
#4 DONE 0.0s

#5 [internal] load build context
#5 transferring context: 69B 0.0s done
#5 DONE 0.1s

#6 [2/5] WORKDIR /app
#6 CACHED

#7 [3/5] COPY requirements.txt .
#7 CACHED

#8 [4/5] COPY predictor.py .
#8 CACHED

#9 [5/5] RUN pip install --no-cache-dir -r requirements.txt     fastapi     "uvicorn[standard]"     google-cloud-aiplatform
#9 4.792 Collecting fastapi
#9 5.108   Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
#9 5.500 Collecting google-cloud-aiplatform
#9 5.537   Downloading goog


Pushing Docker image...
The push refers to repository [europe-west3-docker.pkg.dev/sign-lang-translator-20241029/dev-vertex-ai-repo/untrained-predictor]
7ba2e8f8ae1e: Preparing
389d4ac4ebd5: Preparing
7d8bb2ae266b: Preparing
17cf2dcd2e65: Preparing
34e017a0c676: Preparing
53ec940b113a: Preparing
4e2b998b3fee: Preparing
7d363b148ad5: Preparing
81f7adde8bea: Preparing
fa9c963de60f: Preparing
25b5af5e3767: Preparing
256d88da4185: Preparing
7d363b148ad5: Waiting
81f7adde8bea: Waiting
fa9c963de60f: Waiting
4e2b998b3fee: Waiting
25b5af5e3767: Waiting
256d88da4185: Waiting
53ec940b113a: Waiting
389d4ac4ebd5: Layer already exists
7d8bb2ae266b: Layer already exists
17cf2dcd2e65: Layer already exists
34e017a0c676: Layer already exists
7d363b148ad5: Layer already exists
53ec940b113a: Layer already exists
81f7adde8bea: Layer already exists
4e2b998b3fee: Layer already exists
fa9c963de60f: Layer already exists
256d88da4185: Layer already exists
25b5af5e3767: Layer already exists
7ba2e8f8ae1e: Pushe

## 4. Deploy to Vertex AI
Finally, deploy the model to a Vertex AI endpoint:

In [14]:
from google.cloud import aiplatform

# First, verify the exact GCS path
artifacts_uri = f"gs://{config['artifacts_bucket']}/{config['model_version']}"
print(f"Using artifacts URI: {artifacts_uri}")

# Then deploy using this verified path
def deploy_model(config, artifacts_uri, container_image_uri):
    # Initialize Vertex AI
    aiplatform.init(project=config['project_id'], location=config['region'])
    
    print("Uploading model to Vertex AI...")
    model = aiplatform.Model.upload(
        display_name=f"hf-{config['model_id'].split('/')[-1]}",
        artifact_uri=artifacts_uri,  # This should point to the directory containing model.tar.gz
        serving_container_image_uri=container_image_uri,
        serving_container_environment_variables={
            "HF_TASK": config['hf_task'],
            "VERTEX_CPR_WEB_CONCURRENCY": "1"
        }
    )
    
    print("\nFetching existing endpoint...")
    endpoint = aiplatform.Endpoint(
        endpoint_name=f"{config['environment']}-{config['endpoint_name']}-endpoint",  # Fixed string concatenation
        project=config['project_id'],
        location=config['region']
    )


    print("\nDeploying model to endpoint...")
    endpoint = model.deploy(
        endpoint=endpoint,
        machine_type="n1-standard-4",
        min_replica_count=1,
        max_replica_count=1,
    )
    
    return endpoint

# Execute
endpoint = deploy_model(config, artifacts_uri, container_image_uri)
print(f"\nModel deployed successfully!")
print(f"Endpoint: {endpoint.resource_name}")

Using artifacts URI: gs://sign-lang-translator-20241029-dev-vertex-ai-artifacts/v1
Uploading model to Vertex AI...
Creating Model
Create Model backing LRO: projects/788230573749/locations/europe-west3/models/3322486644627472384/operations/7468035258215038976
Model created. Resource name: projects/788230573749/locations/europe-west3/models/3322486644627472384@1
To use this Model in another session:
model = aiplatform.Model('projects/788230573749/locations/europe-west3/models/3322486644627472384@1')

Fetching existing endpoint...

Deploying model to endpoint...
Deploying model to Endpoint : projects/788230573749/locations/europe-west3/endpoints/dev-vertex-ai-endpoint
Deploy Endpoint model backing LRO: projects/788230573749/locations/europe-west3/endpoints/dev-vertex-ai-endpoint/operations/3254917806809939968


FailedPrecondition: 400 Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=788230573749&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%22dev-vertex-ai-endpoint%22%0Aresource.labels.location%3D%22europe-west3%22. 9: Model server exited unexpectedly. Model server logs can be found at https://console.cloud.google.com/logs/viewer?project=788230573749&resource=aiplatform.googleapis.com%2FEndpoint&advancedFilter=resource.type%3D%22aiplatform.googleapis.com%2FEndpoint%22%0Aresource.labels.endpoint_id%3D%22dev-vertex-ai-endpoint%22%0Aresource.labels.location%3D%22europe-west3%22.

## 5. Test the Endpoint
Let's test the deployed model with a sample prediction:

In [None]:
import json
from google.cloud import aiplatform_v1
from google.api import httpbody_pb2

def test_prediction(project_id: str, location: str, endpoint_id: str, test_data: dict):
    client = aiplatform_v1.PredictionServiceClient(
        client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"}
    )
    
    endpoint = f"projects/{project_id}/locations/{location}/endpoints/{endpoint_id}"
    
    json_data = json.dumps(test_data)
    http_body = httpbody_pb2.HttpBody(
        data=json_data.encode("utf-8"),
        content_type="application/json",
    )
    
    request = aiplatform_v1.RawPredictRequest(
        endpoint=endpoint,
        http_body=http_body,
    )
    
    response = client.raw_predict(request)
    return json.loads(response.data)

# Test data for zero-shot classification
test_data = {
    "sequences": "I need help with my account login",
    "candidate_labels": ["account access", "billing", "technical issue", "general inquiry"]
}

# Get endpoint ID from the endpoint resource name
endpoint_id = endpoint.resource_name.split("/")[-1]

# Run test prediction
result = test_prediction(
    config['project_id'],
    config['region'],
    endpoint_id,
    test_data
)

print("Prediction result:")
print(json.dumps(result, indent=2))