In [29]:
%pip install google-cloud-aiplatform google-cloud-storage scikit-learn xgboost fastapi uvicorn
# gcloud auth application-default login

/Users/Sam/git/ml_projects/.venv/bin/python3: No module named pip
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import json
from pathlib import Path
import pickle
from datetime import datetime

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score

from google.cloud import aiplatform
from google.cloud.aiplatform.gapic.schema import predict

In [2]:
%%markdown
## 1. Config

## 1. Config


In [3]:
PROJECT_ID = "project-04642f0b-576e-45fc-81f"           
LOCATION = "us-central1"                 
BUCKET_NAME = f"{PROJECT_ID}-vertex-models"
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

MODEL_DISPLAY_NAME = f"fraud-xgb-{TIMESTAMP}"
ENDPOINT_DISPLAY_NAME = f"fraud-endpoint-{TIMESTAMP}"

# Artifact paths
ARTIFACT_URI = f"gs://{BUCKET_NAME}/{MODEL_DISPLAY_NAME}"
CONTAINER_IMAGE = f"us-docker.pkg.dev/{PROJECT_ID}/vertex-prediction/custom-fastapi:latest"

In [4]:
%%markdown
## 2. Train and Save a simple Model

## 2. Train and Save a simple Model


In [5]:
X, y = make_classification(
    n_samples=50_000, n_features=20, n_informative=15,
    random_state=42, class_sep=1.2
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = HistGradientBoostingClassifier(max_iter=200, random_state=42)
model.fit(X_train, y_train)

print("Train accuracy:", accuracy_score(y_train, model.predict(X_train)))
print("Test  accuracy:", accuracy_score(y_test,  model.predict(X_test)))

# Save model + preprocessor info
model_dir = Path("artifacts/model")
model_dir.mkdir(parents=True, exist_ok=True)

with open(model_dir / "model.pkl", "wb") as f:
    pickle.dump(model, f)

# Minimal metadata (Vertex AI likes to see this sometimes)
metadata = {"framework": "scikit-learn", "version": "1.5", "trained": TIMESTAMP}
with open(model_dir / "metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)

Train accuracy: 0.9942
Test  accuracy: 0.9829


In [6]:
print(X)

[[-1.63657534  1.96651418  0.19538661 ... -0.64602269  0.13313867
   3.69264014]
 [-5.97280996  0.52032228 -0.84339535 ... -3.74705627  0.49234605
  -0.33216355]
 [-4.49936943  0.38124791  0.05331708 ... -1.22109478 -0.6326804
   2.90264548]
 ...
 [-3.6684972   1.36052215  0.84745957 ...  1.54544045 -1.420468
  -0.71083699]
 [ 6.20095328 -0.05635427  5.13914594 ...  1.19260612  0.68431307
  -3.45626169]
 [-5.51872527  6.44380414 -2.879291   ... -3.86224261 -0.57876872
  -0.76907594]]


In [7]:
%%markdown
## 3. Upload model artifacts to GCS

## 3. Upload model artifacts to GCS


In [8]:
!gcloud auth login

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=32555940559.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fappengine.admin+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcompute+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Faccounts.reauth&state=7hUsmpzofvUD068c6ANmhmW9IVcJDu&access_type=offline&code_challenge=dWtj06eUiP3nL6UM9pYu93XLsXLyhjhbAR-JY0wGy4w&code_challenge_method=S256


You are now logged in as [sam.aiexpert2023@gmail.com].
Your current project is [project-04642f0b-576e-45fc-81f].  You can change this setting by running:
  $ gcloud config set project PROJECT_ID


Updates are available for some Google Cloud CLI components.  To install them,
please run:
  $ gcloud compo

In [9]:
!gsutil -m cp -r artifacts/model/* gs://{BUCKET_NAME}/{MODEL_DISPLAY_NAME}/

If you experience problems with multiprocessing on MacOS, they might be related to https://bugs.python.org/issue33725. You can disable multiprocessing by editing your .boto config or by adding the following flag to your command: `-o "GSUtil:parallel_process_count=1"`. Note that multithreading is still available even if you disable multiprocessing.

Copying file://artifacts/model/metadata.json [Content-Type=application/json]...
Copying file://artifacts/model/model.pkl [Content-Type=application/octet-stream]...
- [2/2 files][740.0 KiB/740.0 KiB] 100% Done                                    
Operation completed over 2 objects/740.0 KiB.                                    


In [10]:
%%markdown
## 4. Create FastAPI serving application (prediction server)

## 4. Create FastAPI serving application (prediction server)


In [26]:
fastapi_code = """\
# app/main.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pickle
import numpy as np
from typing import List

app = FastAPI(title="Vertex AI Custom Prediction")

with open("/model/model.pkl", "rb") as f:
    model = pickle.load(f)

class PredictRequest(BaseModel):
    instances: List[List[float]]

class PredictResponse(BaseModel):
    predictions: List[int]
    probabilities: List[List[float]]

@app.post("/predict", response_model=PredictResponse)
async def predict(body: PredictRequest):
    try:
        X = np.array(body.instances, dtype=np.float32)
        if X.ndim != 2:
            raise ValueError("Expected 2D array")

        proba = model.predict_proba(X).tolist()
        preds = model.predict(X).tolist()

        return {"predictions": preds, "probabilities": proba}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

@app.get("/health")
async def health():
    return {"status": "healthy"}
"""

with open("app/main.py", "w") as f:
    f.write(fastapi_code)

# Minimal requirements
with open("app/requirements.txt", "w") as f:
    f.write("""\
            fastapi==0.115.0
            uvicorn==0.30.6
            numpy==1.26.4
            scikit-learn==1.5.2
            """)

# Very simple Dockerfile
dockerfile = f"""\
FROM python:3.11-slim

WORKDIR /app

COPY /app/requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY /app/main.py main.py

COPY /artifacts/model /model

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
"""

with open("Dockerfile", "w") as f:
    f.write(dockerfile)

In [12]:
%%markdown
## 5. Build & push container (run these commands manually or via cloudbuild)

## 5. Build & push container (run these commands manually or via cloudbuild)


In [13]:
!gcloud services enable compute.googleapis.com --project {PROJECT_ID} --quiet
!gcloud services enable appengine.googleapis.com --project {PROJECT_ID} --quiet   # or whichever API it complained about

In [28]:
!gcloud auth application-default login

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=BIzWxxETFsGpThHxyFGIJPuuEDp3rI&access_type=offline&code_challenge=BaeoRAea6YJImLnvInqhkz0MVtUduoxiypHnJlHVLgs&code_challenge_method=S256


Credentials saved to file: [/Users/Sam/.config/gcloud/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).

Quota project "project-04642f0b-576e-45fc-81f" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project owning the resource.


In [31]:
!gcloud artifacts repositories create vertex-prediction \
  --repository-format=docker \
  --location=us \
  --description="Repository for Vertex custom FastAPI images"


Create request issued for: [vertex-prediction]
Waiting for operation [projects/project-04642f0b-576e-45fc-81f/locations/us/ope
rations/19bb590e-52c1-461d-bee1-46b2d074d744] to complete...done.              
Created repository [vertex-prediction].


In [30]:
!gcloud builds submit --tag {CONTAINER_IMAGE} \
  --project {PROJECT_ID} \
  --timeout=20m \
  --quiet

Creating temporary archive of 11 file(s) totalling 840.4 KiB before compression.
Uploading tarball of [.] to [gs://project-04642f0b-576e-45fc-81f_cloudbuild/source/1770268358.837656-b0455d3227364abc9a9f8a43e4f2c910.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/project-04642f0b-576e-45fc-81f/locations/global/builds/9f78f12f-0427-4054-b484-aea338795364].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/9f78f12f-0427-4054-b484-aea338795364?project=492144913879 ].
Waiting for build to complete. Polling interval: 1 second(s).
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "9f78f12f-0427-4054-b484-aea338795364"

FETCHSOURCE
Fetching storage object: gs://project-04642f0b-576e-45fc-81f_cloudbuild/source/1770268358.837656-b0455d3227364abc9a9f8a43e4f2c910.tgz#1770268359724222
Copying gs://project-04642f0b-576e-45fc-81f_cloudbuild/source/1770268358.837656-b0455d3227364abc9a9f8a43e4f2c910.tgz#1770268359724222

In [32]:
print("\nBuild & push the container with one of the commands above ↑\n")


Build & push the container with one of the commands above ↑



In [33]:
%%markdown
## 6. Upload model to Vertex AI Model Registry

## 6. Upload model to Vertex AI Model Registry


In [34]:
aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=ARTIFACT_URI)

model = aiplatform.Model.upload(
    display_name=MODEL_DISPLAY_NAME,
    artifact_uri=ARTIFACT_URI,
    serving_container_image_uri=CONTAINER_IMAGE,
    serving_container_environment_variables={
        "GUNICORN_CMD_ARGS": "--timeout 120"
    },
    serving_container_ports=[8080],
    description="Fraud detection model - HistGradientBoosting"
)

print(f"Model uploaded: {model.resource_name}")

Creating Model
Create Model backing LRO: projects/492144913879/locations/us-central1/models/8520800599380328448/operations/9111021722525499392
Model created. Resource name: projects/492144913879/locations/us-central1/models/8520800599380328448@1
To use this Model in another session:
model = aiplatform.Model('projects/492144913879/locations/us-central1/models/8520800599380328448@1')
Model uploaded: projects/492144913879/locations/us-central1/models/8520800599380328448


In [35]:
print(f"Model uploaded: {model.resource_name}")

Model uploaded: projects/492144913879/locations/us-central1/models/8520800599380328448


In [37]:
%%markdown
## 7. Deploy to an endpoint

## 7. Deploy to an endpoint


In [None]:
endpoint = model.deploy(
    deployed_model_display_name=f"deployed-{TIMESTAMP}",
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=2,
    traffic_percentage=100,
)

print(f"Endpoint created: {endpoint.resource_name}")

Creating Endpoint
Create Endpoint backing LRO: projects/492144913879/locations/us-central1/endpoints/6321750955549261824/operations/8392134632006483968
Endpoint created. Resource name: projects/492144913879/locations/us-central1/endpoints/6321750955549261824
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/492144913879/locations/us-central1/endpoints/6321750955549261824')
Deploying model to Endpoint : projects/492144913879/locations/us-central1/endpoints/6321750955549261824
Deploy Endpoint model backing LRO: projects/492144913879/locations/us-central1/endpoints/6321750955549261824/operations/8226627345700618240


In [None]:
%%markdown
## 8. Test prediction

In [None]:
test_instances = [
    X_test[0].tolist(),
    X_test[1].tolist(),
    X_test[42].tolist(),
]

prediction = endpoint.predict(instances=test_instances)

print("\nPrediction result:")
print(json.dumps(prediction.predictions, indent=2))

# Or lower-level way (good for debugging)
instances_proto = [
    {"content": json.dumps(inst)} for inst in test_instances
]

response = endpoint.raw_predict(
    http_body=predict.HttpBody(
        data=json.dumps({"instances": test_instances}).encode("utf-8"),
        content_type="application/json"
    )
)

print("\nRaw response:")
print(response.data.decode("utf-8"))