In [6]:
# Create directories
USER_SRC_DIR = "src_dir_sdk"

In [7]:
%%writefile $USER_SRC_DIR/requirements.txt
xgboost==1.6.2
google-cloud-aiplatform==1.25.0
fastapi
numpy
uvicorn==0.17.6
pandas==1.3.5
joblib==1.1.0
scikit-learn==1.0.2
google-cloud-storage>=1.26.0,<2.0.0
shapely==1.8.5.post1 
pygeos==0.12.0 
geopandas==0.10.2

Overwriting src_dir_sdk/requirements.txt


In [8]:
import os
import logging
import pandas as pd
import joblib
import xgboost as xgb
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, precision_recall_curve
from google.cloud import aiplatform
from google.cloud.aiplatform.prediction import LocalModel


In [9]:

# Set up Google Cloud project and bucket
PROJECT_ID = "henry-scien"  # @param {type:"string"}
REGION = "us-central1"
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
MODEL_ARTIFACT_DIR = "xgboost-model-sdk"
REPOSITORY = "custom-container-prediction-sdk"
IMAGE = "xgboost-server-sdk"
MODEL_DISPLAY_NAME = "xgboost-model-sdk"
BUCKET_NAME = "xgboost_new_-henry-scien-unique"
BUCKET_URI = f"gs://{BUCKET_NAME}"


LOCAL_MODEL_ARTIFACTS_DIR = "model_artifacts"
%mkdir $USER_SRC_DIR
%mkdir $LOCAL_MODEL_ARTIFACTS_DIR




# Train and export the model
url = "https://raw.githubusercontent.com/prins2516/Dataset/main/datasets_228_482_diabetes.csv"
raw_data = pd.read_csv(url)
X = raw_data.drop(['Outcome'], axis=1)
y = raw_data['Outcome']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
xgbc = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, gamma=0, subsample=0.5, colsample_bytree=1, max_depth=8)
xgbc.fit(X_train, y_train)

predictions = xgbc.predict(X_test)
score = accuracy_score(y_test, predictions)
auc = roc_auc_score(y_test, predictions)
precision_recall_curve(y_test, predictions)

os.makedirs("model_artifacts", exist_ok=True)
joblib.dump(xgbc, "model_artifacts/model.joblib")

# Upload model artifacts to GCS
!gsutil cp model_artifacts/* $BUCKET_URI/$MODEL_ARTIFACT_DIR/
!gsutil ls $BUCKET_URI/$MODEL_ARTIFACT_DIR/

# Build the custom container


mkdir: cannot create directory ‘src_dir_sdk’: File exists
mkdir: cannot create directory ‘model_artifacts’: File exists
Copying file://model_artifacts/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][184.5 KiB/184.5 KiB]                                                
Operation completed over 1 objects/184.5 KiB.                                    
gs://xgboost_new_-henry-scien-unique/xgboost-model-sdk/model.joblib


In [12]:
%%writefile src_dir_sdk/predictor.py
import joblib
import numpy as np
 
from google.cloud.aiplatform.utils import prediction_utils
from google.cloud.aiplatform.prediction.predictor import Predictor
class SklearnPredictor(Predictor):
    def __init__(self):
        return
    def load(self, artifacts_uri: str):
        prediction_utils.download_model_artifacts(artifacts_uri)
        self._model = joblib.load("model.joblib")
 
    def preprocess(self, prediction_input: dict) -> np.ndarray:
        instances = prediction_input["instances"]
        return np.asarray(instances)
 
    def predict(self, instances: np.ndarray) -> np.ndarray:
        return self._model.predict(instances)
 
    def postprocess(self, prediction_results: np.ndarray) -> dict:
        return {"predictions": prediction_results.tolist()}


Overwriting src_dir_sdk/predictor.py


In [13]:
from src_dir_sdk.predictor import SklearnPredictor

aiplatform.init(project=PROJECT_ID, location=REGION)

local_model = LocalModel.build_cpr_model(
    USER_SRC_DIR,
    f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=SklearnPredictor,
    requirements_path=os.path.join(USER_SRC_DIR, "requirements.txt"),
)



  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [14]:
import json

sample = {"instances": [[6, 140, 72, 35, 0, 33.6, 0.62, 48]]}

with open('instances.json', 'w') as fp:
    json.dump(sample, fp)
    
with local_model.deploy_to_local_endpoint(
    artifact_uri = 'model_artifacts/', # local path to artifacts
) as local_endpoint:
    predict_response = local_endpoint.predict(
        request_file='instances.json',
        headers={"Content-Type": "application/json"},
    )

    health_check_response = local_endpoint.run_health_check()


In [15]:
predict_response.content

b'{"predictions": [1]}'

In [16]:

# Push the image to Artifact Registry
!gcloud artifacts repositories create {REPOSITORY} --repository-format=docker \
--location={REGION} --description="Docker repository"
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet
local_model.push_image()




Create request issued for: [custom-container-prediction-sdk]
Waiting for operation [projects/henry-scien/locations/us-central1/operations/a7
04b8ee-9f1f-49e3-a262-a08b113c3a1e] to complete...done.                        
Created repository [custom-container-prediction-sdk].

{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
Docker configuration file updated.


  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)


In [19]:
# Upload the model to Vertex AI Model Registry
model = aiplatform.Model.upload(local_model=local_model,
                                display_name=MODEL_DISPLAY_NAME,
                                artifact_uri=f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}")

# Deploy the model for online predictions
endpoint = model.deploy(machine_type="n1-standard-2")

'us-central1-docker.pkg.dev/henry-scien/custom-container-prediction-sdk/xgboost-server-sdk'

In [2]:
f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}"

NameError: name 'BUCKET_URI' is not defined

In [None]:
us-central1-docker.pkg.dev/henry-scien/custom-container-prediction-sdk/xgboost-server-sdk