In [1]:
library(glue)
library(IRdisplay)
library(reticulate)
library(jsonlite)

In [2]:
# Point reticulate at whichever python3 executable Sys.which() finds on the PATH.
use_python(Sys.which("python3"))

Cloud blog: [Use R to train and deploy machine learning models on Vertex AI](https://cloud.google.com/blog/products/ai-machine-learning/train-and-deploy-ml-models-with-r-and-plumber-on-vertex-ai)


In [3]:
# Google Cloud project and region used throughout this notebook.
PROJECT_ID <- "cxb1-prj-test-no-vpcsc"
REGION <- "europe-west1"
# Cloud Storage bucket (staging, build logs, batch data); its name is derived from the project ID.
BUCKET_URI <- glue("gs://{PROJECT_ID}-vertex-r")

# Container image coordinates: repository, image name and tag.
DOCKER_REPO <- "vertex-r"
IMAGE_NAME <- "vertex-r-billingproj-logs"
IMAGE_TAG <- "latest"
# Fully qualified image URI on the regional docker.pkg.dev host.
IMAGE_URI <- glue("{REGION}-docker.pkg.dev/{PROJECT_ID}/{DOCKER_REPO}/{IMAGE_NAME}:{IMAGE_TAG}")

In [4]:
# Run a shell command and print or return its combined stdout/stderr.
#
# Arguments:
#   cmd     Program name. When `args` is NULL, `cmd` is instead a whole
#           command line: `{name}` placeholders are interpolated with glue()
#           using the caller's variables, and the result is split on single
#           spaces into the program and its arguments.
#   args    Optional character vector of arguments handed to system2() as-is
#           (no interpolation is performed in that case).
#   intern  If TRUE, return the captured output lines as a character vector;
#           if FALSE (default), print them with cat().
#
# Returns the captured lines when `intern` is TRUE; otherwise the invisible
# NULL produced by cat().
sh <- function(cmd, args = c(), intern = FALSE) {
  if (is.null(args)) {
    # Interpolate in the caller's frame, not this function's: otherwise the
    # locals here (`cmd`, `args`, `intern`, ...) shadow same-named variables
    # of the caller, and variables local to a calling function are not found.
    cmd <- glue(cmd, .envir = parent.frame())
    tokens <- strsplit(cmd, " ", fixed = TRUE)[[1]]
    cmd <- tokens[1]
    # system2() joins the arguments back together with spaces, so a quoted
    # argument that was split here still reaches the shell in one piece.
    if (length(tokens) > 1) {
      args <- tokens[-1]
    }
  }
  ret <- system2(cmd, args, stdout = TRUE, stderr = TRUE)
  # system2() attaches an "errmsg" attribute when it has an error to report.
  errmsg <- attr(ret, "errmsg", exact = TRUE)
  if (!is.null(errmsg)) cat(errmsg, "\n")
  if (intern) {
    return(ret)
  }
  cat(paste(ret, collapse = "\n"))
}

In [5]:
# Create staging bucket (only do this once)
#sh("gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}")

In [6]:
# Import the Vertex AI Python SDK through reticulate and initialise it with
# the project, region and staging bucket defined above.
aiplatform <- import("google.cloud.aiplatform")
aiplatform$init(project = PROJECT_ID, location = REGION, staging_bucket = BUCKET_URI)

### Build container using Cloud Build

In [7]:
# Submit the current directory to Cloud Build and tag the resulting image as IMAGE_URI.
# Build logs go to BUCKET_URI (--gcs-log-dir); the build is allowed up to one hour (--timeout=1h).
sh("gcloud builds submit --region={REGION} --tag={IMAGE_URI} --timeout=1h --billing-project={PROJECT_ID} --project={PROJECT_ID} --gcs-log-dir={BUCKET_URI}")

Creating temporary tarball archive of 20 file(s) totalling 49.1 KiB before compression.
Uploading tarball of [.] to [gs://cxb1-prj-test-no-vpcsc_cloudbuild/source/1668007839.88463-c29c4efba60543c58f518981dbc4e791.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/cxb1-prj-test-no-vpcsc/locations/europe-west1/builds/5458f674-484c-4087-b36d-f4814ab956a8].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds;region=europe-west1/5458f674-484c-4087-b36d-f4814ab956a8?project=1031952735253 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "5458f674-484c-4087-b36d-f4814ab956a8"

FETCHSOURCE
Fetching storage object: gs://cxb1-prj-test-no-vpcsc_cloudbuild/source/1668007839.88463-c29c4efba60543c58f518981dbc4e791.tgz#1668007840614857
Copying gs://cxb1-prj-test-no-vpcsc_cloudbuild/source/1668007839.88463-c29c4efba60543c58f518981dbc4e791.tgz#1668007840614857...
/ [1 files][ 14.3 KiB/ 14.3 KiB]                           

### Create Vertex AI Managed Dataset

In [8]:
# Create a Vertex AI tabular dataset from the California housing CSV in the
# cloud-samples-data bucket.
data_uri <- "gs://cloud-samples-data/ai-platform-unified/datasets/tabular/california-housing-tabular-regression.csv"
dataset <- aiplatform$TabularDataset$create(
  display_name = "California Housing Dataset",
  gcs_source = data_uri
)


### Create and run a Custom Training Job

In [64]:
# Define the custom-container training job. One image does double duty:
# training runs `Rscript train.R`, serving runs `Rscript serve.R`.
job <- aiplatform$CustomContainerTrainingJob(
  display_name = "vertex-r",
  # Training container
  container_uri = IMAGE_URI,
  command = c("Rscript", "train.R"),
  # Serving container and its HTTP routes
  model_serving_container_image_uri = IMAGE_URI,
  model_serving_container_command = c("Rscript", "serve.R"),
  model_serving_container_predict_route = "/predict",
  model_serving_container_health_route = "/health"
)


Run it:

In [65]:
# Launch the training job on the managed dataset and keep the model it returns.
model <- job$run(
  dataset = dataset,
  machine_type = "n1-standard-4",
  model_display_name = "vertex-r-model"
)


In [66]:
# Print the model object to show its resource name.
model

<google.cloud.aiplatform.models.Model object at 0x7f89ffe8b290> 
resource name: projects/1031952735253/locations/europe-west1/models/5799844671681200128

### Deploy the model to an Endpoint

First create an endpoint:

In [15]:
# Create an endpoint in the configured project and region.
endpoint <- aiplatform$Endpoint$create(
  display_name = "California Housing Endpoint 11-09",
  location = REGION,
  project = PROJECT_ID
)

# Keep the endpoint's resource name for building the prediction URL later.
ENDPOINT_ID <- endpoint$resource_name

Deploy the model to the endpoint:

In [16]:
# Deploy the trained model to the endpoint on an n1-standard-4 machine.
model$deploy(endpoint = endpoint, machine_type = "n1-standard-4")

<google.cloud.aiplatform.models.Endpoint object at 0x7f89f9b57110> 
resource name: projects/1031952735253/locations/europe-west1/endpoints/7718694772289830912

Now test it!

In [55]:
# Pieces of the online prediction request: the payload file, an access token
# obtained from gcloud, and the endpoint's :predict URL.
data_file <- "data/instances.json"
access_token <- sh("gcloud auth print-access-token", intern = TRUE)
url <- glue("https://{REGION}-aiplatform.googleapis.com/v1/{ENDPOINT_ID}:predict")

In [63]:
# POST the instances file to the endpoint's predict URL, authenticating with the bearer token.
# NOTE(review): the token is interpolated into the command line, so it may be visible to
# other processes on this machine — consider passing it another way.
sh('curl -X POST -H "Authorization: Bearer {access_token}" -H "Content-Type: application/json" {url} -d "@{data_file}"')

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1125    0   307  100   818   2417   6440 --:--:-- --:--:-- --:--:--  8858
{
  "predictions": [
    430193.4402,
    392028.1078,
    414126.1372,
    340872.1893,
    299184.9333
  ],
  "deployedModelId": "4103852383883231232",
  "model": "projects/1031952735253/locations/europe-west1/models/5691758280624308224",
  "modelDisplayName": "vertex-r-model",
  "modelVersionId": "1"
}

### Deploy in batch: Vertex AI Batch Prediction

In [101]:
# Copy the local batch input file into the bucket, where the batch prediction job reads it.
sh("gsutil cp data/batch_input.jsonl {BUCKET_URI}/data/batch_input.jsonl")

Copying file://batch_input.jsonl [Content-Type=application/octet-stream]...
/ [1 files][  803.0 B/  803.0 B]                                                
Operation completed over 1 objects/803.0 B.                                      

# Build the batch prediction request body as JSON.
# The body is generated with jsonlite instead of pasting strings together:
# hand-assembled JSON is easy to get subtly wrong (for example a trailing comma
# after the "gcsSource" object, which strict JSON parsers reject).
batch_pred_req <- as.character(toJSON(
  list(
    displayName = "vertex-r",
    model = model$resource_name,
    inputConfig = list(
      instancesFormat = "jsonl",
      gcsSource = list(
        # list() rather than c(): with auto_unbox a single URI must still be
        # emitted as a JSON array.
        uris = list(paste0(BUCKET_URI, "/data/batch_input.jsonl"))
      )
    ),
    outputConfig = list(
      predictionsFormat = "jsonl",
      gcsDestination = list(
        outputUriPrefix = paste0(BUCKET_URI, "/data/batch_pred_r_out/")
      )
    ),
    dedicatedResources = list(
      machineSpec = list(
        machineType = "n1-standard-32",
        acceleratorCount = "0"
      ),
      startingReplicaCount = 1,
      maxReplicaCount = 1
    )
  ),
  auto_unbox = TRUE,
  pretty = TRUE
))

In [111]:
# Write the request body to batch_pred_req.json in the working directory.
# writeLines() opens and closes the file itself when given a path.
writeLines(batch_pred_req, "batch_pred_req.json")

Send the request to start the batch prediction job:

In [None]:
# Get an access token from gcloud, then POST the saved request body to the
# project's regional batchPredictionJobs collection to start the job.
access_token <- sh("gcloud auth print-access-token", intern = TRUE)
sh('curl -X POST -H "Authorization: Bearer {access_token}" -H "Content-Type: application/json; charset=utf-8"  -d @batch_pred_req.json https://{REGION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{REGION}/batchPredictionJobs')

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1539    0   910  100   629   4789   3310 --:--:-- --:--:-- --:--:--  8057
{
  "name": "projects/1031952735253/locations/europe-west1/batchPredictionJobs/3811077726173921280",
  "displayName": "vertex-r",
  "model": "projects/1031952735253/locations/europe-west1/models/5799844671681200128",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://cxb1-prj-test-no-vpcsc-vertex-r/data/batch_input.jsonl"
      ]
    }
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://cxb1-prj-test-no-vpcsc-vertex-r/data/batch_pred_r_out/"
    }
  },
  "dedicatedResources": {
    "machineSpec": {
      "machineType": "n1-standard-32"
    },
    "startingReplicaCount": 1,
    "maxReplicaCount": 1
  },
  "manualBatchTuningParameters": {},
  "st

In [125]:
# Show a link to the Batch Prediction page of the public Google Cloud Console.
# The link uses console.cloud.google.com; pantheon.corp.google.com is a
# Google-internal host that external readers cannot reach.
display_html(glue(
  "<font size=+1>Now select region <code>{REGION}</code> in the ",
  "<a href='https://console.cloud.google.com/vertex-ai/batch-predictions?project={PROJECT_ID}'>",
  "Batch Prediction UI</a> in the Google Cloud Console to find the job.",
  "<p>It should take about 30 minutes to complete.</p></font>"
))