# Installing Google Cloud dependencies

In [87]:
try:
    import google.cloud.service_usage_v1
except ImportError:
    print('You need to pip install google-cloud-service-usage')
    !pip install google-cloud-service-usage -q

In [88]:
try:
    import google.cloud.artifactregistry_v1
except ImportError:
    print('You need to pip install google-cloud-artifact-registry')
    !pip install google-cloud-artifact-registry -q

In [89]:
try:
    import google.cloud.devtools.cloudbuild
except ImportError:
    print('You need to pip install google-cloud-build')
    !pip install google-cloud-build

In [90]:
from google.cloud import aiplatform
from datetime import datetime
import pkg_resources
from IPython.display import Markdown as md
from google.cloud import service_usage_v1
from google.cloud.devtools import cloudbuild_v1
from google.cloud import artifactregistry_v1
from google.cloud import storage
from google.cloud import bigquery
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value
import json
import numpy as np
import pandas as pd

# Setup

In [91]:
project = !gcloud config get-value project
PROJECT_ID = project[0]
PROJECT_ID

'tymestack-439300'

In [92]:
REGION = 'us-central1'
EXPERIMENT = '01'
SERIES = '01'

# source data
BQ_PROJECT = PROJECT_ID
BQ_DATASET = 'housing'
BQ_TABLE = 'housing'

BASE_IMAGE = "gcr.io/deeplearning-platform-release/xgboost-cpu"
SCRIPT_PATH = 'trainer/train.py'
DOCKERFILE = 'Dockerfile'

HEAD_COMPUTE = 'n1-standard-16'
WORKER_COMPUTE = 'n1-standard-4'
DEPLOY_COMPUTE = 'n1-standard-8'

In [93]:
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

In [94]:
aiplatform.init(project=PROJECT_ID, location=REGION)
bq = bigquery.Client(project=PROJECT_ID)
gcs = storage.Client(project=PROJECT_ID)
su_client = service_usage_v1.ServiceUsageClient()
ar_client = artifactregistry_v1.ArtifactRegistryClient()
cb_client = cloudbuild_v1.CloudBuildClient()

In [95]:
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
BUCKET = PROJECT_ID
URI = f"gs://{BUCKET}/{SERIES}/{EXPERIMENT}"
DIR = f"temp/{EXPERIMENT}"

In [96]:
SERVICE_ACCOUNT = !gcloud config list --format='value(core.account)' 
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]
SERVICE_ACCOUNT

'652637980750-compute@developer.gserviceaccount.com'

# Traning with RAY (autoscaling)

Installing dependencies for RAY-based training

In [11]:
!pip install --upgrade xgboost_ray ray tensorboardX google.cloud.aiplatform



In [12]:
from google.cloud import aiplatform
import vertex_ray
from vertex_ray import AutoscalingSpec, Resources
import xgboost_ray as xgb_ray
from ray import tune
import pickle
import ray
import sys
from google.cloud import bigquery
from ray.air.config import RunConfig, ScalingConfig
from ray.air import CheckpointConfig
from ray.train.xgboost import XGBoostTrainer
import pandas as pd
from ray.tune.tuner import Tuner, TuneConfig
from ray.tune.search.bayesopt import BayesOptSearch

Initializing RAY runtime with 28 CPUs using for training

In [13]:
ray.shutdown()
ray.init(num_cpus=28)

2024-10-23 06:05:01,992	INFO worker.py:1777 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.15
Ray version:,2.37.0
Dashboard:,http://127.0.0.1:8265




[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m View detailed results here: tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13
[36m(train_xgboost_model pid=317637)[0m To visualize your results with TensorBoard, run: `tensorboard --logdir /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/artifacts/2024-10-23_06-05-14/XGBoostTrainer_2024-10-23_06-05-13/driver_artifacts`
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m Training started without custom configuration.
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m View detailed results here: tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13
[36m(train_xgboost_model pid=317638)[0m To visualize your results with TensorBoard, run: `tensorboard --logdir /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/artifacts/2024-10-23

[36m(XGBoostTrainer pid=317949)[0m Started distributed worker processes: 
[36m(XGBoostTrainer pid=317949)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=318047) world_rank=0, local_rank=0, node_rank=0
[36m(XGBoostTrainer pid=317949)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=318054) world_rank=1, local_rank=1, node_rank=0
[36m(RayTrainWorker pid=318047)[0m [06:05:22] Task [xgboost.ray-rank=00000000]:16821799a773bfe4497b1a2301000000 got rank 0


[36m(train_xgboost_model pid=317637)[0m Training started without custom configuration.


[36m(SplitCoordinator pid=318204)[0m Starting execution of Dataset. Full logs are in /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/logs/ray-data
[36m(SplitCoordinator pid=318204)[0m Execution plan of Dataset: InputDataBuffer[Input] -> OutputSplitter[split(2, equal=True)]


[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m Training finished iteration 1 at 2024-10-23 06:05:26. Total running time: 11s
[36m(train_xgboost_model pid=317638)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=317638)[0m │ Training result               │
[36m(train_xgboost_model pid=317638)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=317638)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=317638)[0m │ time_this_iter_s      8.48427 │
[36m(train_xgboost_model pid=317638)[0m │ time_total_s          8.48427 │
[36m(train_xgboost_model pid=317638)[0m │ training_iteration          1 │
[36m(train_xgboost_model pid=317638)[0m │ train-rmse            0.25304 │
[36m(train_xgboost_model pid=317638)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m Training finished iteration 2 at 2024-10-23 06:05:26. Total running 

[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [0]	train-rmse:0.25304
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [1]	train-rmse:0.23863
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [2]	train-rmse:0.22489
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [3]	train-rmse:0.21226
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [4]	train-rmse:0.20061
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [5]	train-rmse:0.19184
[36m(XGBoostTrainer pid=317952)[0m Started distributed worker processes: 
[36m(XGBoostTrainer pid=317952)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=318055) world_rank=1, local_rank=1, node_rank=0[32m [repeated 2x across cluster][0m
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [6]	train-rmse:0.18157
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [7]	train-rmse:0.17200
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [8]	train-rmse:0.16298
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [9]	train-rmse:0.15471
[3

[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m Training finished iteration 15 at 2024-10-23 06:05:26. Total running time: 11s
[36m(train_xgboost_model pid=317638)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=317638)[0m │ Training result               │
[36m(train_xgboost_model pid=317638)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=317638)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=317638)[0m │ time_this_iter_s      0.00864 │
[36m(train_xgboost_model pid=317638)[0m │ time_total_s          8.57192 │
[36m(train_xgboost_model pid=317638)[0m │ training_iteration         15 │
[36m(train_xgboost_model pid=317638)[0m │ train-rmse            0.12037 │
[36m(train_xgboost_model pid=317638)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m Training finished iteration 16 at 2024-10-23 06:05:26. Total runnin

[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [15]	train-rmse:0.11454
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [16]	train-rmse:0.10946
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [17]	train-rmse:0.10434
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [18]	train-rmse:0.09953
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [19]	train-rmse:0.09507
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [20]	train-rmse:0.09109
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [21]	train-rmse:0.08799
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [22]	train-rmse:0.08530
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [23]	train-rmse:0.08174
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [24]	train-rmse:0.07847
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [25]	train-rmse:0.07526
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [26]	train-rmse:0.07337
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [27]	train-rmse:0.07039
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [28]	train-rmse:

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train

[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [31]	train-rmse:0.06247
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [32]	train-rmse:0.06044
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [33]	train-rmse:0.05836
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [34]	train-rmse:0.05637
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [35]	train-rmse:0.05530
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [36]	train-rmse:0.05336
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [0]	train-rmse:0.25304
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [1]	train-rmse:0.23863
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [2]	train-rmse:0.22489
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [3]	train-rmse:0.21226
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [4]	train-rmse:0.20061
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [37]	train-rmse:0.05190
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [5]	train-rmse:0.19184
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [38]	train-rmse:0.0502

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 


[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [45]	train-rmse:0.04060
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [46]	train-rmse:0.03941
[36m(XGBoostTrainer pid=317949)[0m [06:05:26] [47]	train-rmse:0.03846
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [48]	train-rmse:0.03802
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [49]	train-rmse:0.03718
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [50]	train-rmse:0.03623
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [51]	train-rmse:0.03547
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [13]	train-rmse:0.12647
[36m(XGBoostTrainer pid=317952)[0m [06:05:26] [14]	train-rmse:0.12037
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [15]	train-rmse:0.11454
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [16]	train-rmse:0.10946
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [17]	train-rmse:0.10434
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [52]	train-rmse:0.03450
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [53]	train-rmse:

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 


[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [57]	train-rmse:0.03076
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [58]	train-rmse:0.03003
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [59]	train-rmse:0.02929
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [60]	train-rmse:0.02870
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [61]	train-rmse:0.02802
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [24]	train-rmse:0.07847
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [25]	train-rmse:0.07526
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [26]	train-rmse:0.07337
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [27]	train-rmse:0.07039
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [28]	train-rmse:0.06786
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [29]	train-rmse:0.06616
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [62]	train-rmse:0.02735
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [63]	train-rmse:0.02675
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [64]	train-rmse:

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train

[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [69]	train-rmse:0.02415
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [70]	train-rmse:0.02369
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [71]	train-rmse:0.02336
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [72]	train-rmse:0.02304
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [73]	train-rmse:0.02256
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [74]	train-rmse:0.02222
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [36]	train-rmse:0.05336
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [37]	train-rmse:0.05190
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [38]	train-rmse:0.05022
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [39]	train-rmse:0.04862
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [40]	train-rmse:0.04708
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [75]	train-rmse:0.02200
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [41]	train-rmse:0.04567
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [42]	train-rmse:

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 


[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [83]	train-rmse:0.01959
[36m(XGBoostTrainer pid=317949)[0m [06:05:27] [84]	train-rmse:0.01930
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [50]	train-rmse:0.03623
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [51]	train-rmse:0.03547
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [52]	train-rmse:0.03450
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [53]	train-rmse:0.03364
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [54]	train-rmse:0.03293
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [55]	train-rmse:0.03227
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [56]	train-rmse:0.03147
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [57]	train-rmse:0.03076
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [58]	train-rmse:0.03003
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [59]	train-rmse:0.02929
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [60]	train-rmse:0.02870
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [61]	train-rmse:

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317637)[0m ╭──────────────────────────────╮
[36m(train_xgboost_model pid=317637)[0m ├──────────────────────────────┤
[36m(train_xgboost_model pid=317

[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [67]	train-rmse:0.02488
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [68]	train-rmse:0.02449
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [69]	train-rmse:0.02415
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [70]	train-rmse:0.02369
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [71]	train-rmse:0.02336
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [72]	train-rmse:0.02304
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [73]	train-rmse:0.02256
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [74]	train-rmse:0.02222
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [75]	train-rmse:0.02200
[36m(RayTrainWorker pid=318055)[0m [06:05:23] Task [xgboost.ray-rank=00000001]:f79c5438142071a33196ecc601000000 got rank 1[32m [repeated 3x across cluster][0m
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [76]	train-rmse:0.02165
[36m(XGBoostTrainer pid=317952)[0m [06:05:27] [77]	train-rmse:0.02135
[36m(XGBoostTrainer pid=317952)[0m [06:05:2

[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m 
[36m(train_xgboost_model pid=317638)[0m Training completed after 86 iterations at 2024-10-23 06:05:29. Total running time: 14s
[36m(train_xgboost_model pid=317638)[0m 


[36m(train_xgboost_model pid=317638)[0m Wrote the latest version of all result files and experiment state to 'tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13' in 0.3441s.
[36m(SplitCoordinator pid=318208)[0m Starting execution of Dataset. Full logs are in /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/logs/ray-data
[36m(SplitCoordinator pid=318208)[0m Execution plan of Dataset: InputDataBuffer[Input] -> OutputSplitter[split(2, equal=True)]


[36m(train_xgboost_model pid=317637)[0m 
[36m(train_xgboost_model pid=317638)[0m Available metrics: OrderedDict([('train-rmse', 0.01929830866235552), ('timestamp', 1729663527), ('checkpoint_dir_name', 'checkpoint_000000'), ('should_checkpoint', True), ('done', True), ('training_iteration', 86), ('trial_id', 'c54bf_00000'), ('date', '2024-10-23_06-05-28'), ('time_this_iter_s', 0.33137035369873047), ('time_total_s', 9.491608619689941), ('pid', 317949), ('hostname', 'instance-20241022-184641'), ('node_ip', '10.128.0.7'), ('config', {}), ('time_since_restore', 9.491608619689941), ('iterations_since_restore', 86), ('experiment_tag', '0')])
[36m(train_xgboost_model pid=317637)[0m 


[36m(RayTrainWorker pid=318053)[0m Checkpoint successfully created at: Checkpoint(filesystem=gcs, path=tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000)
[36m(train_xgboost_model pid=317637)[0m Wrote the latest version of all result files and experiment state to 'tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13' in 0.4376s.


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m View detailed results here: tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-34
[36m(train_xgboost_model pid=318510)[0m To visualize your results with TensorBoard, run: `tensorboard --logdir /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/artifacts/2024-10-23_06-05-36/XGBoostTrainer_2024-10-23_06-05-34/driver_artifacts`
[36m(train_xgboost_model pid=317637)[0m Training finished iteration 86 at 2024-10-23 06:05:28. Total running time: 13s[32m [repeated 142x across cluster][0m
[36m(train_xgboost_model pid=317637)[0m ╭───────────────────────────────╮[32m [repeated 139x across cluster][0m
[36m(train_xgboost_model pid=317637)[0m │ Training result                         │[32m [repeated 142x across cluster][0m
[36m(train_xgboost_model pid=317637)[0m ├───────────────────────────────┤[32m [repeated 139x across cluster][0m
[36m(train_xgboost_model pid=317637)

[36m(XGBoostTrainer pid=318692)[0m Started distributed worker processes: 
[36m(XGBoostTrainer pid=318692)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=318791) world_rank=0, local_rank=0, node_rank=0
[36m(XGBoostTrainer pid=318692)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=318794) world_rank=1, local_rank=1, node_rank=0


[36m(train_xgboost_model pid=318516)[0m View detailed results here: tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-35
[36m(train_xgboost_model pid=318516)[0m To visualize your results with TensorBoard, run: `tensorboard --logdir /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/artifacts/2024-10-23_06-05-36/XGBoostTrainer_2024-10-23_06-05-35/driver_artifacts`


[36m(RayTrainWorker pid=318791)[0m [06:05:44] Task [xgboost.ray-rank=00000000]:479b720807666ecc0b0b572a01000000 got rank 0


[36m(train_xgboost_model pid=318516)[0m Training started without custom configuration.
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 1 at 2024-10-23 06:05:45. Total running time: 9s
[36m(train_xgboost_model pid=318510)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=318510)[0m │ Training result               │
[36m(train_xgboost_model pid=318510)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=318510)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=318510)[0m │ time_this_iter_s        6.268 │
[36m(train_xgboost_model pid=318510)[0m │ time_total_s            6.268 │
[36m(train_xgboost_model pid=318510)[0m │ training_iteration          1 │
[36m(train_xgboost_model pid=318510)[0m │ train-rmse            0.25267 │
[36m(train_xgboost_model pid=318510)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_mo

[36m(SplitCoordinator pid=318971)[0m Starting execution of Dataset. Full logs are in /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/logs/ray-data
[36m(SplitCoordinator pid=318971)[0m Execution plan of Dataset: InputDataBuffer[Input] -> OutputSplitter[split(2, equal=True)]
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [0]	train-rmse:0.25267
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [1]	train-rmse:0.23794
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [2]	train-rmse:0.22392


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 3 at 2024-10-23 06:05:45. Total running time: 9s
[36m(train_xgboost_model pid=318510)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=318510)[0m │ Training result               │
[36m(train_xgboost_model pid=318510)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=318510)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=318510)[0m │ time_this_iter_s      0.00715 │
[36m(train_xgboost_model pid=318510)[0m │ time_total_s          6.28794 │
[36m(train_xgboost_model pid=318510)[0m │ training_iteration          3 │
[36m(train_xgboost_model pid=318510)[0m │ train-rmse            0.22392 │
[36m(train_xgboost_model pid=318510)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 4 at 2024-10-23 06:05:45. Total running t

[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [3]	train-rmse:0.21105
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [4]	train-rmse:0.19920
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [5]	train-rmse:0.19028
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [6]	train-rmse:0.17980
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [7]	train-rmse:0.17021
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [8]	train-rmse:0.16109
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [9]	train-rmse:0.15272
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [10]	train-rmse:0.14487
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [11]	train-rmse:0.13749
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [12]	train-rmse:0.13050
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [13]	train-rmse:0.12405


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 15 at 2024-10-23 06:05:45. Total running time: 9s
[36m(train_xgboost_model pid=318510)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=318510)[0m │ Training result               │
[36m(train_xgboost_model pid=318510)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=318510)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=318510)[0m │ time_this_iter_s       0.0234 │
[36m(train_xgboost_model pid=318510)[0m │ time_total_s          6.41237 │
[36m(train_xgboost_model pid=318510)[0m │ training_iteration         15 │
[36m(train_xgboost_model pid=318510)[0m │ train-rmse            0.11793 │
[36m(train_xgboost_model pid=318510)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 16 at 2024-10-23 06:05:45. Total running

[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [14]	train-rmse:0.11793
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [15]	train-rmse:0.11203
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [16]	train-rmse:0.10678
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [17]	train-rmse:0.10177
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [18]	train-rmse:0.09692
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [19]	train-rmse:0.09245


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 21 at 2024-10-23 06:05:45. Total running time: 9s
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=318510)[0m │ Training result               │
[36m(train_xgboost_model pid=318510)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=318510)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=318510)[0m │ time_this_iter_s        0.011 │
[36m(train_xgboost_model pid=318510)[0m │ time_total_s          6.47406 │
[36m(train_xgboost_model pid=318510)[0m │ training_iteration         21 │
[36m(train_xgboost_model pid=318510)[0m │ train-rmse            0.08852 │
[36m(train_xgboost_model pid=318510)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iterati

[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [20]	train-rmse:0.08852
[36m(XGBoostTrainer pid=318692)[0m [06:05:45] [21]	train-rmse:0.08540
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [22]	train-rmse:0.08283
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [23]	train-rmse:0.07941
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [24]	train-rmse:0.07619
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [25]	train-rmse:0.07301


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 27 at 2024-10-23 06:05:46. Total running time: 9s
[36m(train_xgboost_model pid=318510)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=318510)[0m │ Training result               │
[36m(train_xgboost_model pid=318510)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=318510)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=318510)[0m │ time_this_iter_s      0.01669 │
[36m(train_xgboost_model pid=318510)[0m │ time_total_s          6.54658 │
[36m(train_xgboost_model pid=318510)[0m │ training_iteration         27 │
[36m(train_xgboost_model pid=318510)[0m │ train-rmse            0.07117 │
[36m(train_xgboost_model pid=318510)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training finished iteration 28 at 2024-10-23 06:05:46. Total running

[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [26]	train-rmse:0.07117
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [27]	train-rmse:0.06826
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [28]	train-rmse:0.06575
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [29]	train-rmse:0.06402
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [30]	train-rmse:0.06182
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [31]	train-rmse:0.06042
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [32]	train-rmse:0.05838
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [33]	train-rmse:0.05632
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [0]	train-rmse:0.25493
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [1]	train-rmse:0.24217
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [2]	train-rmse:0.22990
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [34]	train-rmse:0.05433
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [3]	train-rmse:0.21851


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 


[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [35]	train-rmse:0.05328
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [36]	train-rmse:0.05140
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [37]	train-rmse:0.04972
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [38]	train-rmse:0.04804
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [39]	train-rmse:0.04642
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [40]	train-rmse:0.04503
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [41]	train-rmse:0.04369
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [42]	train-rmse:0.04232
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [4]	train-rmse:0.20763
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [5]	train-rmse:0.19951
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [6]	train-rmse:0.18998
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [7]	train-rmse:0.18108
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [8]	train-rmse:0.17264
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [9]	train-rmse:0.1647

[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train

[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [43]	train-rmse:0.04112
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [44]	train-rmse:0.03983
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [45]	train-rmse:0.03863
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [46]	train-rmse:0.03764
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [47]	train-rmse:0.03659
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [48]	train-rmse:0.03591
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [10]	train-rmse:0.15748
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [11]	train-rmse:0.15042
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [12]	train-rmse:0.14399
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [13]	train-rmse:0.13769
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [14]	train-rmse:0.13170
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [15]	train-rmse:0.12588
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [16]	train-rmse:0.12065
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [49]	train-rmse:

[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train

[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [55]	train-rmse:0.03023
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [56]	train-rmse:0.02947
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [57]	train-rmse:0.02878
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [58]	train-rmse:0.02806
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [59]	train-rmse:0.02735
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [60]	train-rmse:0.02676
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [61]	train-rmse:0.02616
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [22]	train-rmse:0.09593
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [23]	train-rmse:0.09232
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [24]	train-rmse:0.08887
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [25]	train-rmse:0.08536
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [26]	train-rmse:0.08324
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [27]	train-rmse:0.08011
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [28]	train-rmse:

[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train

[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [70]	train-rmse:0.02230
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [71]	train-rmse:0.02192
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [72]	train-rmse:0.02164
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [73]	train-rmse:0.02131
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [74]	train-rmse:0.02094
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [36]	train-rmse:0.06145
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [37]	train-rmse:0.05980
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [38]	train-rmse:0.05785
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [39]	train-rmse:0.05611
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [40]	train-rmse:0.05440
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [41]	train-rmse:0.05279
[36m(XGBoostTrainer pid=318707)[0m [06:05:46] [42]	train-rmse:0.05138
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [75]	train-rmse:0.02075
[36m(XGBoostTrainer pid=318692)[0m [06:05:46] [76]	train-rmse:

[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 


[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [50]	train-rmse:0.04200
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [51]	train-rmse:0.04103
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [52]	train-rmse:0.03997
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [53]	train-rmse:0.03890
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [54]	train-rmse:0.03812
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [55]	train-rmse:0.03736
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [56]	train-rmse:0.03658
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [57]	train-rmse:0.03578
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [58]	train-rmse:0.03505
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [59]	train-rmse:0.03426
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [60]	train-rmse:0.03357
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [61]	train-rmse:0.03286
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [62]	train-rmse:0.03216
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [63]	train-rmse:

[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318516)[0m 
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m ╭─────────────────────────────────────────╮
[36m(train_xgboost_model pid=318510)[0m ├─────────────────────────────────────────┤
[36m(train_xgboost_model pid=318510)[0m ╰─────────────────────────────────────────╯
[36m(train_xgboost_model pid=318510)[0m Training saved a checkpoint for iteration 86 at: (gcs)tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-34/XGBoostTrainer_d213e_00000_0_2024-10-23_06-05-36/checkpoint_000000
[36m(train_xgboost_model 

[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [69]	train-rmse:0.02803
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [70]	train-rmse:0.02750
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [71]	train-rmse:0.02704
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [72]	train-rmse:0.02665
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [73]	train-rmse:0.02617
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [74]	train-rmse:0.02569
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [75]	train-rmse:0.02539
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [76]	train-rmse:0.02496
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [77]	train-rmse:0.02467
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [78]	train-rmse:0.02432
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [79]	train-rmse:0.02397
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [80]	train-rmse:0.02365
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [81]	train-rmse:0.02322
[36m(XGBoostTrainer pid=318707)[0m [06:05:47] [82]	train-rmse:

[36m(train_xgboost_model pid=318516)[0m 


[36m(XGBoostTrainer pid=318707)[0m Started distributed worker processes: 
[36m(XGBoostTrainer pid=318707)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=318801) world_rank=1, local_rank=1, node_rank=0[32m [repeated 2x across cluster][0m


[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318510)[0m Training completed after 86 iterations at 2024-10-23 06:05:48. Total running time: 12s
[36m(train_xgboost_model pid=318510)[0m 
[36m(train_xgboost_model pid=318516)[0m 


[36m(train_xgboost_model pid=318510)[0m Wrote the latest version of all result files and experiment state to 'tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-34' in 0.2472s.


[36m(train_xgboost_model pid=318510)[0m Available metrics: OrderedDict([('train-rmse', 0.01829503681009907), ('timestamp', 1729663547), ('checkpoint_dir_name', 'checkpoint_000000'), ('should_checkpoint', True), ('done', True), ('training_iteration', 86), ('trial_id', 'd213e_00000'), ('date', '2024-10-23_06-05-47'), ('time_this_iter_s', 0.3710594177246094), ('time_total_s', 7.427787780761719), ('pid', 318692), ('hostname', 'instance-20241022-184641'), ('node_ip', '10.128.0.7'), ('config', {}), ('time_since_restore', 7.427787780761719), ('iterations_since_restore', 86), ('experiment_tag', '0')])
[36m(train_xgboost_model pid=318516)[0m 


[36m(RayTrainWorker pid=318801)[0m [06:05:44] Task [xgboost.ray-rank=00000001]:148f8ae8c2bada51492364c401000000 got rank 1[32m [repeated 3x across cluster][0m
[36m(SplitCoordinator pid=318974)[0m Starting execution of Dataset. Full logs are in /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/logs/ray-data
[36m(SplitCoordinator pid=318974)[0m Execution plan of Dataset: InputDataBuffer[Input] -> OutputSplitter[split(2, equal=True)]
[36m(RayTrainWorker pid=318800)[0m Checkpoint successfully created at: Checkpoint(filesystem=gcs, path=tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-35/XGBoostTrainer_d2458_00000_0_2024-10-23_06-05-36/checkpoint_000000)


[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m View detailed results here: tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-53
[36m(train_xgboost_model pid=319158)[0m To visualize your results with TensorBoard, run: `tensorboard --logdir /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/artifacts/2024-10-23_06-05-55/XGBoostTrainer_2024-10-23_06-05-53/driver_artifacts`
[36m(train_xgboost_model pid=318516)[0m Training finished iteration 86 at 2024-10-23 06:05:47. Total running time: 11s[32m [repeated 139x across cluster][0m
[36m(train_xgboost_model pid=318516)[0m ╭───────────────────────────────╮[32m [repeated 137x across cluster][0m
[36m(train_xgboost_model pid=318516)[0m │ Training result                         │[32m [repeated 139x across cluster][0m
[36m(train_xgboost_model pid=318516)[0m ├───────────────────────────────┤[32m [repeated 137x across cluster][0m
[36m(train_xgboost_model pid=318516)

[36m(XGBoostTrainer pid=319238)[0m Started distributed worker processes: 
[36m(XGBoostTrainer pid=319238)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=319288) world_rank=0, local_rank=0, node_rank=0
[36m(XGBoostTrainer pid=319238)[0m - (node_id=6788233764ed76c9515c74ccc9cdc6a23a07a67861ea56d815dd7b37, ip=10.128.0.7, pid=319289) world_rank=1, local_rank=1, node_rank=0
[36m(train_xgboost_model pid=318516)[0m Wrote the latest version of all result files and experiment state to 'tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-35' in 0.3172s.
[36m(RayTrainWorker pid=319288)[0m [06:06:01] Task [xgboost.ray-rank=00000000]:b893bc005f1a533c3317694101000000 got rank 0
[36m(SplitCoordinator pid=319365)[0m Starting execution of Dataset. Full logs are in /var/tmp/ray/session_2024-10-23_06-04-58_972716_317054/logs/ray-data
[36m(SplitCoordinator pid=319365)[0m Execution plan of Dataset: InputDataBuffer[Input] -> 

[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 1 at 2024-10-23 06:06:02. Total running time: 7s
[36m(train_xgboost_model pid=319158)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=319158)[0m │ Training result               │
[36m(train_xgboost_model pid=319158)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=319158)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=319158)[0m │ time_this_iter_s      4.81598 │
[36m(train_xgboost_model pid=319158)[0m │ time_total_s          4.81598 │
[36m(train_xgboost_model pid=319158)[0m │ training_iteration          1 │
[36m(train_xgboost_model pid=319158)[0m │ train-rmse            0.25597 │
[36m(train_xgboost_model pid=319158)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 2 at 2024-10-23 06:06:02. Total running t

[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [6]	train-rmse:0.19500
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [7]	train-rmse:0.18647
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [8]	train-rmse:0.17834
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [9]	train-rmse:0.17081
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [10]	train-rmse:0.16372
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [11]	train-rmse:0.15685
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [12]	train-rmse:0.15054
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [13]	train-rmse:0.14435
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [14]	train-rmse:0.13859
[36m(XGBoostTrainer pid=319238)[0m [06:06:02] [15]	train-rmse:0.13295
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [16]	train-rmse:0.12779
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [17]	train-rmse:0.12264
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [18]	train-rmse:0.11782
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [19]	train-rmse:0.11

[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 14 at 2024-10-23 06:06:02. Total running time: 7s
[36m(train_xgboost_model pid=319158)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=319158)[0m │ Training result               │
[36m(train_xgboost_model pid=319158)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=319158)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=319158)[0m │ time_this_iter_s      0.00872 │
[36m(train_xgboost_model pid=319158)[0m │ time_total_s          4.92556 │
[36m(train_xgboost_model pid=319158)[0m │ training_iteration         14 │
[36m(train_xgboost_model pid=319158)[0m │ train-rmse            0.14435 │
[36m(train_xgboost_model pid=319158)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 15 at 2024-10-23 06:06:02. Total running

[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [23]	train-rmse:0.09938
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [24]	train-rmse:0.09585
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [25]	train-rmse:0.09244
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [26]	train-rmse:0.09013
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [27]	train-rmse:0.08694
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [28]	train-rmse:0.08395
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [29]	train-rmse:0.08188
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [30]	train-rmse:0.07933
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [31]	train-rmse:0.07759
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [32]	train-rmse:0.07529
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [33]	train-rmse:0.07279
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [34]	train-rmse:0.07055
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [35]	train-rmse:0.06952
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [36]	train-rmse:

[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 32 at 2024-10-23 06:06:03. Total running time: 8s
[36m(train_xgboost_model pid=319158)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=319158)[0m │ Training result               │
[36m(train_xgboost_model pid=319158)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=319158)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=319158)[0m │ time_this_iter_s      0.00999 │
[36m(train_xgboost_model pid=319158)[0m │ time_total_s          5.05249 │
[36m(train_xgboost_model pid=319158)[0m │ training_iteration         32 │
[36m(train_xgboost_model pid=319158)[0m │ train-rmse            0.07759 │
[36m(train_xgboost_model pid=319158)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 33 at 2024-10-23 06:06:03. Total running

[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [40]	train-rmse:0.05989
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [41]	train-rmse:0.05825
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [42]	train-rmse:0.05661
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [43]	train-rmse:0.05511
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [44]	train-rmse:0.05356
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [45]	train-rmse:0.05221
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [46]	train-rmse:0.05079
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [47]	train-rmse:0.04954
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [48]	train-rmse:0.04898
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [49]	train-rmse:0.04773
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [50]	train-rmse:0.04662
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [51]	train-rmse:0.04561
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [52]	train-rmse:0.04456
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [53]	train-rmse:

[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 53 at 2024-10-23 06:06:03. Total running time: 8s
[36m(train_xgboost_model pid=319158)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=319158)[0m │ Training result               │
[36m(train_xgboost_model pid=319158)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=319158)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=319158)[0m │ time_this_iter_s      0.01141 │
[36m(train_xgboost_model pid=319158)[0m │ time_total_s          5.20622 │
[36m(train_xgboost_model pid=319158)[0m │ training_iteration         53 │
[36m(train_xgboost_model pid=319158)[0m │ train-rmse            0.04456 │
[36m(train_xgboost_model pid=319158)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 54 at 2024-10-23 06:06:03. Total running

[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [60]	train-rmse:0.03748
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [61]	train-rmse:0.03671
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [62]	train-rmse:0.03596
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [63]	train-rmse:0.03519
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [64]	train-rmse:0.03450
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [65]	train-rmse:0.03388
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [66]	train-rmse:0.03338
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [67]	train-rmse:0.03300
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [68]	train-rmse:0.03241
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [69]	train-rmse:0.03170
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [70]	train-rmse:0.03099
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [71]	train-rmse:0.03059
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [72]	train-rmse:0.03006
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [73]	train-rmse:

[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 69 at 2024-10-23 06:06:03. Total running time: 8s
[36m(train_xgboost_model pid=319158)[0m ╭───────────────────────────────╮
[36m(train_xgboost_model pid=319158)[0m │ Training result               │
[36m(train_xgboost_model pid=319158)[0m ├───────────────────────────────┤
[36m(train_xgboost_model pid=319158)[0m │ checkpoint_dir_name           │
[36m(train_xgboost_model pid=319158)[0m │ time_this_iter_s      0.00603 │
[36m(train_xgboost_model pid=319158)[0m │ time_total_s          5.33158 │
[36m(train_xgboost_model pid=319158)[0m │ training_iteration         69 │
[36m(train_xgboost_model pid=319158)[0m │ train-rmse            0.03241 │
[36m(train_xgboost_model pid=319158)[0m ╰───────────────────────────────╯
[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 70 at 2024-10-23 06:06:03. Total running

[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [77]	train-rmse:0.02785
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [78]	train-rmse:0.02751
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [79]	train-rmse:0.02708
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [80]	train-rmse:0.02671
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [81]	train-rmse:0.02624
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [82]	train-rmse:0.02579
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [83]	train-rmse:0.02541
[36m(XGBoostTrainer pid=319238)[0m [06:06:03] [84]	train-rmse:0.02511


[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training finished iteration 86 at 2024-10-23 06:06:04. Total running time: 9s
[36m(train_xgboost_model pid=319158)[0m ╭─────────────────────────────────────────╮
[36m(train_xgboost_model pid=319158)[0m │ Training result                         │
[36m(train_xgboost_model pid=319158)[0m ├─────────────────────────────────────────┤
[36m(train_xgboost_model pid=319158)[0m │ checkpoint_dir_name   checkpoint_000000 │
[36m(train_xgboost_model pid=319158)[0m │ time_this_iter_s                0.38331 │
[36m(train_xgboost_model pid=319158)[0m │ time_total_s                     5.8233 │
[36m(train_xgboost_model pid=319158)[0m │ training_iteration                   86 │
[36m(train_xgboost_model pid=319158)[0m │ train-rmse                      0.02511 │
[36m(train_xgboost_model pid=319158)[0m ╰─────────────────────────────────────────╯
[36m(train_xgboost_model pid=319158)[0m Training saved a chec

[36m(RayTrainWorker pid=319288)[0m Checkpoint successfully created at: Checkpoint(filesystem=gcs, path=tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-53/XGBoostTrainer_dd558_00000_0_2024-10-23_06-05-55/checkpoint_000000)


[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Training completed after 86 iterations at 2024-10-23 06:06:05. Total running time: 10s


[36m(train_xgboost_model pid=319158)[0m Wrote the latest version of all result files and experiment state to 'tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-53' in 0.4389s.


[36m(train_xgboost_model pid=319158)[0m 
[36m(train_xgboost_model pid=319158)[0m Available metrics: OrderedDict([('train-rmse', 0.02510849421864081), ('timestamp', 1729663563), ('checkpoint_dir_name', 'checkpoint_000000'), ('should_checkpoint', True), ('done', True), ('training_iteration', 86), ('trial_id', 'dd558_00000'), ('date', '2024-10-23_06-06-04'), ('time_this_iter_s', 0.38330507278442383), ('time_total_s', 5.823301076889038), ('pid', 319238), ('hostname', 'instance-20241022-184641'), ('node_ip', '10.128.0.7'), ('config', {}), ('time_since_restore', 5.823301076889038), ('iterations_since_restore', 86), ('experiment_tag', '0')])


Creating RAY cluster on GCP UI

![RAY name](./images/ray_name.png)

Specifying head node

![RAY head](./images/ray_head.png)

Specifying worker node

![RAY head](./images/ray_worker.png)

After the cluster is created, access it using `vertex_ray.get_ray_cluster()`

In [14]:
vertex_ray.get_ray_cluster("projects/652637980750/locations/us-central1/persistentResources/cluster-20241023-123602")

Cluster(cluster_resource_name='projects/652637980750/locations/us-central1/persistentResources/cluster-20241023-123602', network='', reserved_ip_ranges=[], service_account=None, state='RUNNING', python_version='3.10', ray_version='2.33', head_node_type=Resources(machine_type='n1-standard-16', node_count=1, accelerator_type=None, accelerator_count=0, boot_disk_type='pd-standard', boot_disk_size_gb=100, custom_image=None, autoscaling_spec=None), worker_node_types=[Resources(machine_type='n1-standard-4', node_count=1, accelerator_type=None, accelerator_count=0, boot_disk_type='pd-standard', boot_disk_size_gb=100, custom_image=None, autoscaling_spec=AutoscalingSpec(min_replica_count=1, max_replica_count=3))], dashboard_address='1a538065013652cd-dot-us-central1.aiplatform-training.googleusercontent.com', ray_metric_enabled=True, ray_logs_enabled=True, psc_interface_config=None, labels={})

In [15]:
print("Cluster Resources:", ray.cluster_resources())

Cluster Resources: {'memory': 9352657307.0, 'object_store_memory': 4676328652.0, 'CPU': 28.0, 'node:10.128.0.7': 1.0, 'node:__internal_head__': 1.0}


## Data Loading Function for BigQuery

```python
def load_data(project, dataset, table):
   """Load data from BigQuery and restructure into features and target"""
```

This function handles data loading and preprocessing:
- Fetches data from specified BigQuery table
- Restructures data by pairing even/odd rows
- Creates features by combining:
 - All columns from even rows
 - First 2 columns from odd rows
- Uses last column of odd rows as target
- Converts to Ray Dataset format for distributed training

Parameters:
- `project`: GCP project ID
- `dataset`: BigQuery dataset name
- `table`: BigQuery table name

Returns a Ray Dataset with features and target column ready for training.

In [17]:
def load_data(project, dataset, table):
    """
    Load data from BigQuery and restructure into features and target
    with handling for mismatched row counts
    """
    client = bigquery.Client(project=project)
    query = f"SELECT * FROM `{project}.{dataset}.{table}`"
    raw_df = client.query(query).to_dataframe()
    
    # Convert to numpy for easier array manipulation
    raw_values = raw_df.values
    
    # Get even and odd indices, ensuring we have matching pairs
    n_pairs = min(len(raw_values[::2]), len(raw_values[1::2]))
    even_rows = raw_values[::2][:n_pairs]
    odd_rows = raw_values[1::2][:n_pairs]
    
    # Now combine features with guaranteed matching dimensions
    features = np.hstack([
        even_rows,  # all columns from even rows
        odd_rows[:, :2]  # first 2 columns from odd rows
    ])
    
    # Get target from last column of odd rows
    target = odd_rows[:, 2]
    
    # Convert back to DataFrame for proper column handling
    feature_cols = [f'feat_{i}' for i in range(features.shape[1])]
    X = pd.DataFrame(features, columns=feature_cols)
    y = pd.Series(target, name='target')
    
    # Ensure data types are correct
    X = X.select_dtypes(include=['number']).fillna(0)
    y = y.astype(float)
    
    # Convert to Ray Dataset
    dataset = ray.data.from_pandas(pd.concat([X, y], axis=1))
    return dataset

## XGBoost Model Training Function

This function implements the training loop for XGBoost using Ray's distributed training capabilities:

```python
def train_xgboost_model(config):
    """Training function using Ray's XGBoostTrainer with distributed processing"""
```

Key components:
- Loads data from BigQuery
- Configures XGBoost with hyperparameters from tuning
- Uses 2 workers for distributed training
- Saves checkpoints for model versioning
- Returns MSE, RMSE metrics and model checkpoint

The function takes a `config` dictionary containing hyperparameters:
- n_estimators: Number of boosting rounds
- max_depth: Maximum tree depth
- learning_rate: Learning rate for gradient descent
- subsample: Subsample ratio for training data

In [18]:
def train_xgboost_model(config):
    """Training function using Ray's XGBoostTrainer with restructured data"""
    # Load and preprocess data
    train_dataset = load_data(BQ_PROJECT, BQ_DATASET, BQ_TABLE)
    
    trainer = XGBoostTrainer(
        scaling_config=ScalingConfig(
            num_workers=2,
            use_gpu=False,
        ),
        run_config=RunConfig(
            checkpoint_config=CheckpointConfig(
                num_to_keep=None  # Keep all checkpoints
            ),
            storage_path=f"{URI}/models/{TIMESTAMP}",
        ),
        label_column="target",
        num_boost_round=config["n_estimators"],
        params={
            "objective": "reg:squarederror",
            "max_depth": config["max_depth"],
            "learning_rate": config["learning_rate"],
            "subsample": config["subsample"],
            "tree_method": "hist",
            "eval_metric": ["rmse"],
        },
        datasets={"train": train_dataset},
    )
    
    result = trainer.fit()
    
    # Extract metrics - updated to handle new metrics structure
    try:
        # First try the new metrics structure
        final_rmse = result.metrics["eval_metrics"]["rmse"][-1]
    except KeyError:
        try:
            # Try alternative metrics structure
            final_rmse = result.metrics["rmse"][-1]
        except KeyError:
            # If both fail, print available metrics and use a default
            print("Available metrics:", result.metrics)
            final_rmse = result.metrics.get("training_iteration", 0)
    
    final_mse = final_rmse ** 2 if final_rmse is not None else float('inf')
    
    return {
        "mse": final_mse,
        "rmse": final_rmse,
        "model": result.checkpoint
    }

## Hyperparameter Tuning with Bayesian Optimization

```python
tuner = Tuner(
   train_xgboost_model,
   param_space=tune_config,
   tune_config=TuneConfig(...)
)
```

Configures and runs hyperparameter optimization using Ray Tune:
- Uses Bayesian Optimization (`BayesOptSearch`) for intelligent parameter search
- Performs 5 trials to find optimal parameters
- Runs 2 trials concurrently for faster tuning
- Optimizes for minimum RMSE

The tuner:
1. Explores the parameter space defined in `tune_config`
2. Tracks performance across trials
3. Returns best performing model configuration
4. Prints optimal hyperparameters and RMSE score

This optimization helps find the best combination of:
- Number of estimators
- Maximum tree depth
- Learning rate
- Subsample ratio

In [19]:
# Hyperparameter space
tune_config = {
    "n_estimators": tune.choice([80, 85]),
    "max_depth": tune.choice([5, 6]),
    "learning_rate": tune.loguniform(0.05, 0.1),
    "subsample": tune.uniform(0.8, 0.9)
}

In [20]:
# Create the tuner
tuner = Tuner(
    train_xgboost_model,
    param_space=tune_config,
    tune_config=TuneConfig(
        num_samples=5,           # Number of trials to run
        metric="rmse",          # Metric to optimize
        mode="min",             # Minimize the metric
        max_concurrent_trials=2,  # 2-way concurrent tuning
        search_alg=BayesOptSearch()
    ),
)

# Run the hyperparameter search
result_grid = tuner.fit()

# Get the best result
best_result = result_grid.get_best_result()

print("Best hyperparameters found:", best_result.config)
print("Best RMSE achieved:", best_result.metrics["rmse"])

0,1
Current time:,2024-10-23 06:06:06
Running for:,00:01:01.82
Memory:,3.0/15.6 GiB

Trial name,status,loc,learning_rate,max_depth,n_estimators,subsample,iter,total time (s),mse,rmse
train_xgboost_model_bef6b_00000,TERMINATED,10.128.0.7:317637,0.0683871,5,85,0.845569,1,21.9042,7396,86
train_xgboost_model_bef6b_00001,TERMINATED,10.128.0.7:317638,0.0561543,6,80,0.896244,1,21.2061,7396,86
train_xgboost_model_bef6b_00002,TERMINATED,10.128.0.7:318510,0.0700223,5,85,0.85071,1,15.2118,7396,86
train_xgboost_model_bef6b_00003,TERMINATED,10.128.0.7:318516,0.0600009,5,85,0.849573,1,15.1147,7396,86
train_xgboost_model_bef6b_00004,TERMINATED,10.128.0.7:319158,0.0553765,5,85,0.841276,1,13.1389,7396,86


(pid=318204) Running 0: 0.00 row [00:00, ? row/s]

(pid=318208) Running 0: 0.00 row [00:00, ? row/s]

(pid=318971) Running 0: 0.00 row [00:00, ? row/s]

(pid=318974) Running 0: 0.00 row [00:00, ? row/s]

(pid=319365) Running 0: 0.00 row [00:00, ? row/s]

2024-10-23 06:06:06,006	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/jupyter/ray_results/train_xgboost_model_2024-10-23_06-05-04' in 0.0110s.
2024-10-23 06:06:06,024	INFO tune.py:1041 -- Total run time: 61.89 seconds (61.81 seconds for the tuning loop).


Best hyperparameters found: {'n_estimators': 85, 'max_depth': 5, 'learning_rate': 0.06838712273668071, 'subsample': 0.8455691366345284}
Best RMSE achieved: 86


# Upload Container Image

In [180]:
import subprocess

# Get a list of all installed packages and their versions
installed_packages = subprocess.run(['pip', 'freeze'], capture_output=True, text=True).stdout.splitlines()

# List to store only valid packages
valid_packages = []

# Iterate through each package and attempt to install it to check for issues
for package in installed_packages:
    package_name = package.split('==')[0]  # Get only the package name (exclude version)
    
    try:
        print(f"Checking package: {package_name}")
        # Try installing the package to check if it's valid
        subprocess.run(['pip', 'install', package_name], check=True, capture_output=True, text=True)
        valid_packages.append(package)  # If installation succeeds, add to valid list
    except subprocess.CalledProcessError as e:
        print(f"Skipping package {package_name} due to error: {e}")
    except OSError as os_error:
        print(f"Skipping package {package_name} due to missing files or directories: {os_error}")

# Write the valid packages to a requirements.txt file
with open('requirements.txt', 'w') as f:
    f.write("\n".join(valid_packages))

print("requirements.txt file has been created with valid installed libraries and versions.")

requirements.txt file has been created with all installed libraries and versions.


In [194]:
bucket = gcs.lookup_bucket(PROJECT_ID)
SOURCEPATH = f'{SERIES}/{EXPERIMENT}/training'

In [195]:
blob = bucket.blob(f'{SOURCEPATH}/{EXPERIMENT}_trainer/train.py')
blob.upload_from_filename(SCRIPT_PATH)

In [196]:
dockerfile = f"""
FROM gcr.io/deeplearning-platform-release/xgboost-cpu
WORKDIR /training
# copy requirements and install them
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip \
  && pip install --no-cache-dir --ignore-installed --no-deps -r requirements.txt || true
"""

In [198]:
blob = bucket.blob(f'{SOURCEPATH}/requirements.txt')
blob.upload_from_filename('requirements.txt')

In [199]:
blob = bucket.blob(f'{SOURCEPATH}/Dockerfile')
blob.upload_from_string(dockerfile)

In [200]:
for repo in ar_client.list_repositories(parent = f'projects/{PROJECT_ID}/locations/{REGION}'):
    print(repo.name)

projects/tymestack-439300/locations/us-central1/repositories/tymestack-439300


In [201]:
docker_repo = None
for repo in ar_client.list_repositories(parent = f'projects/{PROJECT_ID}/locations/{REGION}'):
    if f'{PROJECT_ID}' == repo.name.split('/')[-1]:
        docker_repo = repo
        print(f'Retrieved existing repo: {docker_repo.name}')

if not docker_repo:
    operation = ar_client.create_repository(
        request = artifactregistry_v1.CreateRepositoryRequest(
            parent = f'projects/{PROJECT_ID}/locations/{REGION}',
            repository_id = f'{PROJECT_ID}',
            repository = artifactregistry_v1.Repository(
                description = f'A repository for the {EXPERIMENT} experiment that holds docker images.',
                name = f'{PROJECT_ID}',
                format_ = artifactregistry_v1.Repository.Format.DOCKER,
                labels = {'series': SERIES, 'experiment': EXPERIMENT}
            )
        )
    )
    print('Creating Repository ...')
    docker_repo = operation.result()
    print(f'Completed creating repo: {docker_repo.name}')

Retrieved existing repo: projects/tymestack-439300/locations/us-central1/repositories/tymestack-439300


In [202]:
docker_repo.name, docker_repo.format_.name

('projects/tymestack-439300/locations/us-central1/repositories/tymestack-439300',
 'DOCKER')

In [203]:
REPOSITORY = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{docker_repo.name.split('/')[-1]}"

In [204]:
# setup the build config with empty list of steps - these will be added sequentially
build = cloudbuild_v1.Build(
    steps = []
)
# retrieve the source
build.steps.append(
    {
        'name': 'gcr.io/cloud-builders/gsutil',
        'args': ['cp', '-r', f'gs://{PROJECT_ID}/{SOURCEPATH}/*', '/workspace']
    }
)
# docker build
build.steps.append(
    {
        'name': 'gcr.io/cloud-builders/docker',
        'args': ['build', '-t', f'{REPOSITORY}/{EXPERIMENT}_trainer', '/workspace']
    }    
)
# docker push
build.images = [f"{REPOSITORY}/{EXPERIMENT}_trainer"]

In [205]:
build

steps {
  name: "gcr.io/cloud-builders/gsutil"
  args: "cp"
  args: "-r"
  args: "gs://tymestack-439300/01/01/training/*"
  args: "/workspace"
}
steps {
  name: "gcr.io/cloud-builders/docker"
  args: "build"
  args: "-t"
  args: "us-central1-docker.pkg.dev/tymestack-439300/tymestack-439300/01_trainer"
  args: "/workspace"
}
images: "us-central1-docker.pkg.dev/tymestack-439300/tymestack-439300/01_trainer"

In [206]:
operation = cb_client.create_build(
    project_id = PROJECT_ID,
    build = build
)

In [207]:
response = operation.result()
response.status, response.artifacts

(<Status.SUCCESS: 3>,
 images: "us-central1-docker.pkg.dev/tymestack-439300/tymestack-439300/01_trainer")

# Deploy Best Model's Endpoint

In [35]:
!pip install py-ubjson

Collecting py-ubjson
  Using cached py-ubjson-0.16.1.tar.gz (50 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: py-ubjson
  Building wheel for py-ubjson (setup.py) ... [?25ldone
[?25h  Created wheel for py-ubjson: filename=py_ubjson-0.16.1-cp310-cp310-linux_x86_64.whl size=45683 sha256=518ace72394477a125c517c43d2bf374a9fbe970833f162bd51282c4808020d0
  Stored in directory: /home/jupyter/.cache/pip/wheels/ca/7d/41/4ca3da5703c6c538acebd3eba359d8226dc7e1c4494480a2d2
Successfully built py-ubjson
Installing collected packages: py-ubjson
Successfully installed py-ubjson-0.16.1


In [36]:
path = f"gcs://{best_result.metrics['model'].path}"
path

'gcs://tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000'

In [None]:
import ubjson
import pickle
from google.cloud import storage

# Initialize GCS client
client = storage.Client()

# Original path
# path = 'gcs://tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000'
# Find the index where "01" starts and slice from there
source_blob_name = path[path.index("01"):]
destination_file_name = "/tmp/model.ubj"  # Temporary local file path

# Download file from GCS
bucket = client.bucket(BUCKET)
blob = bucket.blob(source_blob_name)
blob.download_to_filename(destination_file_name)

print(f"Downloaded {source_blob_name} to {destination_file_name}.")

# Load the UBJ model from the local temporary path
with open(destination_file_name, "rb") as ubj_file:
    model = ubjson.load(ubj_file)

# Save the model as a pickle (.pkl) file
pkl_file_path = "/tmp/model.pkl"
with open(pkl_file_path, "wb") as pkl_file:
    pickle.dump(model, pkl_file)

print(f"Model has been successfully converted and saved as {pkl_file_path}.")

In [40]:
blob = bucket.blob(f"{source_blob_name}/model.pkl")
blob.upload_from_filename(pkl_file_path)

In [100]:
from google.cloud import storage

# Initialize GCS client
storage_client = storage.Client()

# Parse the GCS path
gcs_path = path.replace('gcs://', '')
bucket_name = gcs_path.split('/')[0]
blob_prefix = '/'.join(gcs_path.split('/')[1:])

# Get bucket
bucket = storage_client.bucket(bucket_name)

# List contents
print(f"Listing contents of checkpoint directory: {path}")
blobs = list(bucket.list_blobs(prefix=blob_prefix))
for blob in blobs:
    print(f"- {blob.name}")

Listing contents of checkpoint directory: gcs://tymestack-439300/01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000
- 01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000/
- 01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000/model.pkl
- 01/01/models/20241023060440/XGBoostTrainer_2024-10-23_06-05-13/XGBoostTrainer_c5561_00000_0_2024-10-23_06-05-14/checkpoint_000000/model.ubj


In [208]:
# Upload the model to Vertex AI
model = aiplatform.Model.upload(
    display_name="xgboost-housing",
    artifact_uri=f"gs://{BQ_PROJECT}/" + source_blob_name.rsplit("/", 1)[0],
    serving_container_image_uri=f"{REPOSITORY}/{EXPERIMENT}_trainer",
)

Creating Model


INFO:google.cloud.aiplatform.models:Creating Model


Create Model backing LRO: projects/652637980750/locations/us-central1/models/1723547148701663232/operations/3876225611185782784


INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/652637980750/locations/us-central1/models/1723547148701663232/operations/3876225611185782784


Model created. Resource name: projects/652637980750/locations/us-central1/models/1723547148701663232@1


INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/652637980750/locations/us-central1/models/1723547148701663232@1


To use this Model in another session:


INFO:google.cloud.aiplatform.models:To use this Model in another session:


model = aiplatform.Model('projects/652637980750/locations/us-central1/models/1723547148701663232@1')


INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/652637980750/locations/us-central1/models/1723547148701663232@1')


# Deploy model

In [209]:
DEPLOYED_NAME = f"ray-on-vertex-registered-xgboost-model-{aiplatform.utils.timestamped_unique_name()}"

TRAFFIC_SPLIT = {"0": 100}

MIN_NODES = 1
MAX_NODES = 1

endpoint = model.deploy(
    deployed_model_display_name=DEPLOYED_NAME,
    traffic_split=TRAFFIC_SPLIT,
    machine_type=DEPLOY_COMPUTE,
    min_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    sync=False,
)

In [210]:
endpoint

<google.cloud.aiplatform.models.Endpoint object at 0x7f34ab733760> is waiting for upstream dependencies to complete.

In [211]:
from google.cloud import aiplatform_v1
# Initialize the Vertex AI client
client = aiplatform_v1.EndpointServiceClient(client_options={"api_endpoint": f"{REGION}-aiplatform.googleapis.com"})
# Define the parent resource (where the endpoints are located)
parent = f"projects/{BQ_PROJECT}/locations/{REGION}"
# List all available endpoints
endpoints = client.list_endpoints(parent=parent)
# List all available endpoints
request = aiplatform_v1.ListEndpointsRequest(parent=parent)
response = client.list_endpoints(request=request)

# Print the details of each endpoint
for endpoint in response:
    print(f"Endpoint name: {endpoint.name}")
    print(f"Display name: {endpoint.display_name}")
    print(f"Deployed models: {len(endpoint.deployed_models)}")
    print("--------")

Endpoint name: projects/652637980750/locations/us-central1/endpoints/7890958557548380160
Display name: xgboost-housing_endpoint
Deployed models: 0
--------
Endpoint name: projects/652637980750/locations/us-central1/endpoints/6044201235349766144
Display name: xgboost-housing_endpoint
Deployed models: 0
--------
Endpoint name: projects/652637980750/locations/us-central1/endpoints/2759106782159699968
Display name: xgboost-housing_endpoint
Deployed models: 1
--------
Endpoint name: projects/652637980750/locations/us-central1/endpoints/7815523263789924352
Display name: xgboost-housing_endpoint
Deployed models: 0
--------
Endpoint name: projects/652637980750/locations/us-central1/endpoints/8116982963847036928
Display name: xgboost-housing_endpoint
Deployed models: 0
--------


In [176]:
endpoint = aiplatform.Endpoint('projects/652637980750/locations/us-central1/endpoints/8116982963847036928')