# Testing Ray Setup for LLM Serving

In [1]:
from snowflakeray.cluster_init import SnowflakeRay
from snowflake.snowpark import Session
import json
from pprint import pprint

### Set up the Snowpark session

In [2]:
# Load the Snowflake connection settings from the local creds.json file.
# Only the parsing happens while the file is open; the individual fields
# are extracted afterwards from the parsed dictionary.
with open('creds.json') as f:
    data = json.load(f)

# Each lookup raises KeyError if creds.json lacks the corresponding entry.
username = data['username']
password = data['password']
account = data["account"]
warehouse = data["warehouse"]
database = data["database"]
schema = data["schema"]
role = data["role"]

# Settings passed to Session.builder.configs(); query results are
# requested in JSON format through the session parameter below.
CONNECTION_PARAMETERS = dict(
    account=account,
    user=username,
    password=password,
    schema=schema,
    database=database,
    warehouse=warehouse,
    role=role,
    session_parameters={"PYTHON_CONNECTOR_QUERY_RESULT_FORMAT": "json"},
)

In [3]:
# Open a Snowpark session configured from CONNECTION_PARAMETERS.
session = Session.builder.configs(CONNECTION_PARAMETERS).create()

 * To change owner, run `chown $USER "/Users/plakhanpal/Library/Application Support/snowflake/config.toml"`.
 * To restrict permissions, run `chmod 0600 "/Users/plakhanpal/Library/Application Support/snowflake/config.toml"`.

  warn(f"Bad owner or permissions on {str(filep)}{chmod_message}")


In [4]:
# Project name passed to the SnowflakeRay constructor further down.
project_name = "llm serving"

### Bring your own compute pools for LLM Serving

In [5]:
# Names of the compute pools used for the Ray head and Ray worker nodes.
# They are interpolated into the CREATE COMPUTE POOL statements and passed
# to the SnowflakeRay constructor.
head_compute_pool_name = "RAY_HEAD_CP_LLM_SERVING"
worker_compute_pool_name = "RAY_WORKER_CP_LLM_SERVING"

In [6]:
# DDL for the Ray head compute pool: exactly one GPU_NV_S node
# (min_nodes = max_nodes = 1), auto_resume enabled, AUTO_SUSPEND_SECS = 3600.
# "if not exists" makes the statement safe to re-run.
ray_head_cp_sql = (
    "\n"
    f"    create compute pool if not exists {head_compute_pool_name}\n"
    "        min_nodes = 1\n"
    "        max_nodes = 1\n"
    "        instance_family = GPU_NV_S\n"
    "        auto_resume = TRUE\n"
    "        AUTO_SUSPEND_SECS = 3600;\n"
)

In [7]:
# DDL for the Ray worker compute pool: exactly two GPU_NV_M nodes
# (min_nodes = max_nodes = 2), auto_resume enabled, AUTO_SUSPEND_SECS = 3600.
# "if not exists" makes the statement safe to re-run.
ray_worker_cp_sql = (
    "\n"
    f"    create compute pool if not exists {worker_compute_pool_name}\n"
    "        min_nodes = 2\n"
    "        max_nodes = 2\n"
    "        instance_family = GPU_NV_M\n"
    "        auto_resume = TRUE\n"
    "        AUTO_SUSPEND_SECS = 3600;\n"
)

In [8]:
# Execute the head compute pool DDL; the returned rows carry the status message.
session.sql(ray_head_cp_sql).collect()

[Row(status='RAY_HEAD_CP_LLM_SERVING already exists, statement succeeded.')]

In [9]:
# Execute the worker compute pool DDL; the returned rows carry the status message.
session.sql(ray_worker_cp_sql).collect()

[Row(status='RAY_WORKER_CP_LLM_SERVING already exists, statement succeeded.')]

#### Initialize the SnowflakeRay object

In [13]:
# Build the SnowflakeRay helper bound to the open session, the project name,
# and the two pre-created compute pools (head and worker).
snowflake_ray = SnowflakeRay(
    session=session,
    project_name=project_name,
    head_compute_pool_name=head_compute_pool_name,
    worker_compute_pool_name=worker_compute_pool_name,
)

#### Set up the Ray cluster

In [14]:
# Ray extras to install; all four are pinned to the same Ray release.
ray_packages = [
    "ray[data]==2.10.0",
    "ray[client]==2.10.0",
    "ray[default]==2.10.0",
    "ray[serve]==2.10.0",
]

# Additional pip packages for the cluster image. Entries without a version
# specifier are unpinned, exactly as in the original call.
pip_packages = [
    "jupyterlab",
    "py-spy",
    "ipywidgets",
    "virtualenv",
    "datasets==2.18.0",
    "numpy",
    "transformers==4.39.3",
    "evaluate",
    "torch==2.1.2",
    "accelerate==0.29.3",
    "tokenizers==0.15.2",
    "pandas==1.5.3",
    "pytorch_lightning==2.0.3",
    "deepspeed==0.14.1",
    "sentencepiece==0.2.0",
    "torchvision==0.16.2",
    "bitsandbytes==0.43.1",
    "tiktoken==0.6.0",
    "tqdm==4.66.2",
    "vllm==0.4.0",
    "xformers==0.0.23.post1",
    "huggingface-hub==0.22.2",
    "sentence-transformers",
]

# Set up the Ray cluster; the return value is kept in `endpoints`
# and displayed in the next cell.
endpoints = snowflake_ray.setup_ray_cluster(
    stage_name_for_specs="RAY_SPECS",
    stage_name_for_artifacts="ARTIFACTS",
    external_access_integrations=["ALLOW_ALL_EAI"],
    ray_requirements=ray_packages,
    pip_requirements=pip_packages,
)

ray_head_precreated_compute_pool: True
ray_worker_precreated_compute_pool: True


INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Created image repo: sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Similar environment detected. Using existing image sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_grafana:48e0aa5139e19661c85469042fa8103037d755af to skip image build. To disable this feature, set 'force_image_build=True' in deployment options
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Similar environment detected. Using existing image sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_prometheus:94d29dce4e2f3355da1d28a1781619418ad65135 to skip image build. To disable this feature, set 'force_image_build=True' in deployment options
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Simi

In [15]:
# Display the value returned by setup_ray_cluster().
endpoints

[{'api': 'nlwmi-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'notebook': 'nlwmm-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'ray-client-server-port': 'nlwmq-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'prometheus': 'nlwmu-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'grafana': 'nlwmy-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'ray-dashboard': 'nlwm4-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'}]

### Get service endpoints independent of the command above (optional)

In [16]:
# Fetch the public endpoints directly from the SnowflakeRay object and pretty-print them.
pprint(snowflake_ray.get_public_endpoints())

[{'api': 'nlwmi-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'notebook': 'nlwmm-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'ray-client-server-port': 'nlwmq-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'prometheus': 'nlwmu-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'grafana': 'nlwmy-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'},
 {'ray-dashboard': 'nlwm4-sfsenorthamerica-demo274-awseast.snowflakecomputing.app'}]


### Get ray head service status

In [17]:
# Pretty-print the status reported for the Ray head service.
pprint(snowflake_ray.get_ray_head_service_status())

[{'containerName': 'head',
  'image': 'sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_head:ff27f251068846294fa94ab808a6914689105ff2',
  'instanceId': '0',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYHEADSERVICELLMSERVING',
  'startTime': '2024-05-09T22:25:23Z',
  'status': 'READY'},
 {'containerName': 'prometheus',
  'image': 'sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_prometheus:94d29dce4e2f3355da1d28a1781619418ad65135',
  'instanceId': '0',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYHEADSERVICELLMSERVING',
  'startTime': '2024-05-09T22:25:26Z',
  'status': 'READY'},
 {'containerName': 'grafana',
  'image': 'sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_grafana:48e0aa5139e19661c85469042fa8103037d755af',
  'instanceId': '0

### Get ray head service logs

In [18]:
# Pretty-print the Ray head logs; the output can be long.
pprint(snowflake_ray.get_ray_head_logs())

('[{\'SYSTEM$GET_SERVICE_LOGS\': "+ WORKLOAD=rayhead\\n++ ifconfig eth0\\n++ '
 "sed -En -e 's/.*inet ([0-9.]+).*/\\\\1/p'\\n+ eth0Ip=10.244.6.12\\n+ echo "
 "'WORKLOAD: rayhead'\\nWORKLOAD: rayhead\\n+ '[' rayhead == rayhead ']'\\n+ "
 "'[' -z http://SPCSRAYHEADSERVICELLMSERVING:3000 ']'\\n+ '[' -z "
 "http://SPCSRAYHEADSERVICELLMSERVING:9090 ']'\\n+ export "
 'RAY_GRAFANA_HOST=http://SPCSRAYHEADSERVICELLMSERVING:3000\\n+ '
 'RAY_GRAFANA_HOST=http://SPCSRAYHEADSERVICELLMSERVING:3000\\n+ export '
 'RAY_PROMETHEUS_HOST=http://SPCSRAYHEADSERVICELLMSERVING:9090\\nMaking log '
 'directory /raylogs/ray...\\n+ '
 'RAY_PROMETHEUS_HOST=http://SPCSRAYHEADSERVICELLMSERVING:9090\\n+ export '
 "log_dir=/raylogs/ray\\n+ log_dir=/raylogs/ray\\n+ echo 'Making log directory "
 "/raylogs/ray...'\\n+ mkdir -p /raylogs/ray\\n+ export "
 'RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1\\n+ '
 'RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1\\n+ export '
 'RAY_BACKEND_LOG_LEVEL=debug\\n+ RAY_BACKEND_LOG_LEVEL=debug\\n+ expo

### Get ray worker service status

In [19]:
# Pretty-print the status reported for the Ray worker service.
pprint(snowflake_ray.get_ray_worker_service_status())

[{'containerName': 'worker',
  'image': 'sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_worker:cfdb32c304b1ce29fc54c1af236deb2be8251f17',
  'instanceId': '0',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYWORKERSERVICELLMSERVING',
  'startTime': '2024-05-09T22:25:30Z',
  'status': 'READY'},
 {'containerName': 'worker',
  'image': 'sfsenorthamerica-demo274-awseast.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repollmserving/ray_worker:cfdb32c304b1ce29fc54c1af236deb2be8251f17',
  'instanceId': '1',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYWORKERSERVICELLMSERVING',
  'startTime': '2024-05-09T22:25:30Z',
  'status': 'READY'}]


### Get ray worker logs

In [20]:
# Pretty-print the Ray worker logs; the output can be long.
pprint(snowflake_ray.get_ray_worker_logs())

("[{'SYSTEM$GET_SERVICE_LOGS': '+ WORKLOAD=rayworker\\n++ ifconfig eth0\\n++ "
 "sed -En -e \\'s/.*inet ([0-9.]+).*/\\\\1/p\\'\\n+ eth0Ip=10.244.4.12\\n+ "
 "echo \\'WORKLOAD: rayworker\\'\\nWORKLOAD: rayworker\\n+ \\'[\\' rayworker "
 "== rayhead \\']\\'\\n+ \\'[\\' rayworker == rayworker \\']\\'\\n+ \\'[\\' -z "
 "SPCSRAYHEADSERVICELLMSERVING:6379 \\']\\'\\n+ export "
 'RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1\\n+ '
 'RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1\\n+ export '
 'RAY_BACKEND_LOG_LEVEL=debug\\n+ RAY_BACKEND_LOG_LEVEL=debug\\n+ export '
 'HOST_IP=10.244.4.12\\n+ HOST_IP=10.244.4.12\\n+ export NCCL_DEBUG=INFO\\n+ '
 'NCCL_DEBUG=INFO\\n+ export NCCL_SOCKET_IFNAME=eth0\\n+ '
 'NCCL_SOCKET_IFNAME=eth0\\n+ ray start --node-ip-address=10.244.4.12 '
 '--disable-usage-stats --address=SPCSRAYHEADSERVICELLMSERVING:6379 '
 '\\\'--resources={"custom_llm_serving_label": 1}\\\' '
 '--object-manager-port=8076 --node-manager-port=8077 '
 '--runtime-env-agent-port=8078 --dashboard-agent-grpc-port

### Delete all services

In [None]:
# Disabled (commented out). Uncomment the next line to call delete_all_services().
#snowflake_ray.delete_all_services()

INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYHEADSERVICELLMSERVING
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYWORKERSERVICELLMSERVING


### Suspend all compute pools

In [24]:
# Disabled (commented out). Uncomment the next line to call suspend_all_compute_pools().
#snowflake_ray.suspend_all_compute_pools()

INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYHEADSERVICELLMSERVINGFORSUMMIT
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYWORKERSERVICELLMSERVINGFORSUMMIT
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Suspended compute pool: RAY_HEAD_CP_LLM_SERVING_TEST
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Suspended compute pool: RAY_WORKER_CP_LLM_SERVING_TEST


### Delete all compute pools

In [21]:
# Destructive teardown. Per the logged output of this cell, the call deleted
# the Ray head and worker services and then both compute pools.
snowflake_ray.delete_all_compute_pools()

INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYHEADSERVICELLMSERVING
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYWORKERSERVICELLMSERVING
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted compute pool: RAY_HEAD_CP_LLM_SERVING
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted compute pool: RAY_WORKER_CP_LLM_SERVING


### Close the Snowpark session

In [22]:
# Close the Snowpark session opened at the top of the notebook.
session.close()