# Testing Ray Setup for Image Classification

In [1]:
from snowflakeray.cluster_init import SnowflakeRay
from snowflake.snowpark import Session
import json
from pprint import pprint

### Setup Snowpark Session

In [2]:
# Read the Snowflake connection settings from the local creds.json file; the
# handle is only needed while the JSON is being parsed.
with open('creds.json') as f:
    data = json.load(f)

# Unpack every setting into its own module-level name, in the same order as the
# keys are looked up in the file. `warehouse` is reused later when the Ray
# cluster is set up.
username, password = data['username'], data['password']
account, warehouse = data["account"], data["warehouse"]
database, schema, role = data["database"], data["schema"], data["role"]

# Keyword form of the Snowpark connection configuration; the resulting dict has
# the same keys, in the same order, as a literal would.
CONNECTION_PARAMETERS = dict(
    account=account,
    user=username,
    password=password,
    schema=schema,
    database=database,
    warehouse=warehouse,
    role=role,
    session_parameters={"PYTHON_CONNECTOR_QUERY_RESULT_FORMAT": "json"},
)

In [3]:
# Open the Snowpark session from the connection settings assembled above.
session = Session.builder.configs(CONNECTION_PARAMETERS).create()

In [5]:
# Show the warehouse this session is using (the name comes back double-quoted).
session.get_current_warehouse()

'"RAY_WH"'

In [6]:
# Show the role this session is running under (the name comes back double-quoted).
session.get_current_role()

'"RAY_ROLE"'

In [7]:
# Project label handed to SnowflakeRay. Judging by the recorded outputs, it shows up
# (with spaces removed) in the generated service and image-repository names.
project_name = "image classification pytorch"

### Bring your own compute pools for Image classification

In [8]:
# Names of the compute pools for the Ray head node and the Ray worker nodes. They are
# interpolated into the CREATE COMPUTE POOL statements and passed to SnowflakeRay.
# NOTE(review): "IMAGECLASSIFCATION" is missing an "I". Later cells hard-code the same
# spelling, so rename it everywhere at once if it is ever corrected.
head_compute_pool_name = "RAY_HEAD_CP_IMAGECLASSIFCATION"
worker_compute_pool_name = "RAY_WORKER_CP_IMAGECLASSIFCATION"

In [15]:
# DDL for the Ray head compute pool: exactly one GPU_NV_S node. The statement is
# assembled line by line; the empty first and last items reproduce the leading and
# trailing newlines of the statement text.
ray_head_cp_sql = "\n".join([
    "",
    f"    create compute pool if not exists {head_compute_pool_name}",
    "        min_nodes = 1",
    "        max_nodes = 1",
    "        instance_family = GPU_NV_S",
    "        auto_resume = TRUE",
    "        AUTO_SUSPEND_SECS = 3600;",
    "",
])

In [16]:
# DDL for the Ray worker compute pool: a fixed size of four GPU_NV_S nodes. The
# statement is assembled line by line; the empty first and last items reproduce the
# leading and trailing newlines of the statement text.
ray_worker_cp_sql = "\n".join([
    "",
    f"    create compute pool if not exists {worker_compute_pool_name}",
    "        min_nodes = 4",
    "        max_nodes = 4",
    "        instance_family = GPU_NV_S",
    "        auto_resume = TRUE",
    "        AUTO_SUSPEND_SECS = 3600;",
    "",
])

In [17]:
# Run the head-pool DDL. Because of "if not exists", the statement still succeeds
# when the pool is already there, and the returned status row says which case applied.
session.sql(ray_head_cp_sql).collect()

[Row(status='RAY_HEAD_CP_IMAGECLASSIFCATION already exists, statement succeeded.')]

In [18]:
# Run the worker-pool DDL. Per the status message, a freshly created pool cannot host
# a service or job until it reaches the ACTIVE or IDLE state.
session.sql(ray_worker_cp_sql).collect()

[Row(status="Compute Pool RAY_WORKER_CP_IMAGECLASSIFCATION successfully created. Please run 'DESCRIBE COMPUTE POOL RAY_WORKER_CP_IMAGECLASSIFCATION' to check the compute pool state. NOTE the compute pool is not ready to deploy a service or job before reaching ACTIVE or IDLE state.")]

### Instantiate the SnowflakeRay object

In [9]:
# Bind the Snowpark session, the project name and the two compute pool names to a
# SnowflakeRay handle; every cluster operation below goes through this object.
snowflake_ray = SnowflakeRay(
    session=session,
    project_name=project_name,
    head_compute_pool_name=head_compute_pool_name,
    worker_compute_pool_name=worker_compute_pool_name,
)

##### Note: if you are enrolled in the Block Storage Private Preview (PrPr), add the parameter `need_block_storage_for_ray_logs=True` in the cell below

In [23]:
# Build the container images and deploy the Ray cluster (see the log output below);
# the value returned is the list of public endpoints displayed in the next cell.
endpoints = snowflake_ray.setup_ray_cluster(
    stage_name_for_specs="RAY_SPECS",
    stage_name_for_artifacts="ARTIFACTS",
    external_access_integrations=["ALLOW_ALL_EAI"],
    query_warehouse=warehouse,
    # Every Ray extra is pinned to the same release.
    ray_requirements=[
        f"ray[{extra}]==2.9.3"
        for extra in ("data", "client", "default", "serve", "tune")
    ],
    # Additional packages requested alongside Ray.
    pip_requirements=[
        "jupyterlab",
        "py-spy",
        "ipywidgets",
        "virtualenv",
        "pandas==1.5.3",
        "torch==2.1.2",
        "torchvision",
        "pillow",
    ],
)

ray_head_precreated_compute_pool: True
ray_worker_precreated_compute_pool: True


INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Created image repo: sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Building the Docker image and deploying to Snowpark Container Service. 
INFO:snowflakeray.deploy_client.image_builds.client_image_builder:Client:

INFO:snowflakeray.deploy_client.image_builds.client_image_builder: Version:    25.0.3

INFO:snowflakeray.deploy_client.image_builds.client_image_builder: Context:    desktop-linux

INFO:snowflakeray.deploy_client.image_builds.client_image_builder: Debug Mode: false

INFO:snowflakeray.deploy_client.image_builds.client_image_builder: Plugins:

INFO:snowflakeray.deploy_client.image_builds.client_image_builder:  buildx: Docker Buildx (Docker Inc.)

INFO:snowflakeray.deploy_client.image_builds.client_image_builder:    Version:  v0.12.1-desktop.4

INFO:snowflakeray.deploy_client.image_builds.cli

In [25]:
# Display what setup_ray_cluster returned: one single-entry dict per public endpoint,
# mapping the endpoint name to its hostname.
endpoints

[{'api': 'biaoey5-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'notebook': 'biaoezb-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'ray-client-server-port': 'biaoezf-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'prometheus': 'biaoezj-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'grafana': 'biaoezn-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'ray-dashboard': 'biaoezr-sfsenorthamerica-demo391.snowflakecomputing.app'}]

### Get service endpoints independent of the command above (optional)

In [26]:
# Look the public endpoints up again from the existing handle, without re-running the
# cluster setup; pprint prints one endpoint per line.
pprint(snowflake_ray.get_public_endpoints())

[{'api': 'biaoey5-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'notebook': 'biaoezb-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'ray-client-server-port': 'biaoezf-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'prometheus': 'biaoezj-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'grafana': 'biaoezn-sfsenorthamerica-demo391.snowflakecomputing.app'},
 {'ray-dashboard': 'biaoezr-sfsenorthamerica-demo391.snowflakecomputing.app'}]


### Get ray head service status

In [27]:
# Print one status record per container of the head service (the recorded output
# lists the head, prometheus and grafana containers).
pprint(snowflake_ray.get_ray_head_service_status())

[{'containerName': 'head',
  'image': 'sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch/ray_head:a7b7fb7a78e1bd4c3a1ffb6243343bc794ed00dc',
  'instanceId': '0',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYHEADSERVICEIMAGECLASSIFICATIONPYTORCH',
  'startTime': '2024-05-09T18:14:23Z',
  'status': 'READY'},
 {'containerName': 'prometheus',
  'image': 'sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch/ray_prometheus:6d19737014a9d54cd6894cd4f9ac23356dd3f0b1',
  'instanceId': '0',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYHEADSERVICEIMAGECLASSIFICATIONPYTORCH',
  'startTime': '2024-05-09T18:16:07Z',
  'status': 'READY'},
 {'containerName': 'grafana',
  'image': 'sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch/ray_grafana:ae6ea

### Get ray head service logs

In [28]:
# Print the head service logs. In the recorded output this is a single long string
# wrapping a SYSTEM$GET_SERVICE_LOGS result, which pprint breaks across lines.
pprint(snowflake_ray.get_ray_head_logs())

("[{'SYSTEM$GET_SERVICE_LOGS': '[W 2024-05-09 19:40:24.787 ServerApp] 404 GET "
 '/api/contents/sec?content=0&hash=0&1715283624741 '
 '(75c9b86b94064ecb810827a70054e264@127.0.0.1) 2.32ms '
 'referer=https://biaoezb-sfsenorthamerica-demo391.snowflakecomputing.app/lab/tree/home/artifacts/Pytorch%20image%20classification.ipynb\\n[W '
 '2024-05-09 19:40:24.788 ServerApp] 404 GET '
 '/api/contents/sec?content=0&hash=0&1715283624741 (127.0.0.1): No such file '
 'or directory: sec\\n[W 2024-05-09 19:40:24.788 ServerApp] 404 GET '
 '/api/contents/sec?content=0&hash=0&1715283624744 '
 '(75c9b86b94064ecb810827a70054e264@127.0.0.1) 2.63ms '
 'referer=https://biaoezb-sfsenorthamerica-demo391.snowflakecomputing.app/lab/tree/home/artifacts/Pytorch%20image%20classification.ipynb\\n[W '
 '2024-05-09 19:40:24.788 ServerApp] 404 GET '
 '/api/contents/sec?content=0&hash=0&1715283624744 (127.0.0.1): No such file '
 'or directory: sec\\n[W 2024-05-09 19:40:24.790 ServerApp] 404 GET '
 '/api/contents/sec?co

### Get ray worker service status

In [29]:
# Print one status record per worker service instance (the recorded output shows
# 'worker' containers distinguished by instanceId).
pprint(snowflake_ray.get_ray_worker_service_status())

[{'containerName': 'worker',
  'image': 'sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch/ray_worker:0980aaba4599bc91a6387c8ddcad341c06628bf5',
  'instanceId': '0',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYWORKERSERVICEIMAGECLASSIFICATIONPYTORCH',
  'startTime': '2024-05-09T18:14:17Z',
  'status': 'READY'},
 {'containerName': 'worker',
  'image': 'sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch/ray_worker:0980aaba4599bc91a6387c8ddcad341c06628bf5',
  'instanceId': '1',
  'message': 'Running',
  'restartCount': 0,
  'serviceName': 'SPCSRAYWORKERSERVICEIMAGECLASSIFICATIONPYTORCH',
  'startTime': '2024-05-09T18:14:16Z',
  'status': 'READY'},
 {'containerName': 'worker',
  'image': 'sfsenorthamerica-demo391.registry.snowflakecomputing.com/ray_db/ray_schema/spcs_ray_image_repoimageclassificationpytorch/ray_worker:0980aab

### Get ray worker logs

In [30]:
# Print the worker service logs. In the recorded output this is a single long string
# wrapping a SYSTEM$GET_SERVICE_LOGS result (the worker start-up shell trace).
pprint(snowflake_ray.get_ray_worker_logs())

("[{'SYSTEM$GET_SERVICE_LOGS': '+ WORKLOAD=rayworker\\n++ ifconfig eth0\\n++ "
 "sed -En -e \\'s/.*inet ([0-9.]+).*/\\\\1/p\\'\\n+ eth0Ip=10.244.30.10\\n+ "
 "echo \\'WORKLOAD: rayworker\\'\\nWORKLOAD: rayworker\\n+ \\'[\\' rayworker "
 "== rayhead \\']\\'\\n+ \\'[\\' rayworker == rayworker \\']\\'\\n+ \\'[\\' -z "
 "SPCSRAYHEADSERVICEIMAGECLASSIFICATIONPYTORCH:6379 \\']\\'\\n+ export "
 'RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1\\n+ '
 'RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1\\n+ export '
 'RAY_BACKEND_LOG_LEVEL=debug\\n+ RAY_BACKEND_LOG_LEVEL=debug\\n+ export '
 'HOST_IP=10.244.30.10\\n+ HOST_IP=10.244.30.10\\n+ export NCCL_DEBUG=INFO\\n+ '
 'NCCL_DEBUG=INFO\\n+ export NCCL_SOCKET_IFNAME=eth0\\n+ '
 'NCCL_SOCKET_IFNAME=eth0\\n+ ray start --node-ip-address=10.244.30.10 '
 '--disable-usage-stats '
 '--address=SPCSRAYHEADSERVICEIMAGECLASSIFICATIONPYTORCH:6379 '
 '\\\'--resources={"custom_llm_serving_label": 1}\\\' '
 '--object-manager-port=8076 --node-manager-port=8077 '
 '--runtime-env-age

### Delete all services

In [None]:
# Tear-down step, left commented out: uncomment to delete the Ray services.
# NOTE(review): the log lines recorded under this cell name LLMSERVING services, so
# they appear to come from a different project's run, not from this notebook.
#snowflake_ray.delete_all_services()

INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYHEADSERVICELLMSERVING
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYWORKERSERVICELLMSERVING


### Suspend all compute pools

In [24]:
# Left commented out: uncomment to suspend the compute pools.
# NOTE(review): the log recorded under this cell shows services being deleted before
# the pools are suspended, and it names LLM_SERVING pools, so it presumably comes
# from a different project's run. Confirm the behaviour before relying on it.
#snowflake_ray.suspend_all_compute_pools()

INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYHEADSERVICELLMSERVINGFORSUMMIT
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted service: SPCSRAYWORKERSERVICELLMSERVINGFORSUMMIT
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Suspended compute pool: RAY_HEAD_CP_LLM_SERVING_TEST
INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Suspended compute pool: RAY_WORKER_CP_LLM_SERVING_TEST


### Delete all compute pools

In [10]:
# Drop the compute pools used by this cluster. This is destructive: the SHOW COMPUTE
# POOLS checks that follow come back empty for both pool names.
snowflake_ray.delete_all_compute_pools()

INFO:snowflakeray.deploy_client.utils.cluster_init_helper:Deleted compute pool: RAY_WORKER_CP_IMAGECLASSIFCATION


#### Check compute pools

In [11]:
# Verify that the head compute pool is gone (an empty result means it no longer
# exists). The LIKE pattern is built from head_compute_pool_name so the check always
# targets the pool defined earlier instead of a second hard-coded copy of its name.
session.sql(f"show compute pools like '{head_compute_pool_name}'").show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"name"  |"state"  |"min_nodes"  |"max_nodes"  |"instance_family"  |"num_services"  |"num_jobs"  |"auto_suspend_secs"  |"auto_resume"  |"active_nodes"  |"idle_nodes"  |"created_on"  |"resumed_on"  |"updated_on"  |"owner"  |"comment"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|        |         |             |             |                   |                |            |                     |               |                |              |              |              |              |         |           |
--------------------------------------------------------

In [12]:
# Verify that the worker compute pool is gone (an empty result means it no longer
# exists). The LIKE pattern is built from worker_compute_pool_name so the check always
# targets the pool defined earlier instead of a second hard-coded copy of its name.
session.sql(f"show compute pools like '{worker_compute_pool_name}'").show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"name"  |"state"  |"min_nodes"  |"max_nodes"  |"instance_family"  |"num_services"  |"num_jobs"  |"auto_suspend_secs"  |"auto_resume"  |"active_nodes"  |"idle_nodes"  |"created_on"  |"resumed_on"  |"updated_on"  |"owner"  |"comment"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|        |         |             |             |                   |                |            |                     |               |                |              |              |              |              |         |           |
--------------------------------------------------------

### Close the Snowpark session

In [13]:
# Close the Snowpark session; this is the final step of the notebook.
session.close()