From 923e9e6f6266ffa74c7c2a52c2f824fb9e3f29e0 Mon Sep 17 00:00:00 2001 From: Sasha Belousov <141743480+BelSasha@users.noreply.github.com> Date: Mon, 15 Jul 2024 09:50:05 +0300 Subject: [PATCH] Adding more cluster status tests (#992) Co-authored-by: Alexandra Belousov --- .../test_clusters/test_cluster.py | 1 + .../test_clusters/test_on_demand_cluster.py | 50 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tests/test_resources/test_clusters/test_cluster.py b/tests/test_resources/test_clusters/test_cluster.py index f3a551819..67b820de7 100644 --- a/tests/test_resources/test_clusters/test_cluster.py +++ b/tests/test_resources/test_clusters/test_cluster.py @@ -680,6 +680,7 @@ def test_rh_status_stopped(self, cluster): @pytest.mark.level("local") @pytest.mark.clustertest def test_send_status_to_db(self, cluster): + import json cluster.save() diff --git a/tests/test_resources/test_clusters/test_on_demand_cluster.py b/tests/test_resources/test_clusters/test_on_demand_cluster.py index 5d081f356..3360ea7f7 100644 --- a/tests/test_resources/test_clusters/test_on_demand_cluster.py +++ b/tests/test_resources/test_clusters/test_on_demand_cluster.py @@ -1,4 +1,5 @@ import asyncio +import subprocess import time import pytest @@ -181,6 +182,9 @@ def test_fn_to_docker_container(self, ondemand_aws_cluster): remote_torch_exists = rh.function(torch_exists).to(ondemand_aws_cluster) assert remote_torch_exists() + #################################################################################################### + # Status tests + #################################################################################################### @pytest.mark.level("minimal") @pytest.mark.skip("Test requires terminating the cluster") def test_set_status_after_teardown(self, cluster): @@ -202,6 +206,52 @@ def test_set_status_after_teardown(self, cluster): assert get_status_data["resource_type"] == cluster_config.get("resource_type") assert get_status_data["status"] == ResourceServerStatus.terminated + @pytest.mark.level("minimal") + def test_status_autostop_cluster(self, cluster): + cluster_config = cluster.config() + cluster_uri = rns_client.format_rns_address(cluster.rns_address) + api_server_url = cluster_config.get("api_server_url", rns_client.api_server_url) + cluster_name_no_owner = cluster.rns_address.split("/")[-1] + + # Mocking autostop by running sky down + result_teardown = subprocess.run( + ["sky", "down", "-y", cluster_name_no_owner], capture_output=True, text=True + ) + assert result_teardown.returncode == 0 + + get_status_data_resp = requests.get( + f"{api_server_url}/resource/{cluster_uri}/cluster/status", + headers=rns_client.request_headers(), + ) + assert get_status_data_resp.status_code == 200 + # For UI displaying purposes, the cluster/status endpoint returns cluster status history. + # The latest status info is the first element in the list returned by the endpoint. + get_status_data = get_status_data_resp.json()["data"][0] + assert get_status_data["resource_type"] == cluster_config.get("resource_type") + assert get_status_data["status"] == ResourceServerStatus.terminated + + @pytest.mark.level("minimal") + def test_status_cluster_rh_daemon_stopped(self, cluster): + cluster_config = cluster.config() + cluster_uri = rns_client.format_rns_address(cluster.rns_address) + api_server_url = cluster_config.get("api_server_url", rns_client.api_server_url) + + cluster.run(["runhouse stop"]) + + get_status_data_resp = requests.get( + f"{api_server_url}/resource/{cluster_uri}/cluster/status", + headers=rns_client.request_headers(), + ) + assert get_status_data_resp.status_code == 200 + # For UI displaying purposes, the cluster/status endpoint returns cluster status history. + # The latest status info is the first element in the list returned by the endpoint. + get_status_data = get_status_data_resp.json()["data"][0] + assert get_status_data["resource_type"] == cluster_config.get("resource_type") + assert get_status_data["status"] == ResourceServerStatus.runhouse_daemon_down + + #################################################################################################### + # Logs surfacing tests + #################################################################################################### @pytest.mark.level("minimal") def test_logs_surfacing_scheduler_basic_flow(self, cluster):