You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
linux://python/ray/tests:test_usage_stats has been flaky with the following errors:
=================================== FAILURES ===================================
--
| __________________________ test_usage_lib_report_data __________________________
|
| monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x7f591d758898>
| shutdown_only = None
| tmp_path = PosixPath('/tmp/pytest-of-root/pytest-3/test_usage_lib_report_data0')
|
| @pytest.mark.skipif(
| sys.platform == "win32",
| reason="Test depends on runtime env feature not supported on Windows.",
| )
| def test_usage_lib_report_data(monkeypatch, shutdown_only, tmp_path):
| with monkeypatch.context() as m:
| m.setenv("RAY_USAGE_STATS_ENABLED", "1")
| m.setenv("RAY_USAGE_STATS_REPORT_URL", "http://127.0.0.1:8000")
| # Runtime env is required to run this test in minimal installation test.
| ray.init(num_cpus=0, runtime_env={"pip": ["ray[serve]"]})
| """
| Make sure the generated data is following the schema.
| """
| cluster_metadata = ray_usage_lib.get_cluster_metadata(
| ray.experimental.internal_kv.internal_kv_get_gcs_client(), num_retries=20
| )
| cluster_config_file_path = tmp_path / "ray_bootstrap_config.yaml"
| cluster_config_file_path.write_text(
| """
| cluster_name: minimal
| max_workers: 1
| provider:
| type: aws
| region: us-west-2
| availability_zone: us-west-2a
| """
| )
| cluster_config_to_report = ray_usage_lib.get_cluster_config_to_report(
| cluster_config_file_path
| )
| d = ray_usage_lib.generate_report_data(
| cluster_metadata, cluster_config_to_report, 2, 2, 2
| )
| validate(instance=asdict(d), schema=schema)
|
| """
| Make sure writing to a file works as expected
| """
| client = ray_usage_lib.UsageReportClient()
| temp_dir = Path(tmp_path)
| client.write_usage_data(d, temp_dir)
|
| wait_for_condition(lambda: file_exists(temp_dir))
|
| """
| Make sure report usage data works as expected
| """
|
| @ray.remote(num_cpus=0, runtime_env={"pip": ["ray[serve]"]})
| class ServeInitator:
| def __init__(self):
| # Start the ray serve server to verify requests are sent
| # to the right place.
| from ray import serve
|
| serve.start()
|
| @serve.deployment(ray_actor_options={"num_cpus": 0})
| async def usage(request):
| body = await request.json()
| if body == asdict(d):
| return True
| else:
| return False
|
| usage.deploy()
|
| def ready(self):
| pass
|
| # We need to start a serve with runtime env to make this test
| # work with minimal installation.
| s = ServeInitator.remote()
| > ray.get(s.ready.remote())
|
| /ray/python/ray/tests/test_usage_stats.py:444:
| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
| /ray/python/ray/_private/client_mode_hook.py:105: in wrapper
| return func(*args, **kwargs)
| _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
| object_refs = [ObjectRef(16310a0f0a45af5cf3019b4336c3332f5b1656030100000001000000)]
|
| @PublicAPI
| @client_mode_hook(auto_init=True)
| def get(
| object_refs: Union[ray.ObjectRef, List[ray.ObjectRef]],
| *,
| timeout: Optional[float] = None,
| ) -> Union[Any, List[Any]]:
| """Get a remote object or a list of remote objects from the object store.
|
| This method blocks until the object corresponding to the object ref is
| available in the local object store. If this object is not in the local
| object store, it will be shipped from an object store that has it (once the
| object has been created). If object_refs is a list, then the objects
| corresponding to each object in the list will be returned.
|
| Ordering for an input list of object refs is preserved for each object
| returned. That is, if an object ref to A precedes an object ref to B in the
| input list, then A will precede B in the returned list.
|
| This method will issue a warning if it's running inside async context,
| you can use ``await object_ref`` instead of ``ray.get(object_ref)``. For
| a list of object refs, you can use ``await asyncio.gather(*object_refs)``.
|
| Args:
| object_refs: Object ref of the object to get or a list of object refs
| to get.
| timeout (Optional[float]): The maximum amount of time in seconds to
| wait before returning.
|
| Returns:
| A Python object or a list of Python objects.
|
| Raises:
| GetTimeoutError: A GetTimeoutError is raised if a timeout is set and
| the get takes longer than timeout to return.
| Exception: An exception is raised if the task that created the object
| or that created one of the objects raised an exception.
| """
| worker = global_worker
| worker.check_connected()
|
| if hasattr(worker, "core_worker") and worker.core_worker.current_actor_is_asyncio():
| global blocking_get_inside_async_warned
| if not blocking_get_inside_async_warned:
| logger.warning(
| "Using blocking ray.get inside async actor. "
| "This blocks the event loop. Please use `await` "
| "on object ref with asyncio.gather if you want to "
| "yield execution to the event loop instead."
| )
| blocking_get_inside_async_warned = True
|
| with profiling.profile("ray.get"):
| is_individual_id = isinstance(object_refs, ray.ObjectRef)
| if is_individual_id:
| object_refs = [object_refs]
|
| if not isinstance(object_refs, list):
| raise ValueError(
| "'object_refs' must either be an object ref "
| "or a list of object refs."
| )
|
| # TODO(ujvl): Consider how to allow user to retrieve the ready objects.
| values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
| for i, value in enumerate(values):
| if isinstance(value, RayError):
| if isinstance(value, ray.exceptions.ObjectLostError):
| worker.core_worker.dump_object_store_memory_usage()
| if isinstance(value, RayTaskError):
| raise value.as_instanceof_cause()
| else:
| > raise value
| E ray.exceptions.RuntimeEnvSetupError: Failed to setup runtime environment.
| E Could not create the actor because its associated runtime env failed to be created.
| E Traceback (most recent call last):
| E File "/ray/python/ray/dashboard/modules/runtime_env/runtime_env_agent.py", line 388, in _create_runtime_env_with_retry
| E runtime_env_setup_task, timeout=setup_timeout_seconds
| E File "/opt/miniconda/lib/python3.6/asyncio/tasks.py", line 358, in wait_for
| E return fut.result()
| E File "/ray/python/ray/dashboard/modules/runtime_env/runtime_env_agent.py", line 297, in _setup_runtime_env
| E uri, runtime_env, context, logger=per_job_logger
| E File "/ray/python/ray/_private/runtime_env/pip.py", line 440, in create
| E return await task
| E File "/ray/python/ray/_private/runtime_env/pip.py", line 431, in _create_for_hash
| E await PipProcessor(target_dir, runtime_env, logger)
| E File "/ray/python/ray/_private/runtime_env/pip.py", line 331, in _run
| E await self._create_or_get_virtualenv(path, exec_cwd, logger)
| E File "/ray/python/ray/_private/runtime_env/pip.py", line 273, in _create_or_get_virtualenv
| E await check_output_cmd(create_venv_cmd, logger=logger, cwd=cwd, env=env)
| E File "/ray/python/ray/_private/runtime_env/utils.py", line 102, in check_output_cmd
| E proc.returncode, cmd, output=stdout, cmd_index=cmd_index
| E ray._private.runtime_env.utils.SubprocessCalledProcessError: Run cmd[7] failed with the following details.
| E Command '['/opt/miniconda/bin/python3', '-m', 'virtualenv', '--app-data', '/tmp/ray/session_2022-04-15_03-50-51_422041_2132/runtime_resources/pip/5f03e216e7a7205e2f91d630e44c9c6e5040e702/virtualenv_app_data', '--reset-app-data', '--no-periodic-update', '--system-site-packages', '--no-download', '/tmp/ray/session_2022-04-15_03-50-51_422041_2132/runtime_resources/pip/5f03e216e7a7205e2f91d630e44c9c6e5040e702/virtualenv']' returned non-zero exit status 1.
| E Last 50 lines of stdout:
| E RuntimeError: failed to build image setuptools, pip because:
| E Traceback (most recent call last):
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/via_app_data.py", line 57, in _install
| E installer.install(creator.interpreter.version_info)
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/pip_install/base.py", line 35, in install
| E self._uninstall_previous_version()
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/pip_install/base.py", line 151, in _uninstall_previous_version
| E dist_name = self._dist_info.stem.split("-")[0]
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/pip_install/base.py", line 115, in _dist_info
| E raise RuntimeError(msg) # pragma: no cover
| E RuntimeError: no .dist-info at /tmp/ray/session_2022-04-15_03-50-51_422041_2132/runtime_resources/pip/5f03e216e7a7205e2f91d630e44c9c6e5040e702/virtualenv_app_data/wheel/3.6/image/1/CopyPipInstall/setuptools-59.6.0-py3-none-any, has pkg_resources, setuptools
| E
| E Traceback (most recent call last):
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/via_app_data.py", line 57, in _install
| E installer.install(creator.interpreter.version_info)
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/pip_install/base.py", line 35, in install
| E self._uninstall_previous_version()
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/pip_install/base.py", line 151, in _uninstall_previous_version
| E dist_name = self._dist_info.stem.split("-")[0]
| E File "/opt/miniconda/lib/python3.6/site-packages/virtualenv/seed/embed/via_app_data/pip_install/base.py", line 115, in _dist_info
| E raise RuntimeError(msg) # pragma: no cover
| E RuntimeError: no .dist-info at /tmp/ray/session_2022-04-15_03-50-51_422041_2132/runtime_resources/pip/5f03e216e7a7205e2f91d630e44c9c6e5040e702/virtualenv_app_data/wheel/3.6/image/1/CopyPipInstall/pip-21.3.1-py3-none-any, has pip
|
| /ray/python/ray/worker.py:1812: RuntimeEnvSetupError
Medium: It is a significant difficulty but I can work around it.
The text was updated successfully, but these errors were encountered:
scv119
added
bug
Something that is supposed to be working; but isn't
triage
Needs triage (eg: priority, bug/not-bug, and owning component)
core
Issues that should be addressed in Ray Core
labels
Apr 18, 2022
scv119
added
P1
Issue that should be fixed within a few weeks
and removed
triage
Needs triage (eg: priority, bug/not-bug, and owning component)
labels
Apr 18, 2022
scv119
added this to the
Core Nightly Regressions milestone
Apr 18, 2022
@scv119 actually, @architkulkarni is already looking at this. The test is related to runtime env (the usage stats test uses a runtime env to run some test code with the minimal installation).
I haven't started debugging this yet. It probably has the same root cause as the other runtime_env test flakiness. I'll have some time to look at this today and later this week.
What happened + What you expected to happen
linux://python/ray/tests:test_usage_stats has been flaky with the following errors:
@rkooo567 @jjyao could you help debug or triage?
Versions / Dependencies
latest master
Reproduction script
Check Buildkite or the flaky test dashboard.
Issue Severity
Medium: It is a significant difficulty but I can work around it.
The text was updated successfully, but these errors were encountered: