Skip to content
Permalink
Browse files
fix: always wait for input files before starting jobs, also upon local execution and within group jobs. This should add further robustness against NFS latency issues. (#1486)

* fix: always wait for input files before starting jobs, also upon local execution and within group jobs. This should add further robustness against NFS latency issues.

* try downgrading irods client
  • Loading branch information
johanneskoester committed Mar 16, 2022
1 parent adae8f1 commit cab2adbc2278a2c1689414d2a3f172bb1d5c84d1
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 42 deletions.
@@ -599,6 +599,7 @@ def snakemake(
check_envvars=not lint, # for linting, we do not need to check whether requested envvars exist
all_temp=all_temp,
local_groupid=local_groupid,
latency_wait=latency_wait,
)
success = True

@@ -771,7 +772,6 @@ def snakemake(
archive=archive,
delete_all_output=delete_all_output,
delete_temp_output=delete_temp_output,
latency_wait=latency_wait,
wait_for_files=wait_for_files,
detailed_summary=detailed_summary,
nolock=not lock,
@@ -65,7 +65,6 @@ def __init__(
quiet=False,
printshellcmds=False,
printthreads=True,
latency_wait=3,
keepincomplete=False,
keepmetadata=True,
):
@@ -75,7 +74,7 @@ def __init__(
self.printreason = printreason
self.printshellcmds = printshellcmds
self.printthreads = printthreads
- self.latency_wait = latency_wait
+ self.latency_wait = workflow.latency_wait
self.keepincomplete = keepincomplete
self.keepmetadata = keepmetadata

@@ -221,7 +220,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
assume_shared_fs=True,
keepincomplete=False,
keepmetadata=False,
@@ -232,7 +230,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
)
@@ -424,7 +421,6 @@ def __init__(
quiet=False,
printshellcmds=False,
use_threads=False,
latency_wait=3,
cores=1,
keepincomplete=False,
keepmetadata=True,
@@ -435,7 +431,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
)
@@ -659,7 +654,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
cluster_config=None,
local_input=None,
restart_times=None,
@@ -680,7 +674,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
assume_shared_fs=assume_shared_fs,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -957,7 +950,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
restart_times=0,
assume_shared_fs=True,
max_status_checks_per_second=1,
@@ -989,7 +981,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
restart_times=restart_times,
assume_shared_fs=assume_shared_fs,
@@ -1322,7 +1313,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
restart_times=0,
assume_shared_fs=True,
keepincomplete=False,
@@ -1336,7 +1326,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
restart_times=restart_times,
assume_shared_fs=assume_shared_fs,
@@ -1432,7 +1421,6 @@ def __init__(
printshellcmds=False,
drmaa_args="",
drmaa_log_dir=None,
latency_wait=3,
cluster_config=None,
restart_times=0,
assume_shared_fs=True,
@@ -1448,7 +1436,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
restart_times=restart_times,
assume_shared_fs=assume_shared_fs,
@@ -1635,7 +1622,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
cluster_config=None,
local_input=None,
restart_times=None,
@@ -1663,7 +1649,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
local_input=local_input,
restart_times=restart_times,
@@ -2092,7 +2077,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
local_input=None,
restart_times=None,
max_status_checks_per_second=1,
@@ -2147,7 +2131,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
local_input=local_input,
restart_times=restart_times,
exec_job=exec_job,
@@ -28,7 +28,6 @@ def __init__(
printreason=False,
quiet=False,
printshellcmds=False,
latency_wait=3,
cluster_config=None,
local_input=None,
restart_times=None,
@@ -83,7 +82,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
local_input=local_input,
restart_times=restart_times,
@@ -51,7 +51,6 @@ def __init__(
regions=None,
location=None,
cache=False,
latency_wait=3,
local_input=None,
restart_times=None,
exec_job=None,
@@ -127,7 +126,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
restart_times=restart_times,
exec_job=exec_job,
assume_shared_fs=False,
@@ -21,6 +21,7 @@
_IOFile,
is_flagged,
get_flag_value,
wait_for_files,
)
from snakemake.utils import format, listfiles
from snakemake.exceptions import RuleException, ProtectedOutputException, WorkflowError
@@ -764,6 +765,9 @@ def prepare(self):
if self.benchmark:
self.benchmark.prepare()

# wait for input files
wait_for_files(self.input, latency_wait=self.dag.workflow.latency_wait)

if not self.is_shadow:
return

@@ -94,7 +94,6 @@ def __init__(
keepgoing=False,
max_jobs_per_second=None,
max_status_checks_per_second=100,
latency_wait=3,
greediness=1.0,
force_use_threads=False,
assume_shared_fs=True,
@@ -169,7 +168,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
)
elif touch:
self._executor = TouchExecutor(
@@ -178,7 +176,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
)
elif cluster or cluster_sync or (drmaa is not None):
if not workflow.immediate_submit:
@@ -191,7 +188,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cores=local_cores,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -219,7 +215,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
assume_shared_fs=assume_shared_fs,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -240,7 +235,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
assume_shared_fs=assume_shared_fs,
max_status_checks_per_second=max_status_checks_per_second,
@@ -255,7 +249,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cores=local_cores,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -269,7 +262,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cluster_config=cluster_config,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -283,7 +275,6 @@ def __init__(
quiet=quiet,
printshellcmds=printshellcmds,
use_threads=use_threads,
latency_wait=latency_wait,
cores=local_cores,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -300,7 +291,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
)
@@ -312,7 +302,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cores=local_cores,
)

@@ -327,7 +316,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
preemption_default=preemption_default,
preemptible_rules=preemptible_rules,
)
@@ -339,7 +327,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
cores=local_cores,
keepincomplete=keepincomplete,
)
@@ -351,7 +338,6 @@ def __init__(
printreason=printreason,
quiet=quiet,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
tes_url=tes,
container_image=container_image,
)
@@ -365,7 +351,6 @@ def __init__(
quiet=quiet,
printshellcmds=printshellcmds,
use_threads=use_threads,
latency_wait=latency_wait,
cores=cores,
keepincomplete=keepincomplete,
keepmetadata=keepmetadata,
@@ -149,6 +149,7 @@ def __init__(
max_threads=None,
all_temp=False,
local_groupid="local",
latency_wait=3,
):
"""
Create the controller.
@@ -234,6 +235,7 @@ def __init__(
self.all_temp = all_temp
self.scheduler = None
self.local_groupid = local_groupid
self.latency_wait = latency_wait

_globals = globals()
_globals["workflow"] = self
@@ -599,7 +601,6 @@ def execute(
delete_all_output=False,
delete_temp_output=False,
detailed_summary=False,
latency_wait=3,
wait_for_files=None,
nolock=False,
unlock=False,
@@ -701,7 +702,9 @@ def files(items):

if wait_for_files is not None:
try:
- snakemake.io.wait_for_files(wait_for_files, latency_wait=latency_wait)
+ snakemake.io.wait_for_files(
+ wait_for_files, latency_wait=self.latency_wait
+ )
except IOError as e:
logger.error(str(e))
return False
@@ -1024,7 +1027,6 @@ def files(items):
container_image=container_image,
printreason=printreason,
printshellcmds=printshellcmds,
latency_wait=latency_wait,
greediness=greediness,
force_use_threads=force_use_threads,
assume_shared_fs=self.assume_shared_fs,
@@ -38,7 +38,7 @@ dependencies:
- ratelimiter
- configargparse
- appdirs
- - python-irodsclient
+ - python-irodsclient <1.1.2 # bug in 1.1.2 leads to KeyError
- cwltool
- jsonschema
- pandas

0 comments on commit cab2adb

Please sign in to comment.