You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
When concurrent calls are made to the link method, some of them will occasionally fail. This does not happen if the calls are made sequentially, which indicates a race condition in the server.
Error message:
Exception: {"apiVersion":"v1","warning":"","error":"File { path : \"opencga_stress_test_0/\" } from study { id : 5 } already exists.","queryOptions":{"metadata":true,"skipCount":true,"skip":0,"limit":1000},"response":[{"id":"","time":0,"dbTime":-1,"numResults":-1,"numTotalResults":-1,"warningMsg":"Future errors will ONLY be shown in the QueryResponse body","errorMsg":"DEPRECATED: org.opencb.opencga.catalog.exceptions.CatalogDBException: File { path : \"opencga_stress_test_0/\" } from study { id : 5 } already exists.","resultType":"","result":[]}]}
Method invocation that originates the error:
# Link the file at file_abs_path under par_dir_path in the study and keep the
# first entry of the returned result.
catalog_file = self.pycga.files.link(
    study=self.study_id, path=par_dir_path, uri=file_abs_path,
    parents=True,  # needed to create parent directories
    createFolder=False,  # otherwise linking further files under the same dir fails because parents=True
).get()[0]
It seems that OpenCGA is trying to create parent directories, even when createFolder=False.
This is the stress test that I am using to observe the problem:
def strees_test_worker(queue, results, connector):
    """Register one queued file and record the outcome in ``results``.

    Takes a single ``(index, file_path)`` item from ``queue``, registers the
    file through ``connector.file_register_with_metadata`` and stores either
    the call's return value or the exception it raised at ``results[index]``.

    Args:
        queue: queue yielding ``(index, file_path)`` tuples; the item is
            acknowledged with ``task_done()`` exactly once.
        results: mutable sequence; slot ``index`` receives the outcome.
        connector: object exposing ``file_register_with_metadata``.
    """
    index, file_path = queue.get()
    try:
        results[index] = connector.file_register_with_metadata(
            file_path, ['LP1234', 'LP1235'],
            run_id=2, delivery_ids=['1234', '4567'],
            delivery_format_version=DeliveryFormatVersion.V4)
    except Exception as e:
        # Keep the failure so the caller can inspect / re-raise it later.
        results[index] = e
    finally:
        # Always acknowledge the item, even if something that is not an
        # ``Exception`` subclass escapes; otherwise ``queue.join()`` in the
        # caller would block forever.
        queue.task_done()
def stress_opencga(connector, dir_path, file_amount=100):
    """Create empty files in ``dir_path`` and register them concurrently.

    One worker thread is started per file; each worker takes a single
    ``(index, file_path)`` item from a shared queue and stores its outcome in
    a shared results list. After all items have been acknowledged, the first
    stored exception (if any) is re-raised in the calling thread.

    Args:
        connector: object handed unchanged to every ``strees_test_worker``.
        dir_path: existing directory in which the temporary files are created.
        file_amount: number of files (and worker threads) to use.

    Raises:
        BaseException: the first exception instance found in the results.
    """
    # create files:
    file_paths = [os.path.join(dir_path, 'temp_file{}.txt'.format(i))
                  for i in range(file_amount)]
    for file_path in file_paths:
        with open(file_path, 'w+'):
            pass  # only the (empty) file itself is needed

    # register all those files:
    queue = Queue(maxsize=0)
    results = [None] * file_amount
    for _ in range(file_amount):
        thread = threading.Thread(target=strees_test_worker,
                                  kwargs=dict(queue=queue,
                                              results=results,
                                              connector=connector))
        # Daemon threads do not keep the interpreter alive if a worker hangs.
        # The attribute replaces the deprecated ``setDaemon()`` call.
        thread.daemon = True
        thread.start()
    # Workers are already blocked on ``queue.get()``; feeding the queue now
    # releases them at roughly the same time.
    for item in enumerate(file_paths):
        queue.put(item)
    queue.join()

    # process results:
    for result in results:
        if isinstance(result, BaseException):
            raise result
@pytest.mark.parametrize('use_retries', [True, False])
def test_stress(connector, use_retries):
    """Register a large number of files pseudo-concurrently"""
    if use_retries:
        # enable retries (disabled by default)
        # NOTE(review): nothing here resets this for the ``False`` case;
        # confirm the ``connector`` fixture is not shared between parameters.
        connector.pycga.configuration.retry = {
            'max_attempts': 10,
            'min_retry_seconds': 1,
            'max_retry_seconds': 2,
        }

    # Find a directory name that the catalog does NOT know yet: try the bare
    # name first, then the name with an increasing numeric suffix.
    base_name = 'opencga_stress_test'
    candidate = base_name
    suffix = 0
    while connector.pycga.files.search(connector.study_id, path=candidate + '/').get():
        candidate = "{}_{}".format(base_name, suffix)
        suffix += 1

    target_dir = os.path.join('/genomes/', candidate)
    created_here = not os.path.exists(target_dir)
    if created_here:
        os.makedirs(target_dir)
    elif not os.path.isdir(target_dir):
        raise Exception("Cant perform test - path exists and is not a directory: " + target_dir)

    try:
        stress_opencga(connector, target_dir)
    finally:
        # Only remove the directory when this test created it.
        if created_here:
            shutil.rmtree(target_dir)
The text was updated successfully, but these errors were encountered:
When concurrent calls are made to the link method, some of them will occasionally fail. This does not happen if the calls are made sequentially, which indicates a race condition in the server.
Error message:
Method invocation that originates the error:
It seems that OpenCGA is trying to create parent directories, even when createFolder=False.
This is the stress test that I am using to observe the problem:
The text was updated successfully, but these errors were encountered: