From 6436eb82a1119c61d445a0ab19a7efe10988bed5 Mon Sep 17 00:00:00 2001 From: Radu Carpa Date: Mon, 15 Feb 2021 09:41:28 +0100 Subject: [PATCH] Testing: improve download client test coverage; #2311 - check the status of different results to ensure that files are correctly overwritten or not - check the resulting path with/without no_subdir set - add test for downloading from an archive - implicitly test XRD download because it is used in the archive test - add tests for downloading multiple files via a wildcard - implicitly test some upload logic - some verifications that trace_copy_out works as expected - ensure that checksum validation is enforced (or ignored if option set) --- lib/rucio/tests/test_download.py | 297 +++++++++++++++++++++++++++---- 1 file changed, 267 insertions(+), 30 deletions(-) diff --git a/lib/rucio/tests/test_download.py b/lib/rucio/tests/test_download.py index 405a32dea0..6bee20078b 100644 --- a/lib/rucio/tests/test_download.py +++ b/lib/rucio/tests/test_download.py @@ -20,24 +20,30 @@ # - Patrick Austin , 2020 # - Benedikt Ziemons , 2020 # - Thomas Beermann , 2021 +# - Radu Carpa , 2021 # # PY3K COMPATIBLE - import logging -import os.path import shutil import unittest +from tempfile import TemporaryDirectory +from zipfile import ZipFile + +import pytest from rucio.client.client import Client +from rucio.client.didclient import DIDClient from rucio.client.downloadclient import DownloadClient from rucio.client.uploadclient import UploadClient from rucio.common.config import config_get, config_get_bool +from rucio.common.exception import InputValidationError, NoFilesDownloaded from rucio.common.utils import generate_uuid +from rucio.rse import rsemanager as rsemgr +from rucio.rse.protocols.posix import Default as PosixProtocol from rucio.tests.common import file_generator class TestDownloadClient(unittest.TestCase): - def setUp(self): if config_get_bool('common', 'multi_vo', raise_exception=False, default=False): self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')} @@ -48,40 +54,271 @@ def setUp(self): logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.DEBUG) self.client = Client() + self.did_client = DIDClient() self.upload_client = UploadClient(_client=self.client, logger=logger.log) self.download_client = DownloadClient(client=self.client, logger=logger.log) - self.file_path = file_generator() - self.scope = 'mock' - self.name = os.path.basename(self.file_path) - self.rse = 'MOCK4' - self.guid = generate_uuid() - - item = {'path': self.file_path, - 'rse': self.rse, - 'did_scope': self.scope, - 'did_name': self.name, - 'guid': self.guid} + def _upoad_test_file(self, rse, scope, name, path=None): + item = { + 'path': path if path else file_generator(), + 'rse': rse, + 'did_scope': scope, + 'did_name': name, + 'guid': generate_uuid(), + } assert self.upload_client.upload([item]) == 0 + return item + + @staticmethod + def _check_download_result(actual_result, expected_result): + assert len(expected_result) == len(actual_result) + expected_result = sorted(expected_result, key=lambda x: x['did']) + actual_result = sorted(actual_result, key=lambda x: x['did']) + for i, expected in enumerate(expected_result): + for param_name, expected_value in expected.items(): + assert param_name and actual_result[i][param_name] == expected[param_name] + + def test_download_without_base_dir(self): + rse = 'MOCK4' + scope = 'mock' + item = self._upoad_test_file(rse, scope, 'testDownloadNoBasedir' + generate_uuid()) + did = '%s:%s' % (scope, item['did_name']) + try: + # download to the default location, i.e. to ./ + result = self.download_client.download_dids([{'did': did}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': did, + 'clientState': 'DONE', + } + ], + ) + + # re-downloading the same file again should not overwrite it + result = self.download_client.download_dids([{'did': did}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': did, + 'clientState': 'ALREADY_DONE', + } + ], + ) + finally: + shutil.rmtree(scope) + + def test_download_multiple(self): + rse = 'MOCK4' + scope = 'mock' + base_name = 'testDownloadItem' + generate_uuid() + item000 = self._upoad_test_file(rse, scope, base_name + '.000') + item001 = self._upoad_test_file(rse, scope, base_name + '.001') + item100 = self._upoad_test_file(rse, scope, base_name + '.100') + + with TemporaryDirectory() as tmp_dir: + # Download specific DID + result = self.download_client.download_dids([{'did': '%s:%s' % (scope, item000['did_name']), 'base_dir': tmp_dir}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, item000['did_name']), + 'clientState': 'DONE', + } + ], + ) + + # Download multiple files with wildcard. One file already exists on the file system. Will not be re-downloaded. + result = self.download_client.download_dids([{'did': '%s:%s.0*' % (scope, base_name), 'base_dir': tmp_dir}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, item000['did_name']), + 'clientState': 'ALREADY_DONE', + }, + { + 'did': '%s:%s' % (scope, item001['did_name']), + 'clientState': 'DONE', + }, + ], + ) + + # Download with filter + result = self.download_client.download_dids([{'filters': {'guid': item000['guid'], 'scope': scope}, 'base_dir': tmp_dir}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, item000['did_name']), + } + ], + ) + + # Download with wildcard and name + result = self.download_client.download_dids([{'did': '%s:*' % scope, 'filters': {'guid': item100['guid']}, 'base_dir': tmp_dir}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, item100['did_name']), + 'clientState': 'DONE', + } + ], + ) + + # Don't create subdirectories by scope + result = self.download_client.download_dids([{'did': '%s:%s.*' % (scope, base_name), 'base_dir': tmp_dir, 'no_subdir': True}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, item000['did_name']), + 'clientState': 'DONE', + 'dest_file_paths': ['%s/%s' % (tmp_dir, item000['did_name'])], + }, + { + 'did': '%s:%s' % (scope, item001['did_name']), + 'clientState': 'DONE', + 'dest_file_paths': ['%s/%s' % (tmp_dir, item001['did_name'])], + }, + { + 'did': '%s:%s' % (scope, item100['did_name']), + 'clientState': 'DONE', + 'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])], + }, + ], + ) + + # Re-download file existing on the file system with no-subdir set. It must be overwritten. + result = self.download_client.download_dids([{'did': '%s:%s' % (scope, item100['did_name']), 'base_dir': tmp_dir, 'no_subdir': True}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, item100['did_name']), + 'clientState': 'ALREADY_DONE', # TODO: fix #4323 and change this to 'DONE' if decided to overwrite + 'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])], + } + ], + ) + + @pytest.mark.xfail(reason='XRD1 must be initialized https://github.com/rucio/rucio/pull/4165/') + def test_download_from_archive_on_xrd(self): + scope = 'test' + rse = 'XRD1' + base_name = 'testDownloadArchive' + generate_uuid() + with TemporaryDirectory() as tmp_dir: + # Create a zip archive with two files and upload it + name000 = base_name + '.000' + data000 = '000' + name001 = base_name + '.001' + data001 = '001' + zip_name = base_name + '.zip' + zip_path = '%s/%s' % (tmp_dir, zip_name) + with ZipFile(zip_path, 'w') as myzip: + myzip.writestr(name000, data=data000) + myzip.writestr(name001, data=data001) + self._upoad_test_file(rse, scope, zip_name, path=zip_path) + self.did_client.add_files_to_archive( + scope, + zip_name, + [ + {'scope': scope, 'name': name000, 'bytes': len(data000), 'type': 'FILE', 'meta': {'guid': str(generate_uuid())}}, + {'scope': scope, 'name': name001, 'bytes': len(data001), 'type': 'FILE', 'meta': {'guid': str(generate_uuid())}}, + ], + ) + + # Download one file from the archive + result = self.download_client.download_dids([{'did': '%s:%s' % (scope, name000), 'base_dir': tmp_dir, 'ignore_checksum': True}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, name000), + 'clientState': 'DONE', + }, + ], + ) + with open('%s/%s/%s' % (tmp_dir, scope, name000), 'r') as file: + assert file.read() == data000 + + # Download both files from the archive + result = self.download_client.download_dids([{'did': '%s:%s.00*' % (scope, base_name), 'base_dir': tmp_dir, 'ignore_checksum': True}]) + self._check_download_result( + actual_result=result, + expected_result=[ + { + 'did': '%s:%s' % (scope, name000), + 'clientState': 'ALREADY_DONE', + }, + { + 'did': '%s:%s' % (scope, name001), + 'clientState': 'DONE', + }, + ], + ) + with open('%s/%s/%s' % (tmp_dir, scope, name001), 'r') as file: + assert file.read() == data001 + + def test_trace_copy_out_and_checksum_validation(self): + rse = 'MOCK4' + scope = 'mock' + name = 'testDownloadTraces' + generate_uuid() + self._upoad_test_file(rse, scope, name) + + with TemporaryDirectory() as tmp_dir: + # Try downloading non-existing did + traces = [] + with pytest.raises(NoFilesDownloaded): + self.download_client.download_dids([{'did': 'some:randomNonExistingDid', 'base_dir': tmp_dir}], traces_copy_out=traces) + assert len(traces) == 1 and traces[0]['clientState'] == 'FILE_NOT_FOUND' + + # Download specific DID + traces = [] + self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir}], traces_copy_out=traces) + assert len(traces) == 1 and traces[0]['clientState'] == 'DONE' + + # Download same DID again + traces = [] + result = self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir}], traces_copy_out=traces) + assert len(traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE' - def tearDown(self): - shutil.rmtree('mock') + pfn = result[0]['sources'][0]['pfn'] - def test_download_item(self): - """ DOWNLOAD (CLIENT): Download DIDs """ + # Switch to a new empty directory + with TemporaryDirectory() as tmp_dir: + # Wildcards in did name are not allowed on pfn downloads + traces = [] + with pytest.raises(InputValidationError): + self.download_client.download_pfns([{'did': '%s:*' % scope, 'pfn': pfn, 'rse': rse, 'base_dir': tmp_dir}], traces_copy_out=traces) + assert not traces - # Download specific DID - result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name)}]) - assert result + # Same pfn, but without wildcard in the did should work + traces = [] + self.download_client.download_pfns([{'did': '%s:%s' % (scope, name), 'pfn': pfn, 'rse': rse, 'base_dir': tmp_dir}], traces_copy_out=traces) + assert len(traces) == 1 and traces[0]['clientState'] == 'DONE' - # Download with wildcard - result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name[:-2] + '*')}]) - assert result + # Switch to a new empty directory + with TemporaryDirectory() as tmp_dir: + # Simulate checksum corruption by changing the source file. We rely on the particularity + # that the MOCK4 rse uses the posix protocol: files are stored on the local file system + protocol = rsemgr.create_protocol(rsemgr.get_rse_info(rse, vo=self.client.vo), operation='read') + assert isinstance(protocol, PosixProtocol) + mock_rse_local_path = protocol.pfn2path(pfn) + with open(mock_rse_local_path, 'w') as f: + f.write('some completely other data') - # Download with filter - result = self.download_client.download_dids([{'filters': {'guid': self.guid, 'scope': self.scope}}]) - assert result + # Download fails checksum validation + traces = [] + with pytest.raises(NoFilesDownloaded): + self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir}], traces_copy_out=traces) + assert len(traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE' - # Download with wildcard and name - result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, '*'), 'filters': {'guid': self.guid}}]) - assert result + # Ignore_checksum set. Download works. + traces = [] + self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir, 'ignore_checksum': True}], traces_copy_out=traces) + assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'