Skip to content

Commit

Permalink
Testing: improve download client test coverage; rucio#2311
Browse files Browse the repository at this point in the history
- check the status of different results to ensure that files are
  correctly overwritten or not
- check the resulting path with/without no_subdir set
- add test for downloading from an archive
- implicitly test XRD download because it is used in the archive test
- add tests for downloading multiple files via a wildcard
- implicitly test some upload logic
- some verifications that trace_copy_out works as expected
- ensure that checksum validation is enforced (or ignored if option set)
  • Loading branch information
rcarpa committed Feb 17, 2021
1 parent 202f197 commit 6436eb8
Showing 1 changed file with 267 additions and 30 deletions.
297 changes: 267 additions & 30 deletions lib/rucio/tests/test_download.py
Expand Up @@ -20,24 +20,30 @@
# - Patrick Austin <patrick.austin@stfc.ac.uk>, 2020
# - Benedikt Ziemons <benedikt.ziemons@cern.ch>, 2020
# - Thomas Beermann <thomas.beermann@cern.ch>, 2021
# - Radu Carpa <radu.carpa@cern.ch>, 2021
#
# PY3K COMPATIBLE

import logging
import os.path
import shutil
import unittest
from tempfile import TemporaryDirectory
from zipfile import ZipFile

import pytest

from rucio.client.client import Client
from rucio.client.didclient import DIDClient
from rucio.client.downloadclient import DownloadClient
from rucio.client.uploadclient import UploadClient
from rucio.common.config import config_get, config_get_bool
from rucio.common.exception import InputValidationError, NoFilesDownloaded
from rucio.common.utils import generate_uuid
from rucio.rse import rsemanager as rsemgr
from rucio.rse.protocols.posix import Default as PosixProtocol
from rucio.tests.common import file_generator


class TestDownloadClient(unittest.TestCase):

def setUp(self):
if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
Expand All @@ -48,40 +54,271 @@ def setUp(self):
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)
self.client = Client()
self.did_client = DIDClient()
self.upload_client = UploadClient(_client=self.client, logger=logger.log)
self.download_client = DownloadClient(client=self.client, logger=logger.log)

self.file_path = file_generator()
self.scope = 'mock'
self.name = os.path.basename(self.file_path)
self.rse = 'MOCK4'
self.guid = generate_uuid()

item = {'path': self.file_path,
'rse': self.rse,
'did_scope': self.scope,
'did_name': self.name,
'guid': self.guid}
def _upoad_test_file(self, rse, scope, name, path=None):
item = {
'path': path if path else file_generator(),
'rse': rse,
'did_scope': scope,
'did_name': name,
'guid': generate_uuid(),
}
assert self.upload_client.upload([item]) == 0
return item

@staticmethod
def _check_download_result(actual_result, expected_result):
assert len(expected_result) == len(actual_result)
expected_result = sorted(expected_result, key=lambda x: x['did'])
actual_result = sorted(actual_result, key=lambda x: x['did'])
for i, expected in enumerate(expected_result):
for param_name, expected_value in expected.items():
assert param_name and actual_result[i][param_name] == expected[param_name]

def test_download_without_base_dir(self):
rse = 'MOCK4'
scope = 'mock'
item = self._upoad_test_file(rse, scope, 'testDownloadNoBasedir' + generate_uuid())
did = '%s:%s' % (scope, item['did_name'])
try:
# download to the default location, i.e. to ./
result = self.download_client.download_dids([{'did': did}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': did,
'clientState': 'DONE',
}
],
)

# re-downloading the same file again should not overwrite it
result = self.download_client.download_dids([{'did': did}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': did,
'clientState': 'ALREADY_DONE',
}
],
)
finally:
shutil.rmtree(scope)

def test_download_multiple(self):
rse = 'MOCK4'
scope = 'mock'
base_name = 'testDownloadItem' + generate_uuid()
item000 = self._upoad_test_file(rse, scope, base_name + '.000')
item001 = self._upoad_test_file(rse, scope, base_name + '.001')
item100 = self._upoad_test_file(rse, scope, base_name + '.100')

with TemporaryDirectory() as tmp_dir:
# Download specific DID
result = self.download_client.download_dids([{'did': '%s:%s' % (scope, item000['did_name']), 'base_dir': tmp_dir}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, item000['did_name']),
'clientState': 'DONE',
}
],
)

# Download multiple files with wildcard. One file already exists on the file system. Will not be re-downloaded.
result = self.download_client.download_dids([{'did': '%s:%s.0*' % (scope, base_name), 'base_dir': tmp_dir}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, item000['did_name']),
'clientState': 'ALREADY_DONE',
},
{
'did': '%s:%s' % (scope, item001['did_name']),
'clientState': 'DONE',
},
],
)

# Download with filter
result = self.download_client.download_dids([{'filters': {'guid': item000['guid'], 'scope': scope}, 'base_dir': tmp_dir}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, item000['did_name']),
}
],
)

# Download with wildcard and name
result = self.download_client.download_dids([{'did': '%s:*' % scope, 'filters': {'guid': item100['guid']}, 'base_dir': tmp_dir}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, item100['did_name']),
'clientState': 'DONE',
}
],
)

# Don't create subdirectories by scope
result = self.download_client.download_dids([{'did': '%s:%s.*' % (scope, base_name), 'base_dir': tmp_dir, 'no_subdir': True}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, item000['did_name']),
'clientState': 'DONE',
'dest_file_paths': ['%s/%s' % (tmp_dir, item000['did_name'])],
},
{
'did': '%s:%s' % (scope, item001['did_name']),
'clientState': 'DONE',
'dest_file_paths': ['%s/%s' % (tmp_dir, item001['did_name'])],
},
{
'did': '%s:%s' % (scope, item100['did_name']),
'clientState': 'DONE',
'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
},
],
)

# Re-download file existing on the file system with no-subdir set. It must be overwritten.
result = self.download_client.download_dids([{'did': '%s:%s' % (scope, item100['did_name']), 'base_dir': tmp_dir, 'no_subdir': True}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, item100['did_name']),
'clientState': 'ALREADY_DONE', # TODO: fix #4323 and change this to 'DONE' if decided to overwrite
'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
}
],
)

@pytest.mark.xfail(reason='XRD1 must be initialized https://github.com/rucio/rucio/pull/4165/')
def test_download_from_archive_on_xrd(self):
scope = 'test'
rse = 'XRD1'
base_name = 'testDownloadArchive' + generate_uuid()
with TemporaryDirectory() as tmp_dir:
# Create a zip archive with two files and upload it
name000 = base_name + '.000'
data000 = '000'
name001 = base_name + '.001'
data001 = '001'
zip_name = base_name + '.zip'
zip_path = '%s/%s' % (tmp_dir, zip_name)
with ZipFile(zip_path, 'w') as myzip:
myzip.writestr(name000, data=data000)
myzip.writestr(name001, data=data001)
self._upoad_test_file(rse, scope, zip_name, path=zip_path)
self.did_client.add_files_to_archive(
scope,
zip_name,
[
{'scope': scope, 'name': name000, 'bytes': len(data000), 'type': 'FILE', 'meta': {'guid': str(generate_uuid())}},
{'scope': scope, 'name': name001, 'bytes': len(data001), 'type': 'FILE', 'meta': {'guid': str(generate_uuid())}},
],
)

# Download one file from the archive
result = self.download_client.download_dids([{'did': '%s:%s' % (scope, name000), 'base_dir': tmp_dir, 'ignore_checksum': True}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, name000),
'clientState': 'DONE',
},
],
)
with open('%s/%s/%s' % (tmp_dir, scope, name000), 'r') as file:
assert file.read() == data000

# Download both files from the archive
result = self.download_client.download_dids([{'did': '%s:%s.00*' % (scope, base_name), 'base_dir': tmp_dir, 'ignore_checksum': True}])
self._check_download_result(
actual_result=result,
expected_result=[
{
'did': '%s:%s' % (scope, name000),
'clientState': 'ALREADY_DONE',
},
{
'did': '%s:%s' % (scope, name001),
'clientState': 'DONE',
},
],
)
with open('%s/%s/%s' % (tmp_dir, scope, name001), 'r') as file:
assert file.read() == data001

def test_trace_copy_out_and_checksum_validation(self):
rse = 'MOCK4'
scope = 'mock'
name = 'testDownloadTraces' + generate_uuid()
self._upoad_test_file(rse, scope, name)

with TemporaryDirectory() as tmp_dir:
# Try downloading non-existing did
traces = []
with pytest.raises(NoFilesDownloaded):
self.download_client.download_dids([{'did': 'some:randomNonExistingDid', 'base_dir': tmp_dir}], traces_copy_out=traces)
assert len(traces) == 1 and traces[0]['clientState'] == 'FILE_NOT_FOUND'

# Download specific DID
traces = []
self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir}], traces_copy_out=traces)
assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

# Download same DID again
traces = []
result = self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir}], traces_copy_out=traces)
assert len(traces) == 1 and traces[0]['clientState'] == 'ALREADY_DONE'

def tearDown(self):
shutil.rmtree('mock')
pfn = result[0]['sources'][0]['pfn']

def test_download_item(self):
""" DOWNLOAD (CLIENT): Download DIDs """
# Switch to a new empty directory
with TemporaryDirectory() as tmp_dir:
# Wildcards in did name are not allowed on pfn downloads
traces = []
with pytest.raises(InputValidationError):
self.download_client.download_pfns([{'did': '%s:*' % scope, 'pfn': pfn, 'rse': rse, 'base_dir': tmp_dir}], traces_copy_out=traces)
assert not traces

# Download specific DID
result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name)}])
assert result
# Same pfn, but without wildcard in the did should work
traces = []
self.download_client.download_pfns([{'did': '%s:%s' % (scope, name), 'pfn': pfn, 'rse': rse, 'base_dir': tmp_dir}], traces_copy_out=traces)
assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

# Download with wildcard
result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name[:-2] + '*')}])
assert result
# Switch to a new empty directory
with TemporaryDirectory() as tmp_dir:
# Simulate checksum corruption by changing the source file. We rely on the particularity
# that the MOCK4 rse uses the posix protocol: files are stored on the local file system
protocol = rsemgr.create_protocol(rsemgr.get_rse_info(rse, vo=self.client.vo), operation='read')
assert isinstance(protocol, PosixProtocol)
mock_rse_local_path = protocol.pfn2path(pfn)
with open(mock_rse_local_path, 'w') as f:
f.write('some completely other data')

# Download with filter
result = self.download_client.download_dids([{'filters': {'guid': self.guid, 'scope': self.scope}}])
assert result
# Download fails checksum validation
traces = []
with pytest.raises(NoFilesDownloaded):
self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir}], traces_copy_out=traces)
assert len(traces) == 1 and traces[0]['clientState'] == 'FAIL_VALIDATE'

# Download with wildcard and name
result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, '*'), 'filters': {'guid': self.guid}}])
assert result
# Ignore_checksum set. Download works.
traces = []
self.download_client.download_dids([{'did': '%s:%s' % (scope, name), 'base_dir': tmp_dir, 'ignore_checksum': True}], traces_copy_out=traces)
assert len(traces) == 1 and traces[0]['clientState'] == 'DONE'

0 comments on commit 6436eb8

Please sign in to comment.