Skip to content

Commit

Permalink
Testing: improve download client test coverage; rucio#2311
Browse files Browse the repository at this point in the history
- check the status of different results to ensure that files are
  correctly overwritten or not
- check the resulting path with/without no_subdir set
- add test for downloading from an archive
- implicitly test XRD download because it is used in the archive test
- add tests for downloading multiple files via a wildcard
- implicitly test some upload logic
  • Loading branch information
rcarpa committed Feb 16, 2021
1 parent 672a1eb commit f6a17a5
Showing 1 changed file with 205 additions and 30 deletions.
235 changes: 205 additions & 30 deletions lib/rucio/tests/test_download.py
Expand Up @@ -20,15 +20,19 @@
# - Patrick Austin <patrick.austin@stfc.ac.uk>, 2020
# - Benedikt Ziemons <benedikt.ziemons@cern.ch>, 2020
# - Thomas Beermann <thomas.beermann@cern.ch>, 2021
# - Radu Carpa <radu.carpa@cern.ch>, 2021
#
# PY3K COMPATIBLE

import logging
import os.path
import shutil
import unittest
from tempfile import TemporaryDirectory
from zipfile import ZipFile

import pytest

from rucio.client.client import Client
from rucio.client.didclient import DIDClient
from rucio.client.downloadclient import DownloadClient
from rucio.client.uploadclient import UploadClient
from rucio.common.config import config_get, config_get_bool
Expand All @@ -37,7 +41,6 @@


class TestDownloadClient(unittest.TestCase):

def setUp(self):
if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
Expand All @@ -48,40 +51,212 @@ def setUp(self):
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)
self.client = Client()
self.did_client = DIDClient()
self.upload_client = UploadClient(_client=self.client, logger=logger.log)
self.download_client = DownloadClient(client=self.client, logger=logger.log)

self.file_path = file_generator()
self.scope = 'mock'
self.name = os.path.basename(self.file_path)
self.rse = 'MOCK4'
self.guid = generate_uuid()

item = {'path': self.file_path,
'rse': self.rse,
'did_scope': self.scope,
'did_name': self.name,
'guid': self.guid}
def _upoad_test_file(self, rse, scope, name, path=None):
    """
    Upload one file through self.upload_client and return the uploaded item.

    (The method name is misspelled; it is kept because the tests call it by this name.)

    :param rse: value stored under the 'rse' key of the upload item.
    :param scope: value stored under the 'did_scope' key of the upload item.
    :param name: value stored under the 'did_name' key of the upload item.
    :param path: path of the file to upload; when it is None or empty, a file
                 is created with file_generator() instead.
    :returns: the dictionary which was handed to the upload client. It holds the
              keys 'path', 'rse', 'did_scope', 'did_name' and 'guid'.
    :raises AssertionError: if the upload client does not return 0.
    """
    source_path = path or file_generator()
    upload_item = {'path': source_path, 'rse': rse, 'did_scope': scope, 'did_name': name}
    # Every upload gets its own, freshly generated GUID.
    upload_item['guid'] = generate_uuid()

    status = self.upload_client.upload([upload_item])
    assert status == 0
    return upload_item

@staticmethod
def _check_download_result(actual_result, expected_result):
assert len(expected_result) == len(actual_result)
expected_result = sorted(expected_result, key=lambda x: x['did'])
actual_result = sorted(actual_result, key=lambda x: x['did'])
for i, expected in enumerate(expected_result):
for param_name, expected_value in expected.items():
assert param_name and actual_result[i][param_name] == expected[param_name]

def test_download_without_base_dir(self):
    """ DOWNLOAD (CLIENT): Download without base_dir and check that a second download does not overwrite the file """
    rse = 'MOCK4'
    scope = 'mock'
    item = self._upoad_test_file(rse, scope, 'testDownloadNoBasedir' + generate_uuid())
    did = '%s:%s' % (scope, item['did_name'])
    try:
        # download to the default location, i.e. to ./
        result = self.download_client.download_dids([{'did': did}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did,
                    'clientState': 'DONE',
                }
            ],
        )

        # re-downloading the same file again should not overwrite it
        result = self.download_client.download_dids([{'did': did}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did,
                    'clientState': 'ALREADY_DONE',
                }
            ],
        )
    finally:
        # The directory named after the scope may not exist if the first download
        # failed. Ignore that case, otherwise the clean-up error would hide the
        # failure which actually broke the test.
        shutil.rmtree(scope, ignore_errors=True)

def test_download_multiple(self):
    """ DOWNLOAD (CLIENT): Download several files by name, by wildcard and by filter """
    rse = 'MOCK4'
    scope = 'mock'
    base_name = 'testDownloadItem' + generate_uuid()
    item000 = self._upoad_test_file(rse, scope, base_name + '.000')
    item001 = self._upoad_test_file(rse, scope, base_name + '.001')
    item100 = self._upoad_test_file(rse, scope, base_name + '.100')

    did000 = '%s:%s' % (scope, item000['did_name'])
    did001 = '%s:%s' % (scope, item001['did_name'])
    did100 = '%s:%s' % (scope, item100['did_name'])

    with TemporaryDirectory() as tmp_dir:
        # Download specific DID
        result = self.download_client.download_dids([{'did': did000, 'base_dir': tmp_dir}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did000,
                    'clientState': 'DONE',
                }
            ],
        )

        # Download multiple files with a wildcard. One of them already exists on
        # the file system and is expected not to be downloaded again.
        result = self.download_client.download_dids([{'did': '%s:%s.0*' % (scope, base_name), 'base_dir': tmp_dir}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did000,
                    'clientState': 'ALREADY_DONE',
                },
                {
                    # item001['did_name'] already ends with '.001'
                    'did': did001,
                    'clientState': 'DONE',
                },
            ],
        )

        # Download with filter; only the returned DID is verified here
        result = self.download_client.download_dids([{'filters': {'guid': item000['guid'], 'scope': scope}, 'base_dir': tmp_dir}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did000,
                }
            ],
        )

        # Download with wildcard and name
        result = self.download_client.download_dids([{'did': '%s:*' % scope, 'filters': {'guid': item100['guid']}, 'base_dir': tmp_dir}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did100,
                    'clientState': 'DONE',
                }
            ],
        )

        # Don't create subdirectories by scope
        result = self.download_client.download_dids([{'did': '%s:%s.*' % (scope, base_name), 'base_dir': tmp_dir, 'no_subdir': True}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did000,
                    'clientState': 'DONE',
                    'dest_file_paths': ['%s/%s' % (tmp_dir, item000['did_name'])],
                },
                {
                    'did': did001,
                    'clientState': 'DONE',
                    'dest_file_paths': ['%s/%s' % (tmp_dir, item001['did_name'])],
                },
                {
                    'did': did100,
                    'clientState': 'DONE',
                    'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
                },
            ],
        )

        # Re-download a file which already exists on the file system with no_subdir set.
        # The expected state below documents what is currently asserted; see the TODO.
        result = self.download_client.download_dids([{'did': did100, 'base_dir': tmp_dir, 'no_subdir': True}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': did100,
                    'clientState': 'ALREADY_DONE',  # TODO: fix #4323 and change this to 'DONE' if decided to overwrite
                    'dest_file_paths': ['%s/%s' % (tmp_dir, item100['did_name'])],
                }
            ],
        )


def tearDown(self):
    """ Remove the ./mock directory if a test left one behind """
    # Not every test creates ./mock (some download into a temporary directory
    # or clean up themselves), so a missing directory must not be an error.
    shutil.rmtree('mock', ignore_errors=True)


def test_download_item(self):
    """ DOWNLOAD (CLIENT): Download DIDs """
    # Uses self.scope, self.name and self.guid, which have to be set before the test runs.

    # Download specific DID
    result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name)}])
    assert result

    # Download with wildcard
    result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, self.name[:-2] + '*')}])
    assert result

    # Download with filter
    result = self.download_client.download_dids([{'filters': {'guid': self.guid, 'scope': self.scope}}])
    assert result

    # Download with wildcard and name
    result = self.download_client.download_dids([{'did': '%s:%s' % (self.scope, '*'), 'filters': {'guid': self.guid}}])
    assert result


@pytest.mark.xfail(reason='XRD1 must be initialized https://github.com/rucio/rucio/pull/4165/')
def test_download_from_archive_on_xrd(self):
    """ DOWNLOAD (CLIENT): Download files which are stored inside a zip archive on XRD1 """
    scope = 'test'
    rse = 'XRD1'
    base_name = 'testDownloadArchive' + generate_uuid()
    with TemporaryDirectory() as tmp_dir:
        # Create a zip archive with two files and upload it
        name000 = base_name + '.000'
        data000 = '000'
        name001 = base_name + '.001'
        data001 = '001'
        zip_name = base_name + '.zip'
        zip_path = '%s/%s' % (tmp_dir, zip_name)
        with ZipFile(zip_path, 'w') as myzip:
            myzip.writestr(name000, data=data000)
            myzip.writestr(name001, data=data001)
        # The archive is uploaded only after the ZipFile context has closed it.
        self._upoad_test_file(rse, scope, zip_name, path=zip_path)
        self.did_client.add_files_to_archive(
            scope,
            zip_name,
            [
                {'scope': scope, 'name': name000, 'bytes': len(data000), 'type': 'FILE', 'meta': {'guid': str(generate_uuid())}},
                {'scope': scope, 'name': name001, 'bytes': len(data001), 'type': 'FILE', 'meta': {'guid': str(generate_uuid())}},
            ],
        )

        # Download one file from the archive
        result = self.download_client.download_dids([{'did': '%s:%s' % (scope, name000), 'base_dir': tmp_dir, 'ignore_checksum': True}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': '%s:%s' % (scope, name000),
                    'clientState': 'DONE',
                },
            ],
        )
        with open('%s/%s/%s' % (tmp_dir, scope, name000), 'r') as file:
            assert file.read() == data000

        # Download both files from the archive
        result = self.download_client.download_dids([{'did': '%s:%s.00*' % (scope, base_name), 'base_dir': tmp_dir, 'ignore_checksum': True}])
        self._check_download_result(
            actual_result=result,
            expected_result=[
                {
                    'did': '%s:%s' % (scope, name000),
                    'clientState': 'ALREADY_DONE',
                },
                {
                    'did': '%s:%s' % (scope, name001),
                    'clientState': 'DONE',
                },
            ],
        )
        with open('%s/%s/%s' % (tmp_dir, scope, name001), 'r') as file:
            assert file.read() == data001

0 comments on commit f6a17a5

Please sign in to comment.