Skip to content

Commit

Permalink
Core: Add update_rule boost option #4634
Browse files Browse the repository at this point in the history
The timeout between the retries of the transitioning of a rule from `STUCK` to
`REPLICATING` are quite big. This commit introduces the feature `--boost-rule`
to allow almost instant transition between the states.
  • Loading branch information
Joel Dierkes authored and bari12 committed Oct 29, 2021
1 parent 29127b6 commit 2c61ae6
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 12 deletions.
6 changes: 6 additions & 0 deletions bin/rucio
Expand Up @@ -47,7 +47,10 @@
# - Rakshita Varadarajan <rakshitajps@gmail.com>, 2021
# - Christoph Ames <christoph.ames@physik.uni-muenchen.de>, 2021
# - James Perry <j.perry@epcc.ed.ac.uk>, 2021
# - Petr Vokac <petr.vokac@fjfi.cvut.cz>, 2021
# - David Población Criado <david.poblacion.criado@cern.ch>, 2021
# - zoepap05 <90753392+zoepap05@users.noreply.github.com>, 2021
# - KosKyr <90753277+KosKyr@users.noreply.github.com>, 2021
# - Joel Dierkes <joel.dierkes@cern.ch>, 2021

from __future__ import print_function
Expand Down Expand Up @@ -1348,6 +1351,8 @@ def update_rule(args):
options['priority'] = int(args.priority)
if args.child_rule_id:
options['child_rule_id'] = args.child_rule_id
if args.boost_rule:
options['boost_rule'] = args.boost_rule
client.update_replication_rule(rule_id=args.rule_id, options=options)
print('Updated Rule')
return SUCCESS
Expand Down Expand Up @@ -2355,6 +2360,7 @@ You can filter by account::
update_rule_parser.add_argument('--cancel-requests', dest='cancel_requests', action='store_true', help='Cancel requests when setting rules to stuck.')
update_rule_parser.add_argument('--priority', dest='priority', action='store', help='Priority of the requests of the rule.')
update_rule_parser.add_argument('--child-rule-id', dest='child_rule_id', action='store', help='Child rule id of the rule.')
update_rule_parser.add_argument('--boost-rule', dest='boost_rule', action='store_true', help='Quickens the transition of a rule from STUCK to REPLICATING.')

# The move_rule command
move_rule_parser = subparsers.add_parser('move-rule', help='Move a replication rule to another RSE.')
Expand Down
21 changes: 10 additions & 11 deletions lib/rucio/core/permission/atlas.py
@@ -1,4 +1,5 @@
# Copyright 2016-2020 CERN for the benefit of the ATLAS collaboration.
# -*- coding: utf-8 -*-
# Copyright 2016-2021 CERN
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,18 +14,20 @@
# limitations under the License.
#
# Authors:
# - Vincent Garonne <vgaronne@gmail.com>, 2016
# - Vincent Garonne <vincent.garonne@cern.ch>, 2016
# - Martin Barisits <martin.barisits@cern.ch>, 2016-2020
# - Cedric Serfon <cedric.serfon@cern.ch>, 2016-2021
# - Mario Lassnig <mario.lassnig@cern.ch>, 2018-2020
# - Hannes Hansen <hannes.jakob.hansen@cern.ch>, 2018-2019
# - Andrew Lister <andrew.lister@stfc.ac.uk>, 2019
# - Ruturaj Gujar <ruturaj.gujar23@gmail.com>, 2019
# - Eric Vaandering, <ewv@fnal.gov>, 2020
# - Jaroslav Guenther <jaroslav.guenther@cern.ch>, 2019
# - Eli Chadwick <eli.chadwick@stfc.ac.uk>, 2020
# - Patrick Austin <patrick.austin@stfc.ac.uk>, 2020
#
# PY3K COMPATIBLE
# - Eric Vaandering <ewv@fnal.gov>, 2020
# - Dimitrios Christidis <dimitrios.christidis@cern.ch>, 2021
# - David Población Criado <david.poblacion.criado@cern.ch>, 2021
# - Joel Dierkes <joel.dierkes@cern.ch>, 2021

from typing import TYPE_CHECKING

Expand Down Expand Up @@ -524,12 +527,8 @@ def perm_update_rule(issuer, kwargs):
if _is_root(issuer) or has_account_attribute(account=issuer, key='admin'):
return True

# Only admin accounts can change account, state, priority of a rule
if 'account' in kwargs['options'] or\
'state' in kwargs['options'] or\
'priority' in kwargs['options'] or\
'child_rule_id' in kwargs['options'] or\
'meta' in kwargs['options']:
admin_reserved = {'account', 'state', 'priority', 'child_rule_id', 'meta', 'boost_rule'}
if admin_reserved.intersection(kwargs['options'].keys()):
return False # Only priv accounts are allowed to change that

# Country admins are allowed to change the rest.
Expand Down
11 changes: 10 additions & 1 deletion lib/rucio/core/rule.py
Expand Up @@ -35,6 +35,8 @@
# - Radu Carpa <radu.carpa@cern.ch>, 2021
# - Rakshita Varadarajan <rakshitajps@gmail.com>, 2021
# - Rahul Chauhan <omrahulchauhan@gmail.com>, 2021
# - David Población Criado <david.poblacion.criado@cern.ch>, 2021
# - Joel Dierkes <joel.dierkes@cern.ch>, 2021

from __future__ import division

Expand Down Expand Up @@ -1286,7 +1288,7 @@ def update_rule(rule_id, options, session=None):
:raises: RuleNotFound if no Rule can be found, InputValidationError if invalid option is used, ScratchDiskLifetimeConflict if wrong ScratchDiskLifetime is used.
"""

valid_options = ['comment', 'locked', 'lifetime', 'account', 'state', 'activity', 'source_replica_expression', 'cancel_requests', 'priority', 'child_rule_id', 'eol_at', 'meta', 'purge_replicas']
valid_options = ['comment', 'locked', 'lifetime', 'account', 'state', 'activity', 'source_replica_expression', 'cancel_requests', 'priority', 'child_rule_id', 'eol_at', 'meta', 'purge_replicas', 'boost_rule']

for key in options:
if key not in valid_options:
Expand Down Expand Up @@ -1403,6 +1405,13 @@ def update_rule(rule_id, options, session=None):

insert_rule_history(rule=rule, recent=True, longterm=False, session=session)

# `boost_rule` should run after `stuck`, so lets not include it in the loop since the arguments are unordered
if 'boost_rule' in options:
for lock in session.query(models.ReplicaLock).filter_by(rule_id=rule.id, state=LockState.STUCK).all():
lock['updated_at'] -= timedelta(days=1)
rule['updated_at'] -= timedelta(days=1)
insert_rule_history(rule, recent=True, longterm=False, session=session)

except IntegrityError as error:
if match('.*ORA-00001.*', str(error.args[0])) \
or match('.*IntegrityError.*UNIQUE constraint failed.*', str(error.args[0])) \
Expand Down
48 changes: 48 additions & 0 deletions lib/rucio/tests/test_bin_rucio.py
Expand Up @@ -34,6 +34,7 @@
# - Rahul Chauhan <omrahulchauhan@gmail.com>, 2021
# - Simon Fayer <simon.fayer05@imperial.ac.uk>, 2021
# - David Población Criado <david.poblacion.criado@cern.ch>, 2021
# - Paul Millar <paul.millar@desy.de>, 2021
# - Joel Dierkes <joel.dierkes@cern.ch>, 2021

from __future__ import print_function
Expand Down Expand Up @@ -1733,3 +1734,50 @@ def test_update_rule_cancel_requests_args(self):
exitcode, out, err = execute(cmd)
assert '--stuck or --suspend must be specified when running --cancel-requests' in err
assert exitcode != 0

def test_update_rule_boost_rule_arg(self):
"""CLIENT(USER): update a rule with the `--boost_rule` option """
self.account_client.set_local_account_limit('root', self.def_rse, -1)
tmp_file1 = file_generator()
# add files
cmd = 'rucio upload --rse {0} --scope {1} {2}'.format(self.def_rse, self.user, tmp_file1)
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
print(out, err)
# add rse
tmp_rse = rse_name_generator()
cmd = 'rucio-admin rse add {0}'.format(tmp_rse)
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
print(out)
self.account_client.set_local_account_limit('root', tmp_rse, -1)

# add rse atributes
cmd = 'rucio-admin rse set-attribute --rse {0} --key spacetoken --value ATLASDELETERULE'.format(tmp_rse)
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
print(out, err)
# add rules
cmd = "rucio add-rule {0}:{1} 1 'spacetoken=ATLASDELETERULE'".format(self.user, tmp_file1[5:])
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
print(err)
print(out)
# get the rules for the file
cmd = r"rucio list-rules {0}:{1} | grep {0}:{1} | cut -f1 -d\ ".format(self.user, tmp_file1[5:])
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
print(out, err)
(rule1, rule2) = out.split()

# update the rules
cmd = "rucio update-rule --boost-rule {0}".format(rule1)
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
assert exitcode == 0
print(out, err)
cmd = "rucio update-rule --boost-rule {0}".format(rule2)
print(self.marker + cmd)
exitcode, out, err = execute(cmd)
print(out, err)
assert exitcode == 0
6 changes: 6 additions & 0 deletions lib/rucio/tests/test_permission.py
Expand Up @@ -21,6 +21,7 @@
# - Patrick Austin <patrick.austin@stfc.ac.uk>, 2020
# - Benedikt Ziemons <benedikt.ziemons@cern.ch>, 2020
# - Simon Fayer <simon.fayer05@imperial.ac.uk>, 2021
# - Joel Dierkes <joel.dierkes@cern.ch>, 2021

import unittest

Expand Down Expand Up @@ -84,3 +85,8 @@ def test_permission_get_auth_token_gss(self):
gsscred = 'rucio-dev@CERN.CH'
assert has_permission(issuer='root', action='get_auth_token_gss', kwargs={'account': 'root', 'gsscred': gsscred}, **self.vo)
assert not has_permission(issuer='root', action='get_auth_token_gss', kwargs={'account': self.usr, 'gsscred': gsscred}, **self.vo)

def test_permission_update_rule_boost(self):
kwargs = {'options': {'boost_rule': True}}
assert has_permission(issuer='root', action='update_rule', kwargs=kwargs, **self.vo)
assert not has_permission(issuer='jdoe', action='update_rule', kwargs=kwargs, **self.vo)
30 changes: 30 additions & 0 deletions lib/rucio/tests/test_rule.py
Expand Up @@ -30,6 +30,8 @@
# - Radu Carpa <radu.carpa@cern.ch>, 2021
# - Dimitrios Christidis <dimitrios.christidis@cern.ch>, 2021
# - Simon Fayer <simon.fayer05@imperial.ac.uk>, 2021
# - David Población Criado <david.poblacion.criado@cern.ch>, 2021
# - Joel Dierkes <joel.dierkes@cern.ch>, 2021

import json
import random
Expand Down Expand Up @@ -1189,6 +1191,34 @@ def mktree(scope, account):
assert(len(dsl3) == 0)


def test_rule_boost(vo, mock_scope, rse_factory, file_factory):
""" REPLICATION RULE (CORE): Update a replication rule to quicken the translation from stuck to replicating """
jdoe = InternalAccount('jdoe', vo)
_, tmp_rse_id = rse_factory.make_mock_rse()
rse, rse_id = rse_factory.make_mock_rse()
update_rse(rse_id, {'availability_write': False})
set_local_account_limit(jdoe, rse_id, -1)
files = create_files(3, mock_scope, tmp_rse_id)
dataset1 = 'dataset_' + str(uuid())
add_did(mock_scope, dataset1, DIDType.DATASET, jdoe)
attach_dids(mock_scope, dataset1, files, jdoe)

rule_id = add_rule(dids=[{'scope': mock_scope, 'name': dataset1}], account=jdoe, copies=1, rse_expression=rse, grouping='NONE', weight=None, lifetime=None, locked=False, subscription_id=None, ignore_availability=True)[0]
before_update_rule = {}
for file in files:
for filtered_lock in [lock for lock in get_replica_locks(scope=file['scope'], name=file['name'])]:
assert(filtered_lock['state'] == LockState.STUCK)
before_update_rule[filtered_lock['name']] = filtered_lock['updated_at']
before_update_rule_updated_at = get_rule(rule_id)['updated_at']

update_rule(rule_id, options={'boost_rule': True})

for file in files:
for filtered_lock in [lock for lock in get_replica_locks(scope=file['scope'], name=file['name'])]:
assert(before_update_rule[filtered_lock['name']] > filtered_lock['updated_at'])
assert(before_update_rule_updated_at > get_rule(rule_id)['updated_at'])


@pytest.mark.noparallel(reason='uses pre-defined RSE')
class TestReplicationRuleClient(unittest.TestCase):

Expand Down

0 comments on commit 2c61ae6

Please sign in to comment.