Reimplement MBS-7241 without database-level locks
This is intended to help resolve deadlocks (MBS-11345, MBS-12314) and
performance issues due to lock contention.

The new implementation is documented in `Data::Role::EntityCache`.
mwiencek committed Dec 19, 2023
1 parent f0266c9 commit fbdd10f
Showing 4 changed files with 126 additions and 77 deletions.
16 changes: 15 additions & 1 deletion lib/MusicBrainz/Server.pm
@@ -5,6 +5,7 @@ use namespace::autoclean;
BEGIN { extends 'Catalyst' }

use Class::Load qw( load_class );
use Data::Dumper;
use DBDefs;
use Digest::SHA qw( sha256 );
use HTML::Entities ();
@@ -22,7 +23,7 @@ use MusicBrainz::Server::Entity::Util::JSON qw( to_json_array to_json_object );
use MusicBrainz::Server::Log qw( logger );
use MusicBrainz::Server::Validation qw( is_positive_integer );
use POSIX qw(SIGALRM);
use Scalar::Util qw( refaddr );
use Scalar::Util qw( looks_like_number refaddr );
use Sys::Hostname;
use Time::HiRes qw( clock_gettime CLOCK_REALTIME CLOCK_MONOTONIC );
use Try::Tiny;
@@ -475,6 +476,17 @@ around dispatch => sub {

my $max_request_time = DBDefs->DETERMINE_MAX_REQUEST_TIME($c->req);

if (
defined($max_request_time) &&
(!looks_like_number($max_request_time) || $max_request_time < 0)
) {
$c->log->warn(
'DETERMINE_MAX_REQUEST_TIME did not return a valid number: ' .
Dumper($max_request_time),
);
$max_request_time = undef;
}

if (defined($max_request_time) && $max_request_time > 0) {
alarm($max_request_time);

Expand All @@ -492,6 +504,8 @@ around dispatch => sub {
POSIX::sigaction(SIGALRM, $action);
}

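# Make the computed value available to the data layer (see the
# `max_request_time` attribute in MusicBrainz::Server::Context).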
$c->model('MB')->context->max_request_time($max_request_time);

$c->$orig(@args);

alarm(0);
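
Note: a hypothetical sketch of the `DBDefs` hook validated above. The real
`DETERMINE_MAX_REQUEST_TIME` lives in lib/DBDefs.pm and may differ; this only
illustrates the expected return values (seconds, with undef or 0 meaning
"no limit"):

    # Hypothetical override in lib/DBDefs.pm:
    sub DETERMINE_MAX_REQUEST_TIME {
        my ($class, $req) = @_;
        # Don't alarm long-running web-service requests; limit
        # everything else to 60 seconds.
        return 0 if $req->path =~ m{^ws/};
        return 60;
    }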
9 changes: 9 additions & 0 deletions lib/MusicBrainz/Server/Context.pm
@@ -121,6 +121,15 @@ sub create_script_context
return MusicBrainz::Server::Context->new(cache_manager => $cache_manager, database => 'MAINTENANCE', %args);
}

# `DBDefs::DETERMINE_MAX_REQUEST_TIME` must be called with a Catalyst request
# object. This attribute stores the result of that call so it can be accessed
# in the data layer.
has 'max_request_time' => (
is => 'rw',
isa => 'Maybe[Int]',
default => 0,
);

1;

=head1 COPYRIGHT AND LICENSE
156 changes: 80 additions & 76 deletions lib/MusicBrainz/Server/Data/Role/EntityCache.pm
@@ -6,14 +6,60 @@ use namespace::autoclean;
use List::AllUtils qw( any natatime uniq );
use MusicBrainz::Server::Constants qw( %ENTITIES );
use MusicBrainz::Server::Log qw( log_debug );
use MusicBrainz::Server::Validation qw( is_database_row_id );
use MusicBrainz::Server::Replication qw( :replication_type );
use Readonly;
use Time::HiRes qw( time );

requires '_type';

Readonly our $MAX_CACHE_ENTRIES => 500;

=head1 Cache invalidation and locking

In order to prevent entities from being repopulated in the cache before a
database transaction that invalidates those entries commits (MBS-7241), we
"lock" entity IDs from being added to the cache if they're being deleted from
the cache in a concurrent request. These locks are simple keys in our
Redis/KeyDB store.

For example (a sketch of the store operations follows this list):

* Suppose request A updates artist ID=1 and calls _delete_from_cache(1).
In our (persistent) Redis store, we set `artist:cachelock:1` to '1'.
In our Redis cache, we delete artist ID=1.

* Request B starts before request A's database transaction commits, and
reads artist ID=1 from the DB, seeing the old version. Since this ID is
not in the Redis cache, we'll attempt to add it, but first check for the
presence of a cachelock key. Since `artist:cachelock:1` exists, we skip
adding artist ID=1 to the cache.

* Request A's database transaction commits, and the `artist:cachelock:1` key
is deleted from the Redis store, allowing future requests to add artist
ID=1 to the cache.
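
A rough sketch of both requests' store operations, interleaved in time
(artist ID=1; the cachelock TTL assumes request A has a 60-second max
request time):

    # Request A (updates artist ID=1):
    $c->store->set_multi(['artist:cachelock:1', 1, 61]);
    $c->cache('artist')->delete('artist:1');
    # ...request A's database transaction commits, then, post-commit:
    $c->store->delete_multi('artist:cachelock:1');

    # Request B (reads artist ID=1 from the DB before A commits):
    # cache miss on 'artist:1', so before re-adding it we check:
    my $locks = $c->store->get_multi('artist:cachelock:1');
    # defined $locks->{'artist:cachelock:1'}, so artist:1 is not cached.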

The cachelock keys are deleted after the database transaction ends, via
`add_post_commit_callback`, regardless of whether it commits or fails with an
error. Further, we set a TTL on the keys in case all else fails. The default
TTL is the "max request time" of request A, as determined by
`DBDefs::DETERMINE_MAX_REQUEST_TIME`. The idea behind this is that if the
request times out, any database handles used in the request will be closed.
In case the "max request time" is undefined or 0 (indicating no limit), we
set the TTL to `$CACHELOCK_TTL_FALLBACK`, which defaults to 10 minutes.

The process described above is only performed for `RT_MASTER` and
`RT_STANDALONE`. `RT_MIRROR` should not have any modifications to the
database outside of replication.

Originally, MBS-7241 was resolved by using database-level locks
(`pg_advisory_xact_lock`). Lock contention and deadlocks (MBS-11345,
MBS-12314, ...) were a persistent issue with that approach. We also hit
limits on the number of locks held in a single transaction (MBS-10497).

=cut

Readonly our $CACHELOCK_TTL_FALLBACK => 600;

sub _cache_id {
my ($self) = @_;

@@ -67,59 +113,28 @@ after merge => sub {
sub _create_cache_entries {
my ($self, $data) = @_;

my $cache_id = $self->_cache_id;
my $cache_prefix = $self->_type . ':';
my @ids = keys %{$data};
return unless @ids;

if (scalar(@ids) > $MAX_CACHE_ENTRIES) {
@ids = @ids[0 .. $MAX_CACHE_ENTRIES - 1];
}

my $ttl = DBDefs->ENTITY_CACHE_TTL;
my $cache_prefix = $self->_type . ':';

if (DBDefs->DB_READ_ONLY) {
# There's no point in acquiring advisory locks for caching if
# DB_READ_ONLY is set, because there should be no concurrent
# writes in that case. While it's possible to have some servers
# set in DB_READ_ONLY and others still accepting writes, that's
# never done intentionally, and there's not much we can do
# about it: READONLY connections (which are made when
# DB_READ_ONLY is set) typically go to a standby server
# different from the master. Locks are not shared between
# primary and standby servers.
return map {
[$cache_prefix . $_, $data->{$_}, ($ttl ? $ttl : ())]
} @ids;
}

my @entries;
my $it = natatime 100, @ids;
while (my @next_ids = $it->()) {
# MBS-7241: Try to acquire deletion locks on all of the cache
# keys, so that we don't repopulate those which are being
# deleted in a concurrent transaction. (This is non-blocking;
# if we fail to acquire the lock for a particular id, we skip
# the key.)
#
# MBS-10497: `id % 50` is used to limit the number of locks
# obtained per entity type per transaction to be within
# PostgreSQL's default configuration value for
# max_locks_per_transaction, 64. Note that 64 is not a hard
# limit, just an average. A transaction can have as many locks
# as will fit in the lock table.
my $locks = $self->c->sql->select_list_of_hashes(
'SELECT id, pg_try_advisory_xact_lock(?, id % 50) AS got_lock ' .
' FROM unnest(?::integer[]) AS id',
$cache_id,
\@next_ids,
if (DBDefs->REPLICATION_TYPE != RT_MIRROR) {
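# MBS-7241: Check for cachelock keys set by a concurrent request
# that is deleting these IDs (see the POD above); any locked ID is
# filtered out below and not added to the cache.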
my $locks = $self->c->store->get_multi(
map { $cache_prefix . 'cachelock:' . $_ } @ids,
);
push @entries, map {
my $id = $_->{id};
[$cache_prefix . $id, $data->{$id}, ($ttl ? $ttl : ())]
} grep { $_->{got_lock} } @$locks;
@ids = grep {
my $cachelock_key = $cache_prefix . 'cachelock:' . $_;
!defined $locks->{$cachelock_key}
} @ids;
}

@entries;
return map {
[$cache_prefix . $_, $data->{$_}, ($ttl ? $ttl : ())]
} @ids;
}

sub _add_to_cache {
@@ -135,43 +150,32 @@ sub _delete_from_cache {
@ids = uniq grep { defined } @ids;
return unless @ids;

my $cache_id = $self->_cache_id;

# MBS-7241: Lock cache deletions from `_create_cache_entries`
# above, so that these keys can't be repopulated until after the
# database transaction commits.
my @row_ids = uniq
# MBS-10497: Limit the number of locks obtained per cache id
# per transaction to 50; see the comment in
# `_create_cache_entries` above. This increases the chance of
# contention, but invalidating cache entries is infrequent
# enough that it shouldn't matter.
map { $_ % 50 }
grep { is_database_row_id($_) } @ids;
if (@row_ids) {
my $start_time = time;
$self->c->sql->do(
'SELECT pg_advisory_xact_lock(?, id) ' .
' FROM unnest(?::integer[]) AS id',
$cache_id,
\@row_ids,
);
my $elapsed_time = (time - $start_time);
if ($elapsed_time > 0.01) {
log_debug {
"[$start_time] _delete_from_cache: waited $elapsed_time" .
' seconds for ' . (scalar @row_ids) .
" locks with cache id $cache_id"
};
my $c = $self->c;
my $cache_prefix = $self->_type . ':';

if (DBDefs->REPLICATION_TYPE != RT_MIRROR) {
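# MBS-7241: "Lock" these IDs from being re-added to the cache by
# concurrent requests until our transaction commits (see the POD above).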
my $max_request_time = $c->max_request_time;
if (!defined($max_request_time) || $max_request_time == 0) {
$max_request_time = $CACHELOCK_TTL_FALLBACK;
}
my @cachelock_keys = map {
$cache_prefix . 'cachelock:' . $_
} @ids;
$c->store->set_multi(map {
# The TTL (max request time + 1) is a fallback in case the
# post-commit callback below never runs.
[$_, 1, ($max_request_time + 1)]
} @cachelock_keys);

if ($c->sql->is_in_transaction) {
$c->sql->add_post_commit_callback(sub {
$c->store->delete_multi(@cachelock_keys);
});
}
}

my $cache_prefix = $self->_type . ':';
my @keys = map { $cache_prefix . $_ } @ids;

my $cache = $self->c->cache($self->_type);
my $cache = $c->cache($self->_type);
my $method = @keys > 1 ? 'delete_multi' : 'delete';
$cache->$method(@keys);
}

1;
22 changes: 22 additions & 0 deletions lib/Sql.pm
@@ -47,6 +47,18 @@ has 'sth' => (
clearer => 'clear_sth',
);

# Note: These callbacks run regardless of whether the commit succeeded!
has 'post_commit_callbacks' => (
traits => ['Array'],
is => 'rw',
isa => 'ArrayRef[CodeRef]',
default => sub { [] },
handles => {
all_post_commit_callbacks => 'elements',
add_post_commit_callback => 'push',
},
);

sub finish
{
my ($self) = @_;
@@ -273,6 +285,16 @@ sub commit
cluck $err unless ($self->quiet);
eval { $self->rollback };
croak $err;
}
finally {
my @callbacks = $self->all_post_commit_callbacks;
$self->post_commit_callbacks([]);
# Note: exceptions in `finally` blocks cannot be propagated "due to
# fundamental limitations of Perl."
#
# Never use `post_commit_callbacks` for anything that MUST complete
# to preserve database or cache consistency.
$_->() for @callbacks;
};
}
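
Note: a minimal usage sketch of the callback mechanism above (`$sql` and
`$store` are hypothetical stand-ins for a connected Sql instance and a
Redis-like store):

    $sql->begin;
    $sql->do('UPDATE artist SET name = ? WHERE id = ?', 'Renamed', 1);
    $sql->add_post_commit_callback(sub {
        # Best-effort cleanup only: this also runs if the commit fails,
        # and any exception thrown here cannot propagate.
        $store->delete('artist:cachelock:1');
    });
    $sql->commit;  # callbacks run in commit()'s `finally` block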

