Skip to content

Commit

Permalink
slab_rebal: delete busy items if stuck
Browse files Browse the repository at this point in the history
If we loop through a slab page too many times without freeing everything,
delete the items that are stuck with high refcounts. They should bleed off so
long as the connections aren't jammed holding them.

It should be possible to force rescues in this case as well, but that's more
code, so it will follow later. It needs a big-ish refactor.
  • Loading branch information
dormando committed Jun 23, 2017
1 parent 5a26aca commit d67d187
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 1 deletion.
3 changes: 3 additions & 0 deletions doc/protocol.txt
Expand Up @@ -726,6 +726,9 @@ integers separated by a colon (treat this as a floating point number).
| slab_reassign_busy_items |
| | 64u | Items busy during page move, requiring a |
| | | retry before page can be moved. |
| slab_reassign_busy_deletes |
| | 64u | Items busy during page move, requiring |
| | | deletion before page can be moved. |
| log_worker_dropped | 64u | Logs a worker never wrote due to full buf |
| log_worker_written | 64u | Logs written by a worker, to be picked up |
| log_watcher_skipped | 64u | Logs not sent to slow watchers. |
Expand Down
1 change: 1 addition & 0 deletions memcached.c
Expand Up @@ -2966,6 +2966,7 @@ static void server_stats(ADD_STAT add_stats, conn *c) {
APPEND_STAT("slab_reassign_evictions_nomem", "%llu", stats.slab_reassign_evictions_nomem);
APPEND_STAT("slab_reassign_inline_reclaim", "%llu", stats.slab_reassign_inline_reclaim);
APPEND_STAT("slab_reassign_busy_items", "%llu", stats.slab_reassign_busy_items);
APPEND_STAT("slab_reassign_busy_deletes", "%llu", stats.slab_reassign_busy_deletes);
APPEND_STAT("slab_reassign_running", "%u", stats_state.slab_reassign_running);
APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved);
}
Expand Down
3 changes: 3 additions & 0 deletions memcached.h
Expand Up @@ -292,6 +292,7 @@ struct stats {
uint64_t slab_reassign_inline_reclaim; /* valid items lost during slab move */
uint64_t slab_reassign_chunk_rescues; /* chunked-item chunks recovered */
uint64_t slab_reassign_busy_items; /* valid temporarily unmovable */
uint64_t slab_reassign_busy_deletes; /* refcounted items killed */
uint64_t lru_crawler_starts; /* Number of item crawlers kicked off */
uint64_t lru_maintainer_juggles; /* number of LRU bg pokes */
uint64_t time_in_listen_disabled_us; /* elapsed time in microseconds while server unable to process new connections */
Expand Down Expand Up @@ -593,6 +594,8 @@ struct slab_rebalance {
uint32_t evictions_nomem;
uint32_t inline_reclaim;
uint32_t chunk_rescues;
uint32_t busy_deletes;
uint32_t busy_loops;
uint8_t done;
};

Expand Down
22 changes: 21 additions & 1 deletion slabs.c
Expand Up @@ -716,6 +716,8 @@ enum move_status {
MOVE_PASS=0, MOVE_FROM_SLAB, MOVE_FROM_LRU, MOVE_BUSY, MOVE_LOCKED
};

/* Limit on busy retry passes over a page (slab_rebal.busy_loops). Once the
 * counter exceeds this, linked items that still hold extra references are
 * unlinked with do_item_unlink() instead of only being reported as busy. */
#define SLAB_MOVE_MAX_LOOPS 1000

/* refcount == 0 is safe since nobody can incr while item_lock is held.
* refcount != 0 is impossible since flags/etc can be modified in other
* threads. instead, note we found a busy one and bail. logic in do_item_get
Expand Down Expand Up @@ -781,18 +783,29 @@ static int slab_rebalance_move(void) {
if ((hold_lock = item_trylock(hv)) == NULL) {
status = MOVE_LOCKED;
} else {
bool is_linked = (it->it_flags & ITEM_LINKED);
refcount = refcount_incr(it);
if (refcount == 2) { /* item is linked but not busy */
/* Double check ITEM_LINKED flag here, since we're
* past a memory barrier from the mutex. */
if ((it->it_flags & ITEM_LINKED) != 0) {
if (is_linked) {
status = MOVE_FROM_LRU;
} else {
/* refcount == 1 + !ITEM_LINKED means the item is being
* uploaded to, or was just unlinked but hasn't been freed
* yet. Let it bleed off on its own and try again later */
status = MOVE_BUSY;
}
} else if (refcount > 2 && is_linked) {
// TODO: Mark items for delete/rescue and process
// outside of the main loop.
if (slab_rebal.busy_loops > SLAB_MOVE_MAX_LOOPS) {
slab_rebal.busy_deletes++;
// Only safe to hold slabs lock because refcount
// can't drop to 0 until we release item lock.
do_item_unlink(it, hv);
}
status = MOVE_BUSY;
} else {
if (settings.verbose > 2) {
fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
Expand Down Expand Up @@ -941,6 +954,7 @@ static int slab_rebalance_move(void) {
stats.slab_reassign_busy_items += slab_rebal.busy_items;
STATS_UNLOCK();
slab_rebal.busy_items = 0;
slab_rebal.busy_loops++;
} else {
slab_rebal.done++;
}
Expand All @@ -959,6 +973,7 @@ static void slab_rebalance_finish(void) {
uint32_t evictions_nomem;
uint32_t inline_reclaim;
uint32_t chunk_rescues;
uint32_t busy_deletes;

pthread_mutex_lock(&slabs_lock);

Expand Down Expand Up @@ -999,6 +1014,7 @@ static void slab_rebalance_finish(void) {
memory_release();
}

slab_rebal.busy_loops = 0;
slab_rebal.done = 0;
slab_rebal.s_clsid = 0;
slab_rebal.d_clsid = 0;
Expand All @@ -1009,9 +1025,12 @@ static void slab_rebalance_finish(void) {
inline_reclaim = slab_rebal.inline_reclaim;
rescues = slab_rebal.rescues;
chunk_rescues = slab_rebal.chunk_rescues;
busy_deletes = slab_rebal.busy_deletes;
slab_rebal.evictions_nomem = 0;
slab_rebal.inline_reclaim = 0;
slab_rebal.rescues = 0;
slab_rebal.chunk_rescues = 0;
slab_rebal.busy_deletes = 0;

slab_rebalance_signal = 0;

Expand All @@ -1023,6 +1042,7 @@ static void slab_rebalance_finish(void) {
stats.slab_reassign_evictions_nomem += evictions_nomem;
stats.slab_reassign_inline_reclaim += inline_reclaim;
stats.slab_reassign_chunk_rescues += chunk_rescues;
stats.slab_reassign_busy_deletes += busy_deletes;
stats_state.slab_reassign_running = false;
STATS_UNLOCK();

Expand Down
77 changes: 77 additions & 0 deletions t/slabhang.t
@@ -0,0 +1,77 @@
#!/usr/bin/perl
# Exercises the slab rebalancer against items pinned by a jammed connection:
# a second socket issues a large multiget and never reads the reply, then
# pages are reassigned out of the slab class holding those items. The test
# expects the page moves to finish and slab_reassign_busy_deletes to be
# non-zero afterwards.

use strict;
use warnings;

use Test::More;

use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;

# skip_all ends the script here; everything below only runs once this line
# is removed.
plan skip_all => 'Test is flaky. Needs special hooks.';

# 1 automove check + 70 stores + 1 source slab check + 1 timeout check
# + 1 busy_deletes check.
plan tests => 74;

# Start up a small server (-m 16) with the "modern" option set.
my $server = new_memcached("-m 16 -o modern");
my $sock = $server->sock;
my $hangsock = $server->new_sock;
my $value = "B"x260144;
my $key = 0;

# disable the normal automover.
print $sock "slabs automove 0\r\n";
is(scalar <$sock>, "OK\r\n", "automover disabled");

# These aren't set to expire.
my $mget = '';
for ($key = 0; $key < 70; $key++) {
    $mget .= "key$key ";
    print $sock "set key$key 0 0 260144\r\n$value\r\n";
    is(scalar <$sock>, "STORED\r\n", "stored key$key");
}
# Drop the trailing space left by the loop above.
chop $mget;

# Don't intend to read the results, need to fill the socket.
print $hangsock "get $mget\r\n";
#sleep 8;

# Pick the first slab class reporting more than 10 pages as the source.
my $stats = mem_stats($sock, "slabs");
my $source = 0;
for my $key (keys %$stats) {
    if ($key =~ m/^(\d+):total_pages/) {
        my $sid = $1;
        if ($stats->{$key} > 10) {
            $source = $sid;
            last;
        }
    }
}
isnt($source, 0, "found the source slab: $source");

# Move four pages, retrying once a second while the server answers BUSY and
# giving up after more than 10 retries for a single page.
my $busy;
my $tomove = 4;
my $reassign = "slabs reassign $source 1\r\n";
while ($tomove) {
    $busy = 0;
    print $sock $reassign;
    my $res = scalar <$sock>;
    while ($res =~ m/^BUSY/) {
        if ($hangsock && $busy > 5) {
            # unjam the pipeline
            $hangsock->close;
            # Clear the handle so the guard above stops re-closing an
            # already closed socket on later iterations.
            undef $hangsock;
        }
        last if ($busy > 10);
        sleep 1;
        $busy++;
        print $sock $reassign;
        $res = scalar <$sock>;
    }
    last if ($busy > 10);
    $tomove--;
}

ok($busy <= 10, "didn't time out moving pages");

$stats = mem_stats($sock);
# Numeric comparison: a missing (undef) stat must fail rather than pass the
# way a string "isn't 0" check would.
cmp_ok($stats->{"slab_reassign_busy_deletes"}, '>', 0, "deleted some busy items");

0 comments on commit d67d187

Please sign in to comment.