Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

slab reassignment

Adds a "slabs reassign src dst" manual command, and a thread to safely process
slab moves in the background.

- slab freelist is now a linked list, reusing the item structure
- if -o slab_reassign is enabled, an extra background thread is started
- thread attempts to safely free up items when it's been told to move a page
  from one slab to another.

-o slab_automove is stubbed.

There are some limitations. Most notable is that you cannot repeatedly move
pages around without first having items use up the memory. Slabs with newly
assigned memory work off of a pointer, handing out chunks individually. We
would need to change that to quickly split chunks for all newly assigned pages
into that slab's freelist.

Further testing is required to ensure this is possible without impacting
performance.
  • Loading branch information...
commit 10698bae63a034c14d2fdbc3027a1308ce90faba 1 parent 40b7b4b
dormando dormando authored
2  globals.c
@@ -21,3 +21,5 @@ volatile rel_time_t current_time;
21 21 /** exported globals **/
22 22 struct stats stats;
23 23 struct settings settings;
  24 +struct slab_rebalance slab_rebal;
  25 +volatile int slab_rebalance_signal;
6 items.c
@@ -123,7 +123,6 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
123 123 do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
124 124 /* Initialize the item block: */
125 125 it->slabs_clsid = 0;
126   - it->refcount = 0;
127 126 } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
128 127 if (settings.evict_to_free == 0) {
129 128 itemstats[id].outofmemory++;
@@ -149,7 +148,6 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
149 148 do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
150 149 /* Initialize the item block: */
151 150 it->slabs_clsid = 0;
152   - it->refcount = 0;
153 151 }
154 152 } else {
155 153 /* If the LRU is empty or locked, attempt to allocate memory */
@@ -181,11 +179,11 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_tim
181 179 /* Item initialization can happen outside of the lock; the item's already
182 180 * been removed from the slab LRU.
183 181 */
  182 + it->refcount = 1; /* the caller will have a reference */
184 183 pthread_mutex_unlock(&cache_lock);
  184 + it->next = it->prev = it->h_next = 0;
185 185 it->slabs_clsid = id;
186 186
187   - it->next = it->prev = it->h_next = 0;
188   - it->refcount = 1; /* the caller will have a reference */
189 187 DEBUG_REFCNT(it, '*');
190 188 it->it_flags = settings.use_cas ? ITEM_CAS : 0;
191 189 it->nkey = nkey;
69 memcached.c
@@ -102,6 +102,9 @@ struct stats stats;
102 102 struct settings settings;
103 103 time_t process_started; /* when the process was started */
104 104
  105 +struct slab_rebalance slab_rebal;
  106 +volatile int slab_rebalance_signal;
  107 +
105 108 /** file scope variables **/
106 109 static conn *listen_conn = NULL;
107 110 static struct event_base *main_base;
@@ -170,7 +173,9 @@ static void stats_init(void) {
170 173 stats.curr_bytes = stats.listen_disabled_num = 0;
171 174 stats.hash_power_level = stats.hash_bytes = stats.hash_is_expanding = 0;
172 175 stats.expired_unfetched = stats.evicted_unfetched = 0;
  176 + stats.slabs_moved = 0;
173 177 stats.accepting_conns = true; /* assuming we start in this state. */
  178 + stats.slab_reassign_running = false;
174 179
175 180 /* make the time we started always be 2 seconds before we really
176 181 did, so time(0) - time.started is never zero. if so, things
@@ -218,6 +223,8 @@ static void settings_init(void) {
218 223 settings.item_size_max = 1024 * 1024; /* The famous 1MB upper limit. */
219 224 settings.maxconns_fast = false;
220 225 settings.hashpower_init = 0;
  226 + settings.slab_reassign = false;
  227 + settings.slab_automove = false;
221 228 }
222 229
223 230 /*
@@ -2572,6 +2579,10 @@ static void server_stats(ADD_STAT add_stats, conn *c) {
2572 2579 APPEND_STAT("hash_is_expanding", "%u", stats.hash_is_expanding);
2573 2580 APPEND_STAT("expired_unfetched", "%llu", stats.expired_unfetched);
2574 2581 APPEND_STAT("evicted_unfetched", "%llu", stats.evicted_unfetched);
  2582 + if (settings.slab_reassign) {
  2583 + APPEND_STAT("slab_reassign_running", "%u", stats.slab_reassign_running);
  2584 + APPEND_STAT("slabs_moved", "%llu", stats.slabs_moved);
  2585 + }
2575 2586 STATS_UNLOCK();
2576 2587 }
2577 2588
@@ -2604,6 +2615,8 @@ static void process_stat_settings(ADD_STAT add_stats, void *c) {
2604 2615 APPEND_STAT("item_size_max", "%d", settings.item_size_max);
2605 2616 APPEND_STAT("maxconns_fast", "%s", settings.maxconns_fast ? "yes" : "no");
2606 2617 APPEND_STAT("hashpower_init", "%d", settings.hashpower_init);
  2618 + APPEND_STAT("slab_reassign", "%s", settings.slab_reassign ? "yes" : "no");
  2619 + APPEND_STAT("slab_automove", "%s", settings.slab_automove ? "yes" : "no");
2607 2620 }
2608 2621
2609 2622 static void process_stat(conn *c, token_t *tokens, const size_t ntokens) {
@@ -3290,6 +3303,45 @@ static void process_command(conn *c, char *command) {
3290 3303
3291 3304 conn_set_state(c, conn_closing);
3292 3305
  3306 + } else if (ntokens == 5 && (strcmp(tokens[COMMAND_TOKEN].value, "slabs") == 0 &&
  3307 + strcmp(tokens[COMMAND_TOKEN + 1].value, "reassign") == 0)) {
  3308 + int src, dst, rv;
  3309 +
  3310 + if (settings.slab_reassign == false) {
  3311 + out_string(c, "CLIENT_ERROR slab reassignment disabled");
  3312 + return;
  3313 + }
  3314 +
  3315 + src = strtol(tokens[2].value, NULL, 10);
  3316 + dst = strtol(tokens[3].value, NULL, 10);
  3317 +
  3318 + if (errno == ERANGE) {
  3319 + out_string(c, "CLIENT_ERROR bad command line format");
  3320 + return;
  3321 + }
  3322 +
  3323 + rv = slabs_reassign(src, dst);
  3324 + switch (rv) {
  3325 + case REASSIGN_OK:
  3326 + out_string(c, "OK");
  3327 + break;
  3328 + case REASSIGN_RUNNING:
  3329 + out_string(c, "BUSY");
  3330 + break;
  3331 + case REASSIGN_BADCLASS:
  3332 + out_string(c, "BADCLASS");
  3333 + break;
  3334 + case REASSIGN_NOSPARE:
  3335 + out_string(c, "NOSPARE");
  3336 + break;
  3337 + case REASSIGN_DEST_NOT_FULL:
  3338 + out_string(c, "NOTFULL");
  3339 + break;
  3340 + case REASSIGN_SRC_NOT_SAFE:
  3341 + out_string(c, "UNSAFE");
  3342 + break;
  3343 + }
  3344 + return;
3293 3345 } else if ((ntokens == 3 || ntokens == 4) && (strcmp(tokens[COMMAND_TOKEN].value, "verbosity") == 0)) {
3294 3346 process_verbosity_command(c, tokens, ntokens);
3295 3347 } else {
@@ -4639,11 +4691,15 @@ int main (int argc, char **argv) {
4639 4691 char *subopts_value;
4640 4692 enum {
4641 4693 MAXCONNS_FAST = 0,
4642   - HASHPOWER_INIT
  4694 + HASHPOWER_INIT,
  4695 + SLAB_REASSIGN,
  4696 + SLAB_AUTOMOVE
4643 4697 };
4644 4698 char *const subopts_tokens[] = {
4645 4699 [MAXCONNS_FAST] = "maxconns_fast",
4646 4700 [HASHPOWER_INIT] = "hashpower",
  4701 + [SLAB_REASSIGN] = "slab_reassign",
  4702 + [SLAB_AUTOMOVE] = "slab_automove",
4647 4703 NULL
4648 4704 };
4649 4705
@@ -4889,6 +4945,12 @@ int main (int argc, char **argv) {
4889 4945 return 1;
4890 4946 }
4891 4947 break;
  4948 + case SLAB_REASSIGN:
  4949 + settings.slab_reassign = true;
  4950 + break;
  4951 + case SLAB_AUTOMOVE:
  4952 + settings.slab_automove = true;
  4953 + break;
4892 4954 default:
4893 4955 printf("Illegal suboption \"%s\"\n", subopts_value);
4894 4956 return 1;
@@ -5042,6 +5104,11 @@ int main (int argc, char **argv) {
5042 5104 exit(EXIT_FAILURE);
5043 5105 }
5044 5106
  5107 + if (settings.slab_reassign &&
  5108 + start_slab_maintenance_thread() == -1) {
  5109 + exit(EXIT_FAILURE);
  5110 + }
  5111 +
5045 5112 /* initialise clock event */
5046 5113 clock_handler(0, 0, 0);
5047 5114
19 memcached.h
@@ -265,6 +265,8 @@ struct stats {
265 265 bool hash_is_expanding; /* If the hash table is being expanded */
266 266 uint64_t expired_unfetched; /* items reclaimed but never touched */
267 267 uint64_t evicted_unfetched; /* items evicted but never touched */
  268 + bool slab_reassign_running; /* slab reassign in progress */
  269 + uint64_t slabs_moved; /* times slabs were moved around */
268 270 };
269 271
270 272 #define MAX_VERBOSITY_LEVEL 2
@@ -298,6 +300,8 @@ struct settings {
298 300 int item_size_max; /* Maximum item size, and upper end for slabs */
299 301 bool sasl; /* SASL on/off */
300 302 bool maxconns_fast; /* Whether or not to early close connections */
  303 + bool slab_reassign; /* Whether or not slab reassignment is allowed */
  304 + bool slab_automove; /* Whether or not to automatically move slabs */
301 305 int hashpower_init; /* Starting hash power level */
302 306 };
303 307
@@ -452,6 +456,21 @@ struct conn {
452 456 /* current time of day (updated periodically) */
453 457 extern volatile rel_time_t current_time;
454 458
  459 +/* TODO: Move to slabs.h? */
  460 +extern volatile int slab_rebalance_signal;
  461 +
  462 +struct slab_rebalance {
  463 + void *slab_start;
  464 + void *slab_end;
  465 + void *slab_pos;
  466 + int s_clsid;
  467 + int d_clsid;
  468 + int busy_items;
  469 + uint8_t done;
  470 +};
  471 +
  472 +extern struct slab_rebalance slab_rebal;
  473 +
455 474 /*
456 475 * Functions
457 476 */
299 slabs.c
@@ -27,9 +27,8 @@ typedef struct {
27 27 unsigned int size; /* sizes of items */
28 28 unsigned int perslab; /* how many items per slab */
29 29
30   - void **slots; /* list of item ptrs */
31   - unsigned int sl_total; /* size of previous array */
32   - unsigned int sl_curr; /* first free slot */
  30 + void *slots; /* list of item ptrs */
  31 + unsigned int sl_curr; /* total free items in list */
33 32
34 33 void *end_page_ptr; /* pointer to next free item at end of page, or 0 */
35 34 unsigned int end_page_free; /* number of items remaining at end of last alloced page */
@@ -192,7 +191,8 @@ static int grow_slab_list (const unsigned int id) {
192 191
193 192 static int do_slabs_newslab(const unsigned int id) {
194 193 slabclass_t *p = &slabclass[id];
195   - int len = p->size * p->perslab;
  194 + int len = settings.slab_reassign ? settings.item_size_max
  195 + : p->size * p->perslab;
196 196 char *ptr;
197 197
198 198 if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0) ||
@@ -218,6 +218,7 @@ static int do_slabs_newslab(const unsigned int id) {
218 218 static void *do_slabs_alloc(const size_t size, unsigned int id) {
219 219 slabclass_t *p;
220 220 void *ret = NULL;
  221 + item *it = NULL;
221 222
222 223 if (id < POWER_SMALLEST || id > power_largest) {
223 224 MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
@@ -225,7 +226,7 @@ static void *do_slabs_alloc(const size_t size, unsigned int id) {
225 226 }
226 227
227 228 p = &slabclass[id];
228   - assert(p->sl_curr == 0 || ((item *)p->slots[p->sl_curr - 1])->slabs_clsid == 0);
  229 + assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0);
229 230
230 231 #ifdef USE_SYSTEM_MALLOC
231 232 if (mem_limit && mem_malloced + size > mem_limit) {
@@ -246,7 +247,10 @@ static void *do_slabs_alloc(const size_t size, unsigned int id) {
246 247 ret = NULL;
247 248 } else if (p->sl_curr != 0) {
248 249 /* return off our freelist */
249   - ret = p->slots[--p->sl_curr];
  250 + it = (item *)p->slots;
  251 + p->slots = it->next;
  252 + p->sl_curr--;
  253 + ret = (void *)it;
250 254 } else {
251 255 /* if we recently allocated a whole page, return from that */
252 256 assert(p->end_page_ptr != NULL);
@@ -270,6 +274,7 @@ static void *do_slabs_alloc(const size_t size, unsigned int id) {
270 274
271 275 static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
272 276 slabclass_t *p;
  277 + item *it;
273 278
274 279 assert(((item *)ptr)->slabs_clsid == 0);
275 280 assert(id >= POWER_SMALLEST && id <= power_largest);
@@ -285,15 +290,13 @@ static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
285 290 return;
286 291 #endif
287 292
288   - if (p->sl_curr == p->sl_total) { /* need more space on the free list */
289   - int new_size = (p->sl_total != 0) ? p->sl_total * 2 : 16; /* 16 is arbitrary */
290   - void **new_slots = realloc(p->slots, new_size * sizeof(void *));
291   - if (new_slots == 0)
292   - return;
293   - p->slots = new_slots;
294   - p->sl_total = new_size;
295   - }
296   - p->slots[p->sl_curr++] = ptr;
  293 + it = (item *)ptr;
  294 + it->prev = 0;
  295 + it->next = p->slots;
  296 + if (it->next) it->next->prev = it;
  297 + p->slots = it;
  298 +
  299 + p->sl_curr++;
297 300 p->requested -= size;
298 301 return;
299 302 }
@@ -453,3 +456,269 @@ void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
453 456 p->requested = p->requested - old + ntotal;
454 457 pthread_mutex_unlock(&slabs_lock);
455 458 }
  459 +
  460 +static pthread_cond_t maintenance_cond = PTHREAD_COND_INITIALIZER;
  461 +static volatile int do_run_slab_thread = 1;
  462 +
  463 +#define DEFAULT_SLAB_BULK_CHECK 1
  464 +int slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
  465 +
  466 +static int slab_rebalance_start(void) {
  467 + slabclass_t *s_cls;
  468 + slabclass_t *d_cls;
  469 + int no_go = 0;
  470 +
  471 + pthread_mutex_lock(&cache_lock);
  472 + pthread_mutex_lock(&slabs_lock);
  473 +
  474 + if (slab_rebal.s_clsid < POWER_SMALLEST ||
  475 + slab_rebal.s_clsid > power_largest ||
  476 + slab_rebal.d_clsid < POWER_SMALLEST ||
  477 + slab_rebal.d_clsid > power_largest)
  478 + no_go = -2;
  479 +
  480 + s_cls = &slabclass[slab_rebal.s_clsid];
  481 + d_cls = &slabclass[slab_rebal.d_clsid];
  482 +
  483 + if (d_cls->end_page_ptr || s_cls->end_page_ptr ||
  484 + !grow_slab_list(slab_rebal.d_clsid)) {
  485 + no_go = -1;
  486 + }
  487 +
  488 + if (s_cls->slabs < 2)
  489 + no_go = -3;
  490 +
  491 + if (no_go != 0) {
  492 + pthread_mutex_unlock(&slabs_lock);
  493 + pthread_mutex_unlock(&cache_lock);
  494 + return no_go; /* Should use a wrapper function... */
  495 + }
  496 +
  497 + s_cls->killing = 1;
  498 +
  499 + slab_rebal.slab_start = s_cls->slab_list[s_cls->killing - 1];
  500 + slab_rebal.slab_end = (char *)slab_rebal.slab_start +
  501 + (s_cls->size * s_cls->perslab);
  502 + slab_rebal.slab_pos = slab_rebal.slab_start;
  503 + slab_rebal.done = 0;
  504 +
  505 + /* Also tells do_item_get to search for items in this slab */
  506 + slab_rebalance_signal = 2;
  507 +
  508 + if (settings.verbose > 1) {
  509 + fprintf(stderr, "Started a slab rebalance\n");
  510 + }
  511 +
  512 + pthread_mutex_unlock(&slabs_lock);
  513 + pthread_mutex_unlock(&cache_lock);
  514 +
  515 + STATS_LOCK();
  516 + stats.slab_reassign_running = true;
  517 + STATS_UNLOCK();
  518 +
  519 + return 0;
  520 +}
  521 +
  522 +/* refcount == 0 is safe since nobody can incr while cache_lock is held.
  523 + * refcount != 0 is impossible since flags/etc can be modified in other
  524 + * threads. instead, note we found a busy one and bail. logic in do_item_get
  525 + * will prevent busy items from continuing to be busy
  526 + */
  527 +static int slab_rebalance_move(void) {
  528 + slabclass_t *s_cls;
  529 + int x;
  530 + int was_busy = 0;
  531 +
  532 + pthread_mutex_lock(&cache_lock);
  533 + pthread_mutex_lock(&slabs_lock);
  534 +
  535 + s_cls = &slabclass[slab_rebal.s_clsid];
  536 +
  537 + for (x = 0; x < slab_bulk_check; x++) {
  538 + item *it = slab_rebal.slab_pos;
  539 + if (it->refcount == 0) {
  540 + if (it->it_flags & ITEM_SLABBED) {
  541 + /* remove from freelist linked list */
  542 + if (s_cls->slots == it) {
  543 + s_cls->slots = it->next;
  544 + }
  545 + if (it->next) it->next->prev = it->prev;
  546 + if (it->prev) it->prev->next = it->next;
  547 + s_cls->sl_curr--;
  548 + } else if (it->it_flags != 0) {
  549 + it->refcount = 1;
  550 + /* Call unlink with refcount == 1 so it won't free */
  551 + do_item_unlink_nolock(it, hash(ITEM_key(it), it->nkey, 0));
  552 + it->refcount = 0;
  553 + }
  554 + it->it_flags = 0;
  555 + it->slabs_clsid = 0;
  556 + } else {
  557 + if (settings.verbose > 2) {
  558 + fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
  559 + it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
  560 + }
  561 + slab_rebal.busy_items++;
  562 + was_busy++;
  563 + }
  564 +
  565 + slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
  566 + if (slab_rebal.slab_pos >= slab_rebal.slab_end)
  567 + break;
  568 + }
  569 +
  570 + if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
  571 + /* Some items were busy, start again from the top */
  572 + if (slab_rebal.busy_items) {
  573 + slab_rebal.slab_pos = slab_rebal.slab_start;
  574 + slab_rebal.busy_items = 0;
  575 + } else {
  576 + slab_rebal.done++;
  577 + }
  578 + }
  579 +
  580 + pthread_mutex_unlock(&slabs_lock);
  581 + pthread_mutex_unlock(&cache_lock);
  582 +
  583 + return was_busy;
  584 +}
  585 +
  586 +static void slab_rebalance_finish(void) {
  587 + slabclass_t *s_cls;
  588 + slabclass_t *d_cls;
  589 +
  590 + pthread_mutex_lock(&cache_lock);
  591 + pthread_mutex_lock(&slabs_lock);
  592 +
  593 + s_cls = &slabclass[slab_rebal.s_clsid];
  594 + d_cls = &slabclass[slab_rebal.d_clsid];
  595 +
  596 + /* At this point the stolen slab is completely clear */
  597 + s_cls->slab_list[s_cls->killing - 1] =
  598 + s_cls->slab_list[s_cls->slabs - 1];
  599 + s_cls->slabs--;
  600 + s_cls->killing = 0;
  601 +
  602 + memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max);
  603 +
  604 + d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;
  605 + d_cls->end_page_ptr = slab_rebal.slab_start;
  606 + d_cls->end_page_free = d_cls->perslab;
  607 +
  608 + slab_rebal.done = 0;
  609 + slab_rebal.s_clsid = 0;
  610 + slab_rebal.d_clsid = 0;
  611 + slab_rebal.slab_start = NULL;
  612 + slab_rebal.slab_end = NULL;
  613 + slab_rebal.slab_pos = NULL;
  614 +
  615 + slab_rebalance_signal = 0;
  616 +
  617 + pthread_mutex_unlock(&slabs_lock);
  618 + pthread_mutex_unlock(&cache_lock);
  619 +
  620 + STATS_LOCK();
  621 + stats.slab_reassign_running = false;
  622 + stats.slabs_moved++;
  623 + STATS_UNLOCK();
  624 +
  625 + if (settings.verbose > 1) {
  626 + fprintf(stderr, "finished a slab move\n");
  627 + }
  628 +}
  629 +
  630 +/* Slab rebalancer thread.
  631 + * Does not use spinlocks since it is not timing sensitive. Burn less CPU and
  632 + * go to sleep if locks are contended
  633 + */
  634 +static void *slab_maintenance_thread(void *arg) {
  635 + int was_busy = 0;
  636 +
  637 + while (do_run_slab_thread) {
  638 + /* TODO: Call code to make a calculated decision */
  639 +
  640 + if (slab_rebalance_signal == 1) {
  641 + if (slab_rebalance_start() < 0) {
  642 + /* Handle errors with more specificity as required. */
  643 + slab_rebalance_signal = 0;
  644 + }
  645 +
  646 + } else if (slab_rebalance_signal && slab_rebal.slab_start != NULL) {
  647 + /* If we have a decision to continue, continue it */
  648 + was_busy = slab_rebalance_move();
  649 + }
  650 +
  651 + if (slab_rebal.done) {
  652 + slab_rebalance_finish();
  653 + }
  654 +
  655 + /* Sleep a bit if no work to do, or waiting on busy objects */
  656 + if (was_busy || !slab_rebalance_signal)
  657 + sleep(1);
  658 + }
  659 + return NULL;
  660 +}
  661 +
  662 +static enum reassign_result_type do_slabs_reassign(int src, int dst) {
  663 + if (slab_rebalance_signal != 0)
  664 + return REASSIGN_RUNNING;
  665 +
  666 + if (src < POWER_SMALLEST || src > power_largest ||
  667 + dst < POWER_SMALLEST || dst > power_largest)
  668 + return REASSIGN_BADCLASS;
  669 +
  670 + if (slabclass[src].slabs < 2)
  671 + return REASSIGN_NOSPARE;
  672 +
  673 + if (slabclass[dst].end_page_ptr)
  674 + return REASSIGN_DEST_NOT_FULL;
  675 +
  676 + if (slabclass[src].end_page_ptr)
  677 + return REASSIGN_SRC_NOT_SAFE;
  678 +
  679 + slab_rebal.s_clsid = src;
  680 + slab_rebal.d_clsid = dst;
  681 +
  682 + slab_rebalance_signal = 1;
  683 +
  684 + return REASSIGN_OK;
  685 +}
  686 +
  687 +enum reassign_result_type slabs_reassign(int src, int dst) {
  688 + enum reassign_result_type ret;
  689 + mutex_lock(&slabs_lock);
  690 + ret = do_slabs_reassign(src, dst);
  691 + pthread_mutex_unlock(&slabs_lock);
  692 + return ret;
  693 +}
  694 +
  695 +static pthread_t maintenance_tid;
  696 +
  697 +int start_slab_maintenance_thread(void) {
  698 + int ret;
  699 + slab_rebalance_signal = 0;
  700 + slab_rebal.slab_start = NULL;
  701 + char *env = getenv("MEMCACHED_SLAB_BULK_CHECK");
  702 + if (env != NULL) {
  703 + slab_bulk_check = atoi(env);
  704 + if (slab_bulk_check == 0) {
  705 + slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
  706 + }
  707 + }
  708 + if ((ret = pthread_create(&maintenance_tid, NULL,
  709 + slab_maintenance_thread, NULL)) != 0) {
  710 + fprintf(stderr, "Can't create thread: %s\n", strerror(ret));
  711 + return -1;
  712 + }
  713 + return 0;
  714 +}
  715 +
  716 +void stop_slab_maintenance_thread(void) {
  717 + mutex_lock(&cache_lock);
  718 + do_run_slab_thread = 0;
  719 + pthread_cond_signal(&maintenance_cond);
  720 + pthread_mutex_unlock(&cache_lock);
  721 +
  722 + /* Wait for the maintenance thread to stop */
  723 + pthread_join(maintenance_tid, NULL);
  724 +}
10 slabs.h
@@ -33,4 +33,14 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
33 33 /** Fill buffer with stats */ /*@null@*/
34 34 void slabs_stats(ADD_STAT add_stats, void *c);
35 35
  36 +int start_slab_maintenance_thread(void);
  37 +void stop_slab_maintenance_thread(void);
  38 +
  39 +enum reassign_result_type {
  40 + REASSIGN_OK=0, REASSIGN_RUNNING, REASSIGN_BADCLASS, REASSIGN_NOSPARE,
  41 + REASSIGN_DEST_NOT_FULL, REASSIGN_SRC_NOT_SAFE
  42 +};
  43 +
  44 +enum reassign_result_type slabs_reassign(int src, int dst);
  45 +
36 46 #endif
2  t/binary.t
@@ -2,7 +2,7 @@
2 2
3 3 use strict;
4 4 use warnings;
5   -use Test::More tests => 3533;
  5 +use Test::More tests => 3539;
6 6 use FindBin qw($Bin);
7 7 use lib "$Bin/lib";
8 8 use MemcachedTest;
71 t/slabs_reassign.t
... ... @@ -0,0 +1,71 @@
  1 +#!/usr/bin/perl
  2 +
  3 +use strict;
  4 +use warnings;
  5 +use Test::More tests => 111;
  6 +use FindBin qw($Bin);
  7 +use lib "$Bin/lib";
  8 +use MemcachedTest;
  9 +
  10 +# Enable manual slab reassign, cap at 6 slabs
  11 +my $server = new_memcached('-o slab_reassign -m 4');
  12 +my $stats = mem_stats($server->sock, ' settings');
  13 +is($stats->{slab_reassign}, "yes");
  14 +
  15 +my $sock = $server->sock;
  16 +
  17 +# Fill a largeish slab until it evicts (honors the -m 6)
  18 +my $bigdata = 'x' x 70000; # slab 31
  19 +for (1 .. 50) {
  20 + print $sock "set bfoo$_ 0 0 70000\r\n", $bigdata, "\r\n";
  21 + is(scalar <$sock>, "STORED\r\n", "stored key");
  22 +}
  23 +
  24 +# Fill a smaller slab until it evicts
  25 +my $smalldata = 'y' x 20000; # slab 25
  26 +for (1 .. 50) {
  27 + print $sock "set sfoo$_ 0 0 20000\r\n", $smalldata, "\r\n";
  28 + is(scalar <$sock>, "STORED\r\n", "stored key");
  29 +}
  30 +
  31 +my $items_before = mem_stats($sock, "items");
  32 +isnt($items_before->{"items:31:evicted"}, 0, "slab 31 evicted is nonzero");
  33 +isnt($items_before->{"items:25:evicted"}, 0, "slab 25 evicted is nonzero");
  34 +
  35 +my $slabs_before = mem_stats($sock, "slabs");
  36 +# Move a large slab to the smaller slab
  37 +print $sock "slabs reassign 31 25\r\n";
  38 +is(scalar <$sock>, "OK\r\n", "slab rebalancer started");
  39 +
  40 +# Still working out how/if to signal the thread. For now, just sleep.
  41 +sleep 2;
  42 +
  43 +# Check that stats counters increased
  44 +my $slabs_after = mem_stats($sock, "slabs");
  45 +my $stats = mem_stats($sock);
  46 +
  47 +isnt($stats->{slabs_moved}, 0, "slabs moved is nonzero");
  48 +
  49 +# Check that slab stats reflect the change
  50 +ok($slabs_before->{"31:total_pages"} != $slabs_after->{"31:total_pages"},
  51 + "slab 31 pagecount changed");
  52 +ok($slabs_before->{"25:total_pages"} != $slabs_after->{"25:total_pages"},
  53 + "slab 25 pagecount changed");
  54 +
  55 +# Try to move another slab, see that it complains
  56 +print $sock "slabs reassign 31 25\r\n";
  57 +is(scalar <$sock>, "NOTFULL\r\n", "Cannot re-run against class with empty space");
  58 +
  59 +# Try to move a page backwards. Should complain that source class isn't "safe"
  60 +# to move from.
  61 +print $sock "slabs reassign 25 31\r\n";
  62 +is(scalar <$sock>, "UNSAFE\r\n", "Cannot move an unsafe slab back");
  63 +
  64 +# Try to insert items into both slabs
  65 +print $sock "set bfoo51 0 0 70000\r\n", $bigdata, "\r\n";
  66 +is(scalar <$sock>, "STORED\r\n", "stored key");
  67 +
  68 +print $sock "set sfoo51 0 0 20000\r\n", $smalldata, "\r\n";
  69 +is(scalar <$sock>, "STORED\r\n", "stored key");
  70 +
  71 +# Do need to come up with better automated tests for this.

0 comments on commit 10698ba

Please sign in to comment.
Something went wrong with that request. Please try again.