@@ -95,6 +95,10 @@ start_server {tags {"defrag"}} {
9595 }
9696 if {$::verbose } {
9797 puts " frag $frag "
98+ set misses [s active_defrag_misses]
99+ set hits [s active_defrag_hits]
100+ puts " hits: $hits "
101+ puts " misses: $misses "
98102 puts " max latency $max_latency "
99103 puts [r latency latest]
100104 puts [r latency history active-defrag-cycle]
@@ -221,6 +225,10 @@ start_server {tags {"defrag"}} {
221225 }
222226 if {$::verbose } {
223227 puts " frag $frag "
228+ set misses [s active_defrag_misses]
229+ set hits [s active_defrag_hits]
230+ puts " hits: $hits "
231+ puts " misses: $misses "
224232 puts " max latency $max_latency "
225233 puts [r latency latest]
226234 puts [r latency history active-defrag-cycle]
@@ -256,11 +264,12 @@ start_server {tags {"defrag"}} {
256264 set expected_frag 1.7
257265 # add a mass of list nodes to two lists (allocations are interlaced)
258266 set val [string repeat A 100] ;# 5 items of 100 bytes puts us in the 640 bytes bin, which has 32 regs, so high potential for fragmentation
259- for {set j 0} {$j < 500000} {incr j} {
267+ set elements 500000
268+ for {set j 0} {$j < $elements } {incr j} {
260269 $rd lpush biglist1 $val
261270 $rd lpush biglist2 $val
262271 }
263- for {set j 0} {$j < 500000 } {incr j} {
272+ for {set j 0} {$j < $elements } {incr j} {
264273 $rd read ; # Discard replies
265274 $rd read ; # Discard replies
266275 }
@@ -302,6 +311,8 @@ start_server {tags {"defrag"}} {
302311
303312 # test that the fragmentation is lower
304313 after 120 ;# serverCron only updates the info once in 100ms
314+ set misses [s active_defrag_misses]
315+ set hits [s active_defrag_hits]
305316 set frag [s allocator_frag_ratio]
306317 set max_latency 0
307318 foreach event [r latency latest] {
@@ -312,6 +323,8 @@ start_server {tags {"defrag"}} {
312323 }
313324 if {$::verbose } {
314325 puts " frag $frag "
326+ puts " misses: $misses "
327+ puts " hits: $hits "
315328 puts " max latency $max_latency "
316329 puts [r latency latest]
317330 puts [r latency history active-defrag-cycle]
@@ -320,13 +333,121 @@ start_server {tags {"defrag"}} {
320333 # due to high fragmentation, 100hz, and active-defrag-cycle-max set to 75,
321334 # we expect max latency to be not much higher than 7.5ms but due to rare slowness threshold is set higher
322335 assert {$max_latency <= 30}
336+
337+ # in extreme cases of stagnation, we see over 20m misses before the test aborts with "defrag didn't stop",
338+ # in normal cases we only see 100k misses out of 500k elements
339+ assert {$misses < $elements }
323340 }
324341 # verify the data isn't corrupted or changed
325342 set newdigest [r debug digest]
326343 assert {$digest eq $newdigest }
327344 r save ;# saving an rdb iterates over all the data / pointers
328345 r del biglist1 ;# coverage for quicklistBookmarksClear
329346 } {1}
347+
348+ test " Active defrag edge case" {
349+ # there was an edge case in defrag where all the slabs of a certain bin have exactly the same
350+ # % utilization, with the exception of the current slab from which new allocations are made.
351+ # if the current slab is lower in utilization, the defragger would end up in stagnation:
352+ # it kept running without moving any allocation.
353+ # this test is more consistent on a fresh server with no history
354+ start_server {tags {" defrag" }} {
355+ r flushdb
356+ r config resetstat
357+ r config set save " " ;# prevent bgsave from interfering with save below
358+ r config set hz 100
359+ r config set activedefrag no
360+ r config set active-defrag-max-scan-fields 1000
361+ r config set active-defrag-threshold-lower 5
362+ r config set active-defrag-cycle-min 65
363+ r config set active-defrag-cycle-max 75
364+ r config set active-defrag-ignore-bytes 1mb
365+ r config set maxmemory 0
366+ set expected_frag 1.3
367+
368+ r debug mallctl-str thread.tcache.flush VOID
369+ # fill the first slab containing 32 regs of 640 bytes.
370+ for {set j 0} {$j < 32} {incr j} {
371+ r setrange " _$j " 600 x
372+ r debug mallctl-str thread.tcache.flush VOID
373+ }
374+
375+ # add a mass of keys with 600 bytes values, fill the bin of 640 bytes which has 32 regs per slab.
376+ set rd [redis_deferring_client]
377+ set keys 640000
378+ for {set j 0} {$j < $keys } {incr j} {
379+ $rd setrange $j 600 x
380+ }
381+ for {set j 0} {$j < $keys } {incr j} {
382+ $rd read ; # Discard replies
383+ }
384+
385+ # create some fragmentation of 50%
386+ set sent 0
387+ for {set j 0} {$j < $keys } {incr j 1} {
388+ $rd del $j
389+ incr sent
390+ incr j 1
391+ }
392+ for {set j 0} {$j < $sent } {incr j} {
393+ $rd read ; # Discard replies
394+ }
395+
396+ # create higher fragmentation in the first slab
397+ for {set j 10} {$j < 32} {incr j} {
398+ r del " _$j "
399+ }
400+
401+ # start defrag
402+ after 120 ;# serverCron only updates the info once in 100ms
403+ set frag [s allocator_frag_ratio]
404+ if {$::verbose } {
405+ puts " frag $frag "
406+ }
407+
408+ assert {$frag >= $expected_frag }
409+
410+ set digest [r debug digest]
411+ catch {r config set activedefrag yes} e
412+ if {![string match {DISABLED*} $e ]} {
413+ # wait for the active defrag to start working (decision once a second)
414+ wait_for_condition 50 100 {
415+ [s active_defrag_running] ne 0
416+ } else {
417+ fail " defrag not started."
418+ }
419+
420+ # wait for the active defrag to stop working
421+ wait_for_condition 500 100 {
422+ [s active_defrag_running] eq 0
423+ } else {
424+ after 120 ;# serverCron only updates the info once in 100ms
425+ puts [r info memory]
426+ puts [r info stats]
427+ puts [r memory malloc-stats]
428+ fail " defrag didn't stop."
429+ }
430+
431+ # test that the fragmentation is lower
432+ after 120 ;# serverCron only updates the info once in 100ms
433+ set misses [s active_defrag_misses]
434+ set hits [s active_defrag_hits]
435+ set frag [s allocator_frag_ratio]
436+ if {$::verbose } {
437+ puts " frag $frag "
438+ puts " hits: $hits "
439+ puts " misses: $misses "
440+ }
441+ assert {$frag < 1.1}
442+ assert {$misses < 10000000} ;# when defrag doesn't stop, we have some 30m misses, when it does, we have 2m misses
443+ }
444+
445+ # verify the data isn't corrupted or changed
446+ set newdigest [r debug digest]
447+ assert {$digest eq $newdigest }
448+ r save ;# saving an rdb iterates over all the data / pointers
449+ }
450+ }
330451 }
331452}
332453} ;# run_solo
0 commit comments