Skip to content

Commit

Permalink
Fix ring-builder crash
Browse files Browse the repository at this point in the history
If you had a partition with 1 replica on a deleted device, 1 on a
zero-weight device, and 1 on a normal device, then rebalancing would
crash. This was because the ring builder was memoizing tiers_for_dev()
for all *available* devices (i.e. weight > 0 and not deleted), but
depended on it being present for all devices with partitions still on
them.

Since the builder moved the replica from the deleted device, it left
the one on the zero-weight device alone, so you had an unavailable
device with a partition replica still on it, triggering the crash.

Now we go ahead and memoize tiers_for_dev() for *all* devices, not
just available ones, thereby fixing the crash.

Change-Id: Ie0b58b65e0353732cf785ab772e95e699f3a5b5d
  • Loading branch information
smerritt committed Mar 11, 2014
1 parent 86668aa commit e7fb089
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
4 changes: 2 additions & 2 deletions swift/common/ring/builder.py
Expand Up @@ -754,6 +754,7 @@ def _reassign_parts(self, reassign_parts):
"""
for dev in self._iter_devs():
dev['sort_key'] = self._sort_key_for(dev)
dev['tiers'] = tiers_for_dev(dev)

available_devs = \
sorted((d for d in self._iter_devs() if d['weight']),
Expand All @@ -764,7 +765,6 @@ def _reassign_parts(self, reassign_parts):
tier2dev_sort_key = defaultdict(list)
max_tier_depth = 0
for dev in available_devs:
dev['tiers'] = tiers_for_dev(dev)
for tier in dev['tiers']:
tier2devs[tier].append(dev) # <-- starts out sorted!
tier2dev_sort_key[tier].append(dev['sort_key'])
Expand Down Expand Up @@ -889,7 +889,7 @@ def _reassign_parts(self, reassign_parts):
# Just to save memory and keep from accidental reuse.
for dev in self._iter_devs():
del dev['sort_key']
dev.pop('tiers', None) # May be absent for devices w/o weight
del dev['tiers']

def _sort_key_for(self, dev):
return (dev['parts_wanted'], random.randint(0, 0xFFFF), dev['id'])
Expand Down
29 changes: 28 additions & 1 deletion test/unit/common/ring/test_builder.py
Expand Up @@ -107,6 +107,33 @@ def test_rebalance_with_seed(self):
self.assertNotEquals(r0.to_dict(), r1.to_dict())
self.assertEquals(r1.to_dict(), r2.to_dict())

def test_rebalance_part_on_deleted_other_part_on_drained(self):
rb = ring.RingBuilder(8, 3, 1)
rb.add_dev({'id': 0, 'region': 1, 'zone': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'})
rb.add_dev({'id': 1, 'region': 1, 'zone': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'})
rb.add_dev({'id': 2, 'region': 1, 'zone': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'})
rb.add_dev({'id': 3, 'region': 1, 'zone': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'})
rb.add_dev({'id': 4, 'region': 1, 'zone': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 10004, 'device': 'sda1'})
rb.add_dev({'id': 5, 'region': 1, 'zone': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 10005, 'device': 'sda1'})

rb.rebalance(seed=1)
# We want a partition where 1 replica is on a removed device, 1
# replica is on a 0-weight device, and 1 on a normal device. To
# guarantee we have one, we see where partition 123 is, then
# manipulate its devices accordingly.
zero_weight_dev_id = rb._replica2part2dev[1][123]
delete_dev_id = rb._replica2part2dev[2][123]

rb.set_dev_weight(zero_weight_dev_id, 0.0)
rb.remove_dev(delete_dev_id)
rb.rebalance()

def test_set_replicas(self):
rb = ring.RingBuilder(8, 3.2, 1)
rb.devs_changed = False
Expand All @@ -125,7 +152,7 @@ def test_add_dev(self):
self.assertRaises(exceptions.DuplicateDeviceError, rb.add_dev, dev)
self.assertEqual(dev_id, 0)
rb = ring.RingBuilder(8, 3, 1)
#test add new dev with no id
# test add new dev with no id
dev_id = rb.add_dev({'zone': 0, 'region': 1, 'weight': 1,
'ip': '127.0.0.1', 'port': 6000})
self.assertEquals(rb.devs[0]['id'], 0)
Expand Down

0 comments on commit e7fb089

Please sign in to comment.