From 5eaf15486ea458ff4fa10d8e355d28b9a7e380e1 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Fri, 16 Oct 2020 19:11:09 -0700 Subject: [PATCH] Have REPLICATE with suffixes just append to hashes.invalid This only applies to post-sync REPLICATE calls, none of which actually look at the response anyway. Change-Id: I1de62140e7eb9a23152bb9fdb1fa0934e827bfda --- swift/obj/diskfile.py | 14 +++++++-- swift/obj/server.py | 3 +- .../probe/test_replication_servers_working.py | 10 ++++++ test/unit/obj/test_diskfile.py | 8 +++++ test/unit/obj/test_replicator.py | 9 +++--- test/unit/obj/test_server.py | 31 +++++++++++++------ 6 files changed, 58 insertions(+), 17 deletions(-) diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index b2476a9ca5..a1287a2477 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -1509,13 +1509,15 @@ def get_diskfile_from_hash(self, device, partition, object_hash, partition, account, container, obj, policy=policy, **kwargs) - def get_hashes(self, device, partition, suffixes, policy): + def get_hashes(self, device, partition, suffixes, policy, + skip_rehash=False): """ :param device: name of target device :param partition: partition name :param suffixes: a list of suffix directories to be recalculated :param policy: the StoragePolicy instance + :param skip_rehash: just mark the suffixes dirty; return None :returns: a dictionary that maps suffix directories """ dev_path = self.get_dev_path(device) @@ -1524,8 +1526,14 @@ def get_hashes(self, device, partition, suffixes, policy): partition_path = get_part_path(dev_path, policy, partition) if not os.path.exists(partition_path): mkdirs(partition_path) - _junk, hashes = tpool.execute( - self._get_hashes, device, partition, policy, recalculate=suffixes) + if skip_rehash: + for suffix in suffixes or []: + invalidate_hash(os.path.join(partition_path, suffix)) + return None + else: + _junk, hashes = tpool.execute( + self._get_hashes, device, partition, policy, + recalculate=suffixes) return hashes def _listdir(self, path): diff --git a/swift/obj/server.py b/swift/obj/server.py index 5a883e4c1d..2de1453fad 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -1298,7 +1298,8 @@ def REPLICATE(self, request): suffixes = suffix_parts.split('-') if suffix_parts else [] try: hashes = self._diskfile_router[policy].get_hashes( - device, partition, suffixes, policy) + device, partition, suffixes, policy, + skip_rehash=bool(suffixes)) except DiskFileDeviceUnavailable: resp = HTTPInsufficientStorage(drive=device, request=request) else: diff --git a/test/probe/test_replication_servers_working.py b/test/probe/test_replication_servers_working.py index d70c0285bb..3bc7ed928e 100644 --- a/test/probe/test_replication_servers_working.py +++ b/test/probe/test_replication_servers_working.py @@ -149,6 +149,16 @@ def test_main(self): for directory in test_node_dir_list: self.assertIn(directory, new_dir_list[0]) + + # We want to make sure that replication is completely + # settled; any invalidated hashes should be rehashed so + # hashes.pkl is stable + for directory in os.listdir( + os.path.join(test_node, data_dir)): + hashes_invalid_path = os.path.join( + test_node, data_dir, directory, 'hashes.invalid') + self.assertEqual(os.stat( + hashes_invalid_path).st_size, 0) break except Exception: if time.time() - begin > 60: diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 4e3562bfb3..778a578932 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -6931,6 +6931,14 @@ def mock_hash_suffix(*args, **kwargs): # so hashes should have the latest suffix hash... self.assertEqual(hashes[suffix], non_local['hash']) + non_local['called'] = False + with mock.patch.object(df_mgr, '_hash_suffix', mock_hash_suffix): + df_mgr.get_hashes('sda1', '0', [suffix], policy, + skip_rehash=True) + self.assertFalse(non_local['called']) + with open(invalidations_file) as f: + self.assertEqual(suffix, f.read().strip('\n')) # sanity + def test_hash_invalidations_race_get_hashes_same_suffix_new(self): self._check_hash_invalidations_race_get_hashes_same_suffix(False) diff --git a/test/unit/obj/test_replicator.py b/test/unit/obj/test_replicator.py index acc9da9eb3..ca9c9abf05 100644 --- a/test/unit/obj/test_replicator.py +++ b/test/unit/obj/test_replicator.py @@ -2011,10 +2011,11 @@ def set_default(self): node['replication_port'], node['device'], repl_job['partition'], 'REPLICATE', '', headers=self.headers)) - reqs.append(mock.call(node['replication_ip'], - node['replication_port'], node['device'], - repl_job['partition'], 'REPLICATE', - '/a83', headers=self.headers)) + reqs.append(mock.call( + node['replication_ip'], + node['replication_port'], node['device'], + repl_job['partition'], 'REPLICATE', + '/a83', headers=self.headers)) mock_http.assert_has_calls(reqs, any_order=True) @mock.patch('swift.obj.replicator.tpool.execute') diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 6a2b40d12c..4cac67d95a 100644 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -6960,7 +6960,7 @@ def my_tpool_execute(func, *args, **kwargs): try: diskfile.DiskFileManager._get_hashes = fake_get_hashes tpool.execute = my_tpool_execute - req = Request.blank('/sda1/p/suff', + req = Request.blank('/sda1/p/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) resp = req.get_response(self.object_controller) @@ -6984,7 +6984,7 @@ def my_tpool_execute(func, *args, **kwargs): try: diskfile.DiskFileManager._get_hashes = fake_get_hashes tpool.execute = my_tpool_execute - req = Request.blank('/sda1/p/suff', + req = Request.blank('/sda1/p/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) with mock.patch('swift.obj.server.pickle.dumps') as fake_pickle: @@ -7012,7 +7012,7 @@ def my_tpool_execute(func, *args, **kwargs): try: diskfile.DiskFileManager._get_hashes = fake_get_hashes tpool.execute = my_tpool_execute - req = Request.blank('/sda1/p/suff', + req = Request.blank('/sda1/p/', environ={'REQUEST_METHOD': 'REPLICATE'}, headers={}) self.assertRaises(Timeout, self.object_controller.REPLICATE, req) @@ -7059,15 +7059,28 @@ def test_REPLICATE_reclaims_tombstones(self): # tombstone still exists self.assertTrue(os.path.exists(tombstone_file)) - # after reclaim REPLICATE will rehash + # after reclaim REPLICATE will mark invalid (but NOT rehash!) replicate_request = Request.blank( '/sda1/0/%s' % suffix, method='REPLICATE', headers={ 'x-backend-storage-policy-index': int(policy), }) - the_future = time() + 200 - with mock.patch('swift.obj.diskfile.time.time') as mock_time: - mock_time.return_value = the_future + with mock.patch('swift.obj.diskfile.time.time', + return_value=time() + 200): + resp = replicate_request.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(None, pickle.loads(resp.body)) + # no rehash means tombstone still exists... + self.assertTrue(os.path.exists(tombstone_file)) + + # but at some point (like the next pre-sync REPLICATE) it rehashes + replicate_request = Request.blank( + '/sda1/0/', method='REPLICATE', + headers={ + 'x-backend-storage-policy-index': int(policy), + }) + with mock.patch('swift.obj.diskfile.time.time', + return_value=time() + 200): resp = replicate_request.get_response(self.object_controller) self.assertEqual(resp.status_int, 200) self.assertEqual({}, pickle.loads(resp.body)) @@ -7076,8 +7089,8 @@ def test_REPLICATE_reclaims_tombstones(self): # N.B. with a small reclaim age like this - if proxy clocks get far # enough out of whack ... - with mock.patch('swift.obj.diskfile.time.time') as mock_time: - mock_time.return_value = the_future + with mock.patch('swift.obj.diskfile.time.time', + return_value=time() + 200): resp = delete_request.get_response(self.object_controller) # we won't even create the tombstone self.assertFalse(os.path.exists(tombstone_file))