From 341931739a694776c64f0e2148a0ce7c71f11200 Mon Sep 17 00:00:00 2001 From: Duo Zhang Date: Tue, 9 Jun 2020 11:07:16 +0800 Subject: [PATCH] HBASE-24117 Shutdown AssignmentManager before ProcedureExecutor may cause SCP to accidentally skip assigning a region (#1865) Signed-off-by: Michael Stack (cherry picked from commit dee99336dda831e1fa69e7db0cb5bf01c8f480f7) Change-Id: I6bb4b16b68ea8c9e5baa39aa719447268da24c5c --- .../main/java/org/apache/hadoop/hbase/master/HMaster.java | 7 +++++-- .../hbase/master/procedure/ServerCrashProcedure.java | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 0c9d6150e511..10478eb7e09c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1460,6 +1460,11 @@ protected void stopServiceThreads() { LOG.debug("Stopping service threads"); + // stop procedure executor prior to other services such as server manager and assignment + // manager, as these services are important for some running procedures. See HBASE-24117 for + // example. + stopProcedureExecutor(); + if (this.quotaManager != null) { this.quotaManager.stop(); } @@ -1474,8 +1479,6 @@ protected void stopServiceThreads() { this.assignmentManager.stop(); } - stopProcedureExecutor(); - if (this.walManager != null) { this.walManager.stop(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 916acfee3d57..a2e756306155 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; @@ -473,6 +474,12 @@ private void assignRegions(MasterProcedureEnv env, List regions) thr // updated the region location, or even finished itself, so the region is no longer on us // any more, we should not try to assign it again. Please see HBASE-23594 for more details. if (!serverName.equals(regionNode.getRegionLocation())) { + // See HBASE-24117, though we have already changed the shutdown order, it is still worth + // double checking here to confirm that we do not skip assignment incorrectly. + if (!am.isRunning()) { + throw new DoNotRetryIOException( + "AssignmentManager has been stopped, can not process assignment any more"); + } LOG.info("{} found a region {} which is no longer on us {}, give up assigning...", this, regionNode, serverName); continue;