Skip to content

Commit

Permalink
HBASE-23304: RPCs needed for client meta information lookup (apache#904
Browse files Browse the repository at this point in the history
…) (apache#1098)

* HBASE-23257: Track clusterID in stand by masters (apache#798)

This patch implements a simple cache that all the masters
can lookup to serve cluster ID to clients. Active HMaster
is still responsible for creating it but all the masters
will read it from fs to serve clients.

RPCs exposing it will come in a separate patch as a part of
HBASE-18095.

Signed-off-by: Andrew Purtell <apurtell@apache.org>
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
Signed-off-by: Guangxu Cheng <guangxucheng@gmail.com>
(cherry picked from commit c2e01f2)

* HBASE-23275: Track active master's address in ActiveMasterManager (apache#812)

Currently we just track whether an active master exists.
It helps to also track the address of the active master in
all the masters to help serve the client RPC requests to
know which master is active.

Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
Signed-off-by: Andrew Purtell <apurtell@apache.org>
(cherry picked from commit efebb84)

* HBASE-23281: Track meta region locations in masters (apache#830)

* HBASE-23281: Track meta region changes on masters

This patch adds a simple cache that tracks the meta region replica
locations. It keeps an eye on the region movements so that the
cached locations are not stale.

This information is used for servicing client RPCs for connections
that use master based registry (HBASE-18095). The RPC end points
will be added in a separate patch.

Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
(cherry picked from commit 8571d38)

* HBASE-23304: RPCs needed for client meta information lookup (apache#904)

* HBASE-23304: RPCs needed for client meta information lookup

This patch implements the RPCs needed for the meta information
lookup during connection init. New tests added to cover the RPC
code paths. HBASE-23305 builds on this to implement the client
side logic.

Fixed a bunch of checkstyle nits around the places the patch
touches.

Signed-off-by: Andrew Purtell <apurtell@apache.org>
(cherry picked from commit 4f8fbba)
  • Loading branch information
bharathv committed Jan 29, 2020
1 parent 36cdcad commit 71f0354
Show file tree
Hide file tree
Showing 29 changed files with 1,180 additions and 118 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -80,6 +80,7 @@
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionLoadStats;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.RegionStatesCount;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
Expand All @@ -93,6 +94,7 @@
import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.protobuf.ProtobufMessageConverter;
import org.apache.hadoop.hbase.quotas.QuotaScope;
Expand Down Expand Up @@ -375,7 +377,9 @@ private static IOException makeIOExceptionOfException(Exception e) {
* @see #toServerName(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName)
*/
public static HBaseProtos.ServerName toServerName(final ServerName serverName) {
if (serverName == null) return null;
if (serverName == null) {
return null;
}
HBaseProtos.ServerName.Builder builder =
HBaseProtos.ServerName.newBuilder();
builder.setHostName(serverName.getHostname());
Expand Down Expand Up @@ -3071,6 +3075,44 @@ public static ProcedureDescription buildProcedureDescription(String signature, S
return builder.build();
}

/**
* Get the Meta region state from the passed data bytes. Can handle both old and new style
* server names.
* @param data protobuf serialized data with meta server name.
* @param replicaId replica ID for this region
* @return RegionState instance corresponding to the serialized data.
* @throws DeserializationException if the data is invalid.
*/
public static RegionState parseMetaRegionStateFrom(final byte[] data, int replicaId)
throws DeserializationException {
RegionState.State state = RegionState.State.OPEN;
ServerName serverName;
if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
try {
int prefixLen = ProtobufUtil.lengthOfPBMagic();
ZooKeeperProtos.MetaRegionServer rl =
ZooKeeperProtos.MetaRegionServer.parser().parseFrom(data, prefixLen,
data.length - prefixLen);
if (rl.hasState()) {
state = RegionState.State.convert(rl.getState());
}
HBaseProtos.ServerName sn = rl.getServer();
serverName = ServerName.valueOf(
sn.getHostName(), sn.getPort(), sn.getStartCode());
} catch (InvalidProtocolBufferException e) {
throw new DeserializationException("Unable to parse meta region location");
}
} else {
// old style of meta region location?
serverName = parseServerNameFrom(data);
}
if (serverName == null) {
state = RegionState.State.OFFLINE;
}
return new RegionState(RegionReplicaUtil.getRegionInfoForReplica(
RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), state, serverName);
}

/**
* Get a ServerName from the passed in data bytes.
* @param data Data with a serialize server name in it; can handle the old style
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ public class ZNodePaths {
// TODO: Replace this with ZooKeeper constant when ZOOKEEPER-277 is resolved.
public static final char ZNODE_PATH_SEPARATOR = '/';

private static final String META_ZNODE_PREFIX = "meta-region-server";
public static final String META_ZNODE_PREFIX_CONF_KEY = "zookeeper.znode.metaserver";
public static final String META_ZNODE_PREFIX = "meta-region-server";
private static final String DEFAULT_SNAPSHOT_CLEANUP_ZNODE = "snapshot-cleanup";

// base znode for this cluster
Expand Down Expand Up @@ -104,7 +105,7 @@ public class ZNodePaths {
public ZNodePaths(Configuration conf) {
baseZNode = conf.get(ZOOKEEPER_ZNODE_PARENT, DEFAULT_ZOOKEEPER_ZNODE_PARENT);
ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
metaZNodePrefix = conf.get("zookeeper.znode.metaserver", META_ZNODE_PREFIX);
metaZNodePrefix = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
String defaultMetaReplicaZNode = ZNodePaths.joinZNode(baseZNode, metaZNodePrefix);
builder.put(DEFAULT_REPLICA_ID, defaultMetaReplicaZNode);
int numMetaReplicas = conf.getInt(META_REPLICAS_NUM, DEFAULT_META_REPLICA_NUM);
Expand Down Expand Up @@ -189,7 +190,19 @@ public String getZNodeForReplica(int replicaId) {
}

/**
* Parse the meta replicaId from the passed znode name.
* Parses the meta replicaId from the passed path.
* @param path the name of the full path which includes baseZNode.
* @return replicaId
*/
public int getMetaReplicaIdFromPath(String path) {
// Extract the znode from path. The prefix is of the following format.
// baseZNode + PATH_SEPARATOR.
int prefixLen = baseZNode.length() + 1;
return getMetaReplicaIdFromZnode(path.substring(prefixLen));
}

/**
* Parse the meta replicaId from the passed znode
* @param znode the name of the znode, does not include baseZNode
* @return replicaId
*/
Expand Down
44 changes: 44 additions & 0 deletions hbase-protocol-shaded/src/main/protobuf/Master.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1196,3 +1196,47 @@ service HbckService {
rpc FixMeta(FixMetaRequest)
returns(FixMetaResponse);
}

/** Request and response to get the clusterID for this cluster */
message GetClusterIdRequest {
}
message GetClusterIdResponse {
/** Not set if cluster ID could not be determined. */
optional string cluster_id = 1;
}

/** Request and response to get the currently active master name for this cluster */
message GetActiveMasterRequest {
}
message GetActiveMasterResponse {
/** Not set if an active master could not be determined. */
optional ServerName server_name = 1;
}

/** Request and response to get the current list of meta region locations */
message GetMetaRegionLocationsRequest {
}
message GetMetaRegionLocationsResponse {
/** Not set if meta region locations could not be determined. */
repeated RegionLocation meta_locations = 1;
}

/**
* Implements all the RPCs needed by clients to look up cluster meta information needed for connection establishment.
*/
service ClientMetaService {
/**
* Get Cluster ID for this cluster.
*/
rpc GetClusterId(GetClusterIdRequest) returns(GetClusterIdResponse);

/**
* Get active master server name for this cluster.
*/
rpc GetActiveMaster(GetActiveMasterRequest) returns(GetActiveMasterResponse);

/**
* Get current meta replicas' region locations.
*/
rpc GetMetaRegionLocations(GetMetaRegionLocationsRequest) returns(GetMetaRegionLocationsResponse);
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand All @@ -17,25 +17,24 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKListener;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;

/**
* Handles everything on master-side related to master election.
Expand All @@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZKListener {
final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
final AtomicBoolean clusterShutDown = new AtomicBoolean(false);

// This server's information.
private final ServerName sn;
private int infoPort;
private final Server master;

// Active master's server name. Invalidated anytime active master changes (based on ZK
// notifications) and lazily fetched on-demand.
// ServerName is immutable, so we don't need heavy synchronization around it.
private volatile ServerName activeMasterServerName;

/**
* @param watcher
* @param watcher ZK watcher
* @param sn ServerName
* @param master In an instance of a Master.
*/
Expand Down Expand Up @@ -106,6 +111,30 @@ void handle(final String path) {
}
}

/**
* Fetches the active master's ServerName from zookeeper.
*/
private void fetchAndSetActiveMasterServerName() {
LOG.debug("Attempting to fetch active master sn from zk");
try {
activeMasterServerName = MasterAddressTracker.getMasterAddress(watcher);
} catch (IOException | KeeperException e) {
// Log and ignore for now and re-fetch later if needed.
LOG.error("Error fetching active master information", e);
}
}

public Optional<ServerName> getActiveMasterServerName() {
if (!clusterHasActiveMaster.get()) {
return Optional.empty();
}
if (activeMasterServerName == null) {
fetchAndSetActiveMasterServerName();
}
// It could still be null, but return whatever we have.
return Optional.ofNullable(activeMasterServerName);
}

/**
* Handle a change in the master node. Doesn't matter whether this was called
* from a nodeCreated or nodeDeleted event because there are no guarantees
Expand Down Expand Up @@ -134,6 +163,9 @@ private void handleMasterNodeChange() {
// Notify any thread waiting to become the active master
clusterHasActiveMaster.notifyAll();
}
// Reset the active master sn. Will be re-fetched later if needed.
// We don't want to make a synchronous RPC under a monitor.
activeMasterServerName = null;
}
} catch (KeeperException ke) {
master.abort("Received an unexpected KeeperException, aborting", ke);
Expand All @@ -151,8 +183,8 @@ private void handleMasterNodeChange() {
* @param checkInterval the interval to check if the master is stopped
* @param startupStatus the monitor status to track the progress
* @return True if no issue becoming active master else false if another
* master was running or if some other problem (zookeeper, stop flag has been
* set on this Master)
* master was running or if some other problem (zookeeper, stop flag has been
* set on this Master)
*/
boolean blockUntilBecomingActiveMaster(
int checkInterval, MonitoredTask startupStatus) {
Expand All @@ -178,10 +210,14 @@ boolean blockUntilBecomingActiveMaster(
// We are the master, return
startupStatus.setStatus("Successfully registered as active master.");
this.clusterHasActiveMaster.set(true);
activeMasterServerName = sn;
LOG.info("Registered as active master=" + this.sn);
return true;
}

// Invalidate the active master name so that subsequent requests do not get any stale
// master information. Will be re-fetched if needed.
activeMasterServerName = null;
// There is another active master running elsewhere or this is a restart
// and the master ephemeral node has not expired yet.
this.clusterHasActiveMaster.set(true);
Expand All @@ -208,7 +244,8 @@ boolean blockUntilBecomingActiveMaster(
ZKUtil.deleteNode(this.watcher, this.watcher.getZNodePaths().masterAddressZNode);

// We may have failed to delete the znode at the previous step, but
// we delete the file anyway: a second attempt to delete the znode is likely to fail again.
// we delete the file anyway: a second attempt to delete the znode is likely to fail
// again.
ZNodeClearer.deleteMyEphemeralNodeOnDisk();
} else {
msg = "Another master is the active master, " + currentMaster +
Expand Down
Loading

0 comments on commit 71f0354

Please sign in to comment.