forked from apache/ozone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
XceiverClientGrpc.java
498 lines (459 loc) · 18.7 KB
/
XceiverClientGrpc.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.DatanodeBlockID;
import org.apache.hadoop.hdds.protocol.datanode.proto.XceiverClientProtocolServiceGrpc;
import org.apache.hadoop.hdds.protocol.datanode.proto.XceiverClientProtocolServiceGrpc.XceiverClientProtocolServiceStub;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.storage.CheckedBiFunction;
import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
import org.apache.hadoop.hdds.security.x509.SecurityConfig;
import org.apache.hadoop.hdds.tracing.GrpcClientInterceptor;
import org.apache.hadoop.hdds.tracing.TracingUtil;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.util.Time;
import io.opentracing.Scope;
import io.opentracing.util.GlobalTracer;
import org.apache.ratis.thirdparty.io.grpc.ManagedChannel;
import org.apache.ratis.thirdparty.io.grpc.Status;
import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts;
import org.apache.ratis.thirdparty.io.grpc.netty.NettyChannelBuilder;
import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver;
import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.security.cert.X509Certificate;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
* A Client for the storageContainer protocol for read object data.
*/
public class XceiverClientGrpc extends XceiverClientSpi {
static final Logger LOG = LoggerFactory.getLogger(XceiverClientGrpc.class);
private static final String COMPONENT = "dn";
private final Pipeline pipeline;
private final Configuration config;
private Map<UUID, XceiverClientProtocolServiceStub> asyncStubs;
private XceiverClientMetrics metrics;
private Map<UUID, ManagedChannel> channels;
private final Semaphore semaphore;
private boolean closed = false;
private SecurityConfig secConfig;
private final boolean topologyAwareRead;
private X509Certificate caCert;
// Cache the DN which returned the GetBlock command so that the ReadChunk
// command can be sent to the same DN.
private Map<DatanodeBlockID, DatanodeDetails> getBlockDNcache;
/**
* Constructs a client that can communicate with the Container framework on
* data nodes.
*
* @param pipeline - Pipeline that defines the machines.
* @param config -- Ozone Config
* @param caCert - SCM ca certificate.
*/
public XceiverClientGrpc(Pipeline pipeline, Configuration config,
X509Certificate caCert) {
super();
Preconditions.checkNotNull(pipeline);
Preconditions.checkNotNull(config);
this.pipeline = pipeline;
this.config = config;
this.secConfig = new SecurityConfig(config);
this.semaphore =
new Semaphore(HddsClientUtils.getMaxOutstandingRequests(config));
this.metrics = XceiverClientManager.getXceiverClientMetrics();
this.channels = new HashMap<>();
this.asyncStubs = new HashMap<>();
this.topologyAwareRead = config.getBoolean(
OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_KEY,
OzoneConfigKeys.OZONE_NETWORK_TOPOLOGY_AWARE_READ_DEFAULT);
this.caCert = caCert;
this.getBlockDNcache = new ConcurrentHashMap<>();
}
/**
* Constructs a client that can communicate with the Container framework on
* data nodes.
*
* @param pipeline - Pipeline that defines the machines.
* @param config -- Ozone Config
*/
public XceiverClientGrpc(Pipeline pipeline, Configuration config) {
this(pipeline, config, null);
}
/**
* To be used when grpc token is not enabled.
*/
@Override
public void connect() throws Exception {
// connect to the closest node, if closest node doesn't exist, delegate to
// first node, which is usually the leader in the pipeline.
DatanodeDetails dn = topologyAwareRead ? this.pipeline.getClosestNode() :
this.pipeline.getFirstNode();
// just make a connection to the picked datanode at the beginning
connectToDatanode(dn, null);
}
/**
* Passed encoded token to GRPC header when security is enabled.
*/
@Override
public void connect(String encodedToken) throws Exception {
// connect to the closest node, if closest node doesn't exist, delegate to
// first node, which is usually the leader in the pipeline.
DatanodeDetails dn = topologyAwareRead ? this.pipeline.getClosestNode() :
this.pipeline.getFirstNode();
// just make a connection to the picked datanode at the beginning
connectToDatanode(dn, encodedToken);
}
private synchronized void connectToDatanode(DatanodeDetails dn,
String encodedToken) throws IOException {
if (isConnected(dn)){
return;
}
// read port from the data node, on failure use default configured
// port.
int port = dn.getPort(DatanodeDetails.Port.Name.STANDALONE).getValue();
if (port == 0) {
port = config.getInt(OzoneConfigKeys.DFS_CONTAINER_IPC_PORT,
OzoneConfigKeys.DFS_CONTAINER_IPC_PORT_DEFAULT);
}
// Add credential context to the client call
if (LOG.isDebugEnabled()) {
LOG.debug("Nodes in pipeline : {}", pipeline.getNodes().toString());
LOG.debug("Connecting to server : {}", dn.getIpAddress());
}
NettyChannelBuilder channelBuilder =
NettyChannelBuilder.forAddress(dn.getIpAddress(), port).usePlaintext()
.maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE)
.intercept(new GrpcClientInterceptor());
if (secConfig.isGrpcTlsEnabled()) {
SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient();
if (caCert != null) {
sslContextBuilder.trustManager(caCert);
}
if (secConfig.useTestCert()) {
channelBuilder.overrideAuthority("localhost");
}
channelBuilder.useTransportSecurity().
sslContext(sslContextBuilder.build());
} else {
channelBuilder.usePlaintext();
}
ManagedChannel channel = channelBuilder.build();
XceiverClientProtocolServiceStub asyncStub =
XceiverClientProtocolServiceGrpc.newStub(channel);
asyncStubs.put(dn.getUuid(), asyncStub);
channels.put(dn.getUuid(), channel);
}
/**
* Returns if the xceiver client connects to all servers in the pipeline.
*
* @return True if the connection is alive, false otherwise.
*/
@VisibleForTesting
public boolean isConnected(DatanodeDetails details) {
return isConnected(channels.get(details.getUuid()));
}
private boolean isConnected(ManagedChannel channel) {
return channel != null && !channel.isTerminated() && !channel.isShutdown();
}
@Override
public synchronized void close() {
closed = true;
for (ManagedChannel channel : channels.values()) {
channel.shutdownNow();
try {
channel.awaitTermination(60, TimeUnit.MINUTES);
} catch (Exception e) {
LOG.error("Unexpected exception while waiting for channel termination",
e);
}
}
}
@Override
public Pipeline getPipeline() {
return pipeline;
}
@Override
public ContainerCommandResponseProto sendCommand(
ContainerCommandRequestProto request) throws IOException {
try {
XceiverClientReply reply;
reply = sendCommandWithTraceIDAndRetry(request, null);
return reply.getResponse().get();
} catch (ExecutionException | InterruptedException e) {
throw new IOException("Failed to execute command " + request, e);
}
}
@Override
public ContainerCommandResponseProto sendCommand(
ContainerCommandRequestProto request, List<CheckedBiFunction> validators)
throws IOException {
try {
XceiverClientReply reply;
reply = sendCommandWithTraceIDAndRetry(request, validators);
return reply.getResponse().get();
} catch (ExecutionException | InterruptedException e) {
throw new IOException("Failed to execute command " + request, e);
}
}
private XceiverClientReply sendCommandWithTraceIDAndRetry(
ContainerCommandRequestProto request, List<CheckedBiFunction> validators)
throws IOException {
try (Scope scope = GlobalTracer.get()
.buildSpan("XceiverClientGrpc." + request.getCmdType().name())
.startActive(true)) {
ContainerCommandRequestProto finalPayload =
ContainerCommandRequestProto.newBuilder(request)
.setTraceID(TracingUtil.exportCurrentSpan()).build();
return sendCommandWithRetry(finalPayload, validators);
}
}
private XceiverClientReply sendCommandWithRetry(
ContainerCommandRequestProto request, List<CheckedBiFunction> validators)
throws IOException {
ContainerCommandResponseProto responseProto = null;
IOException ioException = null;
// In case of an exception or an error, we will try to read from the
// datanodes in the pipeline in a round robin fashion.
// TODO: cache the correct leader info in here, so that any subsequent calls
// should first go to leader
XceiverClientReply reply = new XceiverClientReply(null);
List<DatanodeDetails> datanodeList = null;
DatanodeBlockID blockID = null;
if (request.getCmdType() == ContainerProtos.Type.ReadChunk) {
blockID = request.getReadChunk().getBlockID();
} else if (request.getCmdType() == ContainerProtos.Type.GetSmallFile) {
blockID = request.getGetSmallFile().getBlock().getBlockID();
}
if (blockID != null) {
// Check if the DN to which the GetBlock command was sent has been cached.
DatanodeDetails cachedDN = getBlockDNcache.get(blockID);
if (cachedDN != null) {
datanodeList = pipeline.getNodes();
int getBlockDNCacheIndex = datanodeList.indexOf(cachedDN);
if (getBlockDNCacheIndex > 0) {
// Pull the Cached DN to the top of the DN list
Collections.swap(datanodeList, 0, getBlockDNCacheIndex);
}
} else if (topologyAwareRead) {
datanodeList = pipeline.getNodesInOrder();
}
}
if (datanodeList == null) {
datanodeList = pipeline.getNodes();
// Shuffle datanode list so that clients do not read in the same order
// every time.
Collections.shuffle(datanodeList);
}
for (DatanodeDetails dn : datanodeList) {
try {
if (LOG.isDebugEnabled()) {
LOG.debug("Executing command " + request + " on datanode " + dn);
}
// In case the command gets retried on a 2nd datanode,
// sendCommandAsyncCall will create a new channel and async stub
// in case these don't exist for the specific datanode.
reply.addDatanode(dn);
responseProto = sendCommandAsync(request, dn).getResponse().get();
if (validators != null && !validators.isEmpty()) {
for (CheckedBiFunction validator : validators) {
validator.apply(request, responseProto);
}
}
if (request.getCmdType() == ContainerProtos.Type.GetBlock) {
DatanodeBlockID getBlockID = request.getGetBlock().getBlockID();
getBlockDNcache.put(getBlockID, dn);
}
break;
} catch (ExecutionException | InterruptedException | IOException e) {
LOG.debug("Failed to execute command {} on datanode {}",
request, dn.getUuid(), e);
if (!(e instanceof IOException)) {
if (Status.fromThrowable(e.getCause()).getCode()
== Status.UNAUTHENTICATED.getCode()) {
throw new SCMSecurityException("Failed to authenticate with "
+ "GRPC XceiverServer with Ozone block token.");
}
ioException = new IOException(e);
} else {
ioException = (IOException) e;
}
responseProto = null;
}
}
if (responseProto != null) {
reply.setResponse(CompletableFuture.completedFuture(responseProto));
return reply;
} else {
Preconditions.checkNotNull(ioException);
LOG.error("Failed to execute command {} on the pipeline {}.", request,
pipeline);
throw ioException;
}
}
// TODO: for a true async API, once the waitable future while executing
// the command on one channel fails, it should be retried asynchronously
// on the future Task for all the remaining datanodes.
// Note: this Async api is not used currently used in any active I/O path.
// In case it gets used, the asynchronous retry logic needs to be plugged
// in here.
/**
* Sends a given command to server gets a waitable future back.
*
* @param request Request
* @return Response to the command
* @throws IOException
*/
@Override
public XceiverClientReply sendCommandAsync(
ContainerCommandRequestProto request)
throws IOException, ExecutionException, InterruptedException {
try (Scope scope = GlobalTracer.get()
.buildSpan("XceiverClientGrpc." + request.getCmdType().name())
.startActive(true)) {
ContainerCommandRequestProto finalPayload =
ContainerCommandRequestProto.newBuilder(request)
.setTraceID(TracingUtil.exportCurrentSpan())
.build();
XceiverClientReply asyncReply =
sendCommandAsync(finalPayload, pipeline.getFirstNode());
// TODO : for now make this API sync in nature as async requests are
// served out of order over XceiverClientGrpc. This needs to be fixed
// if this API is to be used for I/O path. Currently, this is not
// used for Read/Write Operation but for tests.
if (!HddsUtils.isReadOnly(request)) {
asyncReply.getResponse().get();
}
return asyncReply;
}
}
private XceiverClientReply sendCommandAsync(
ContainerCommandRequestProto request, DatanodeDetails dn)
throws IOException, InterruptedException {
checkOpen(dn, request.getEncodedToken());
UUID dnId = dn.getUuid();
if (LOG.isDebugEnabled()) {
LOG.debug("Send command {} to datanode {}",
request.getCmdType().toString(), dn.getNetworkFullPath());
}
final CompletableFuture<ContainerCommandResponseProto> replyFuture =
new CompletableFuture<>();
semaphore.acquire();
long requestTime = Time.monotonicNowNanos();
metrics.incrPendingContainerOpsMetrics(request.getCmdType());
// create a new grpc stream for each non-async call.
// TODO: for async calls, we should reuse StreamObserver resources.
final StreamObserver<ContainerCommandRequestProto> requestObserver =
asyncStubs.get(dnId)
.send(new StreamObserver<ContainerCommandResponseProto>() {
@Override
public void onNext(ContainerCommandResponseProto value) {
replyFuture.complete(value);
metrics.decrPendingContainerOpsMetrics(request.getCmdType());
metrics.addContainerOpsLatency(request.getCmdType(),
Time.monotonicNowNanos() - requestTime);
semaphore.release();
}
@Override
public void onError(Throwable t) {
replyFuture.completeExceptionally(t);
metrics.decrPendingContainerOpsMetrics(request.getCmdType());
metrics.addContainerOpsLatency(request.getCmdType(),
Time.monotonicNowNanos() - requestTime);
semaphore.release();
}
@Override
public void onCompleted() {
if (!replyFuture.isDone()) {
replyFuture.completeExceptionally(new IOException(
"Stream completed but no reply for request " + request));
}
}
});
requestObserver.onNext(request);
requestObserver.onCompleted();
return new XceiverClientReply(replyFuture);
}
private synchronized void checkOpen(DatanodeDetails dn, String encodedToken)
throws IOException{
if (closed) {
throw new IOException("This channel is not connected.");
}
ManagedChannel channel = channels.get(dn.getUuid());
// If the channel doesn't exist for this specific datanode or the channel
// is closed, just reconnect
if (!isConnected(channel)) {
reconnect(dn, encodedToken);
}
}
private void reconnect(DatanodeDetails dn, String encodedToken)
throws IOException {
ManagedChannel channel;
try {
connectToDatanode(dn, encodedToken);
channel = channels.get(dn.getUuid());
} catch (Exception e) {
LOG.error("Error while connecting: ", e);
throw new IOException(e);
}
if (channel == null || !isConnected(channel)) {
throw new IOException("This channel is not connected.");
}
}
@Override
public XceiverClientReply watchForCommit(long index, long timeout)
throws InterruptedException, ExecutionException, TimeoutException,
IOException {
// there is no notion of watch for commit index in standalone pipeline
return null;
};
public long getReplicatedMinCommitIndex() {
return 0;
}
/**
* Returns pipeline Type.
*
* @return - Stand Alone as the type.
*/
@Override
public HddsProtos.ReplicationType getPipelineType() {
return HddsProtos.ReplicationType.STAND_ALONE;
}
}