Skip to content

Commit

Permalink
HADOOP-18606. ABFS: Add reason in x-ms-client-request-id on a retried…
Browse files Browse the repository at this point in the history
… API call. (apache#5299)

Contributed by Pranav Saxena

(cherry picked from commit 358bf80)
  • Loading branch information
saxenapranav committed Jun 1, 2023
1 parent 28faec1 commit 4ad9424
Show file tree
Hide file tree
Showing 18 changed files with 1,116 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ public final class AbfsHttpConstants {
public static final char CHAR_EQUALS = '=';
public static final char CHAR_STAR = '*';
public static final char CHAR_PLUS = '+';
/**
 * Divisor that maps an HTTP status code to its category: the integer
 * quotient {@code statusCode / HTTP_STATUS_CATEGORY_QUOTIENT} is the leading
 * digit of a three-digit status code (for example, 4 for any 4XX code).
 * <pre>
 * 100 - 199 : Informational responses
 * 200 - 299 : Successful responses
 * 300 - 399 : Redirection messages
 * 400 - 499 : Client error responses
 * 500 - 599 : Server error responses
 * </pre>
 */
public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100;

public static final String BLOB = "Blob";
private static final String PREFIX = "Prefix";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public String getErrorCode() {
return this.errorCode;
}

/**
 * Gets the error message held by this error code.
 *
 * @return the value of the {@code errorMessage} field.
 */
public String getErrorMessage() {
  return errorMessage;
}

public static List<AzureServiceErrorCode> getAzureServiceCode(int httpStatusCode) {
List<AzureServiceErrorCode> errorCodes = new ArrayList<>();
if (httpStatusCode == UNKNOWN.httpStatusCode) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.azurebfs.AbfsStatistic;
import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
Expand Down Expand Up @@ -77,6 +78,12 @@ public class AbfsRestOperation {
private AbfsHttpOperation result;
private AbfsCounters abfsCounters;

/**
 * Abbreviated reason for the failure of the most recent API call made
 * through this AbfsRestOperation object. It is assigned from
 * {@code RetryReason.getAbbreviation(...)} when an attempt fails or is to be
 * retried, and is handed to {@code TracingContext.constructHeader(...)} when
 * the next attempt builds its request header. It stays {@code null} until a
 * reason has been recorded.
 */
private String failureReason;

/**
* Checks if there is non-null HTTP response.
* @return true if there is a non-null HTTP response from the ABFS call.
Expand Down Expand Up @@ -212,7 +219,7 @@ public void execute(TracingContext tracingContext)
private void completeExecute(TracingContext tracingContext)
throws AzureBlobFileSystemException {
// see if we have latency reports from the previous requests
String latencyHeader = this.client.getAbfsPerfTracker().getClientLatency();
String latencyHeader = getClientLatency();
if (latencyHeader != null && !latencyHeader.isEmpty()) {
AbfsHttpHeader httpHeader =
new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_ABFS_CLIENT_LATENCY, latencyHeader);
Expand Down Expand Up @@ -251,6 +258,11 @@ private void completeExecute(TracingContext tracingContext)
LOG.trace("{} REST operation complete", operationType);
}

/**
 * Fetches the client-latency string from the perf tracker of the client
 * used by this operation. Kept as a separate package-private method so that
 * tests can override or stub it.
 *
 * @return the value returned by the client's perf tracker.
 */
@VisibleForTesting
String getClientLatency() {
  return this.client.getAbfsPerfTracker().getClientLatency();
}

/**
* Sign an operation.
* @param httpOperation operation to sign
Expand Down Expand Up @@ -296,7 +308,7 @@ private boolean executeHttpOperation(final int retryCount,
// initialize the HTTP request and open the connection
httpOperation = createHttpOperation();
incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1);
tracingContext.constructHeader(httpOperation);
tracingContext.constructHeader(httpOperation, failureReason);

signRequest(httpOperation, hasRequestBody ? bufferLength : 0);
} catch (IOException e) {
Expand Down Expand Up @@ -332,7 +344,8 @@ private boolean executeHttpOperation(final int retryCount,
} catch (UnknownHostException ex) {
String hostname = null;
hostname = httpOperation.getHost();
LOG.warn("Unknown host name: %s. Retrying to resolve the host name...",
failureReason = RetryReason.getAbbreviation(ex, null, null);
LOG.warn("Unknown host name: {}. Retrying to resolve the host name...",
hostname);
if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) {
throw new InvalidAbfsRestOperationException(ex, retryCount);
Expand All @@ -343,6 +356,8 @@ private boolean executeHttpOperation(final int retryCount,
LOG.debug("HttpRequestFailure: {}, {}", httpOperation.toString(), ex);
}

failureReason = RetryReason.getAbbreviation(ex, -1, "");

if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) {
throw new InvalidAbfsRestOperationException(ex, retryCount);
}
Expand All @@ -368,6 +383,8 @@ private boolean executeHttpOperation(final int retryCount,
LOG.debug("HttpRequest: {}: {}", operationType, httpOperation.toString());

if (client.getRetryPolicy().shouldRetry(retryCount, httpOperation.getStatusCode())) {
int status = httpOperation.getStatusCode();
failureReason = RetryReason.getAbbreviation(null, status, httpOperation.getStorageErrorMessage());
return false;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.services;

import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ClientErrorRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionResetRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ConnectionTimeoutRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ReadTimeoutRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.RetryReasonCategory;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.ServerErrorRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownHostRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownIOExceptionRetryReason;
import org.apache.hadoop.fs.azurebfs.services.retryReasonCategories.UnknownSocketExceptionRetryReason;


/**
 * Utility class that converts a failed server interaction (an exception, or
 * a status code with its storage error message) into the abbreviation of the
 * retry-reason category it belongs to.
 */
final class RetryReason {

  /**
   * Implementations of RetryReasonCategory, ordered from least to most
   * significant. {@link #getAbbreviation(Exception, Integer, String)} consults
   * every entry in this order and keeps the abbreviation of the last category
   * that yields one, so an entry placed later in the list overrides an
   * entry placed earlier.
   * <ul>
   *   <li>ServerError (statusCode==5XX), ClientError (statusCode==4XX) are
   *   independent of other retryReason categories.</li>
   *   <li>Since {@link java.net.SocketException} is subclass of
   *   {@link java.io.IOException}, {@link UnknownIOExceptionRetryReason} is
   *   placed before {@link UnknownSocketExceptionRetryReason}, so that the
   *   more specific socket category overrides the generic one.</li>
   *   <li>{@link ConnectionTimeoutRetryReason}, {@link ReadTimeoutRetryReason}
   *   and {@link ConnectionResetRetryReason} are differentiated by the
   *   exception message, so they are placed after
   *   {@link UnknownIOExceptionRetryReason} and override it. There is no
   *   order between the three reasons.</li>
   *   <li>Since {@link java.net.UnknownHostException} is subclass of
   *   {@link java.io.IOException}, {@link UnknownHostRetryReason} is placed
   *   after {@link UnknownIOExceptionRetryReason} and overrides it.</li>
   * </ul>
   */
  private static final List<RetryReasonCategory> RANKED_REASON_CATEGORIES
      = new LinkedList<>();

  // Populated in a static initializer instead of double-brace initialization,
  // which would create an anonymous LinkedList subclass just to fill the list.
  static {
    RANKED_REASON_CATEGORIES.add(new ServerErrorRetryReason());
    RANKED_REASON_CATEGORIES.add(new ClientErrorRetryReason());
    RANKED_REASON_CATEGORIES.add(new UnknownIOExceptionRetryReason());
    RANKED_REASON_CATEGORIES.add(new UnknownSocketExceptionRetryReason());
    RANKED_REASON_CATEGORIES.add(new ConnectionTimeoutRetryReason());
    RANKED_REASON_CATEGORIES.add(new ReadTimeoutRetryReason());
    RANKED_REASON_CATEGORIES.add(new UnknownHostRetryReason());
    RANKED_REASON_CATEGORIES.add(new ConnectionResetRetryReason());
  }

  private RetryReason() {
    // utility class, not meant to be instantiated.
  }

  /**
   * Method to get the abbreviation for a given set of exception, statusCode
   * and storageErrorMessage.
   *
   * @param ex exception caught during server communication.
   * @param statusCode statusCode in the server response.
   * @param storageErrorMessage storageErrorMessage in the server response.
   *
   * @return abbreviation for the given set of exception, statusCode and
   * storageErrorMessage; {@code null} if no category yields an abbreviation.
   */
  static String getAbbreviation(Exception ex,
      Integer statusCode,
      String storageErrorMessage) {
    String result = null;
    // Every category is consulted, in rank order; the last non-null
    // abbreviation wins.
    for (RetryReasonCategory retryReasonCategory : RANKED_REASON_CATEGORIES) {
      final String abbreviation
          = retryReasonCategory.captureAndGetAbbreviation(ex,
          statusCode, storageErrorMessage);
      if (abbreviation != null) {
        result = abbreviation;
      }
    }
    return result;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.services;

/**
 * Holder of the string constants used while categorizing retry reasons:
 * the messages that are matched against, and the abbreviations that are
 * reported for each reason.
 */
public final class RetryReasonConstants {

  // Messages that are matched to identify a failure.
  public static final String CONNECTION_TIMEOUT_JDK_MESSAGE = "connect timed out";
  public static final String READ_TIMEOUT_JDK_MESSAGE = "Read timed out";
  public static final String CONNECTION_RESET_MESSAGE = "Connection reset";
  public static final String OPERATION_BREACH_MESSAGE = "Operations per second is over the account limit.";

  // Abbreviations for connection-level failures.
  public static final String CONNECTION_RESET_ABBREVIATION = "CR";
  public static final String CONNECTION_TIMEOUT_ABBREVIATION = "CT";
  public static final String READ_TIMEOUT_ABBREVIATION = "RT";
  public static final String UNKNOWN_HOST_EXCEPTION_ABBREVIATION = "UH";

  // Abbreviations for limit breaches.
  public static final String INGRESS_LIMIT_BREACH_ABBREVIATION = "ING";
  public static final String EGRESS_LIMIT_BREACH_ABBREVIATION = "EGR";
  public static final String OPERATION_LIMIT_BREACH_ABBREVIATION = "OPR";

  // Abbreviations for exceptions without a more specific category.
  public static final String IO_EXCEPTION_ABBREVIATION = "IOE";
  public static final String SOCKET_EXCEPTION_ABBREVIATION = "SE";

  private RetryReasonConstants() {
    // constants holder, not meant to be instantiated.
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;

import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_STATUS_CATEGORY_QUOTIENT;

/**
 * Retry-reason category for server responses whose status code is 4XX.
 */
public class ClientErrorRetryReason extends RetryReasonCategory {

  /**
   * Accepts the response only when a status code is present and its
   * quotient by HTTP_STATUS_CATEGORY_QUOTIENT is 4. The exception and the
   * server error message are not looked at.
   */
  @Override
  Boolean canCapture(final Exception ex,
      final Integer statusCode,
      final String serverErrorMessage) {
    return statusCode != null
        && statusCode / HTTP_STATUS_CATEGORY_QUOTIENT == 4;
  }

  /**
   * The abbreviation of a client error is the status code itself, rendered
   * as a string.
   */
  @Override
  String getAbbreviation(final Integer statusCode,
      final String serverErrorMessage) {
    return String.valueOf(statusCode);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;

import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_ABBREVIATION;
import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_RESET_MESSAGE;

/**
 * Retry-reason category for failures identified by the connection-reset
 * exception message.
 */
public class ConnectionResetRetryReason extends RetryReasonCategory {

  /**
   * Always reports the connection-reset abbreviation; both arguments are
   * ignored.
   */
  @Override
  String getAbbreviation(final Integer statusCode,
      final String serverErrorMessage) {
    return CONNECTION_RESET_ABBREVIATION;
  }

  /**
   * Delegates to checkExceptionMessage with the connection-reset message.
   * The status code and the server error message are not looked at.
   */
  @Override
  Boolean canCapture(final Exception ex,
      final Integer statusCode,
      final String serverErrorMessage) {
    final Boolean matchesConnectionReset
        = checkExceptionMessage(ex, CONNECTION_RESET_MESSAGE);
    return matchesConnectionReset;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.services.retryReasonCategories;


import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION;
import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE;

/**
 * Retry-reason category for failures identified by the JDK's
 * connection-timeout exception message.
 */
public class ConnectionTimeoutRetryReason extends RetryReasonCategory {

  /**
   * Delegates to checkExceptionMessage with the connection-timeout message.
   * The status code and the server error message are not looked at.
   */
  @Override
  Boolean canCapture(final Exception ex,
      final Integer statusCode,
      final String serverErrorMessage) {
    final Boolean matchesConnectionTimeout
        = checkExceptionMessage(ex, CONNECTION_TIMEOUT_JDK_MESSAGE);
    return matchesConnectionTimeout;
  }

  /**
   * Always reports the connection-timeout abbreviation; both arguments are
   * ignored.
   */
  @Override
  String getAbbreviation(final Integer statusCode,
      final String serverErrorMessage) {
    return CONNECTION_TIMEOUT_ABBREVIATION;
  }
}

0 comments on commit 4ad9424

Please sign in to comment.