diff --git a/ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java b/ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java index abe9ece13c..46d5bc4a47 100644 --- a/ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java +++ b/ambry-api/src/main/java/com/github/ambry/config/RouterConfig.java @@ -130,6 +130,8 @@ public class RouterConfig { public static final String ROUTER_STORE_KEY_CONVERTER_FACTORY = "router.store.key.converter.factory"; public static final String ROUTER_UNAVAILABLE_DUE_TO_OFFLINE_REPLICAS = "router.unavailable.due.to.offline.replicas"; public static final String ROUTER_NOT_FOUND_CACHE_TTL_IN_MS = "router.not.found.cache.ttl.in.ms"; + public static final String ROUTER_NAMED_BLOB_TRANSLATE_NOT_FOUND_TO_UNAVAILABLE_ENABLED = + "router.named.blob.translate.not.found.to.unavailable.enabled"; public static final String ROUTER_UPDATE_OP_METADATA_RELIANCE_TIMESTAMP_IN_MS = "router.update.op.metadata.reliance.timestamp.in.ms"; public static final String ROUTER_UNAVAILABLE_DUE_TO_SUCCESS_COUNT_IS_NON_ZERO_FOR_DELETE = @@ -647,6 +649,17 @@ public class RouterConfig { @Default("15*1000") public final long routerNotFoundCacheTtlInMs; + /** + * If {@code true}, when a named blob's metadata row exists but storage replicas return + * {@link com.github.ambry.router.RouterErrorCode#BlobDoesNotExist}, the router translates the + * error to {@link com.github.ambry.router.RouterErrorCode#AmbryUnavailable} (retryable 503) + * instead of letting it surface as an authoritative 404. Acts as a kill switch for the + * behavior added in PR #3234 / AMBRY-14247. + */ + @Config(ROUTER_NAMED_BLOB_TRANSLATE_NOT_FOUND_TO_UNAVAILABLE_ENABLED) + @Default("true") + public final boolean routerNamedBlobTranslateNotFoundToUnavailableEnabled; + public static final String ROUTER_BLOB_METADATA_CACHE_ID = "router.blob.metadata.cache.id"; @Config(ROUTER_BLOB_METADATA_CACHE_ID) public final String routerBlobMetadataCacheId; @@ -935,6 +948,8 @@ public RouterConfig(VerifiableProperties verifiableProperties) { verifiableProperties.getString(OPERATION_CONTROLLER, "com.github.ambry.router.OperationController"); routerNotFoundCacheTtlInMs = verifiableProperties.getLongInRange(ROUTER_NOT_FOUND_CACHE_TTL_IN_MS, 15 * 1000L, 0, ROUTER_NOT_FOUND_CACHE_MAX_TTL_IN_MS); + routerNamedBlobTranslateNotFoundToUnavailableEnabled = + verifiableProperties.getBoolean(ROUTER_NAMED_BLOB_TRANSLATE_NOT_FOUND_TO_UNAVAILABLE_ENABLED, true); routerUpdateOpMetadataRelianceTimestampInMs = verifiableProperties.getLong(ROUTER_UPDATE_OP_METADATA_RELIANCE_TIMESTAMP_IN_MS, DEFAULT_ROUTER_UPDATE_OP_METADATA_RELIANCE_TIMESTAMP_IN_MS); diff --git a/ambry-router/src/main/java/com/github/ambry/router/NonBlockingRouter.java b/ambry-router/src/main/java/com/github/ambry/router/NonBlockingRouter.java index 8775a42ff2..ddd6d98eb5 100644 --- a/ambry-router/src/main/java/com/github/ambry/router/NonBlockingRouter.java +++ b/ambry-router/src/main/java/com/github/ambry/router/NonBlockingRouter.java @@ -351,6 +351,9 @@ private Exception translateNamedBlobMissingInStorage(String resolvedBlobId, Exce if (e instanceof RouterException && ((RouterException) e).getErrorCode() == RouterErrorCode.BlobDoesNotExist) { routerMetrics.namedBlobMetadataExistsButStorageNotFoundCount.inc(); + if (!routerConfig.routerNamedBlobTranslateNotFoundToUnavailableEnabled) { + return e; + } logger.warn("Named blob metadata exists but storage returned BlobNotFound for blob {}; " + "translating to AmbryUnavailable (retryable 503)", resolvedBlobId); return new RouterException( diff --git a/ambry-router/src/test/java/com/github/ambry/router/NonBlockingRouterTest.java b/ambry-router/src/test/java/com/github/ambry/router/NonBlockingRouterTest.java index 104674ef66..1950ad8aa8 100644 --- a/ambry-router/src/test/java/com/github/ambry/router/NonBlockingRouterTest.java +++ b/ambry-router/src/test/java/com/github/ambry/router/NonBlockingRouterTest.java @@ -1270,6 +1270,24 @@ public void testNamedBlobMetadataNotFoundStillReturnsNotFound() throws Exception } } + /** With the gating config disabled, storage NOT_FOUND surfaces as BlobDoesNotExist (no translation). */ + @Test + public void testNamedBlobMissingInStorageNotTranslatedWhenConfigDisabled() throws Exception { + AtomicReference storedBlobId = new AtomicReference<>(); + Properties props = getNonBlockingRouterProperties(localDcName); + props.setProperty("router.named.blob.translate.not.found.to.unavailable.enabled", "false"); + MockServerLayout layout = setUpNamedBlobAndPut(storedBlobId, props); + try { + layout.getMockServers().forEach(s -> s.setServerErrorForAllRequests(ServerErrorCode.BlobNotFound)); + Assert.assertEquals(RouterErrorCode.BlobDoesNotExist, + ((RouterException) expectGetFailure("a", "c", "b").getCause()).getErrorCode()); + // Metric still increments for observability even when translation is disabled. + Assert.assertEquals(1L, routerMetrics.namedBlobMetadataExistsButStorageNotFoundCount.getCount()); + } finally { + if (router != null) { router.close(); assertClosed(); } + } + } + /** Translation also fires when getBlobHelper short-circuits via the notFoundCache. */ @Test public void testNamedBlobMissingViaNotFoundCacheStillTranslatedToAmbryUnavailable() throws Exception { @@ -1288,6 +1306,12 @@ public void testNamedBlobMissingViaNotFoundCacheStillTranslatedToAmbryUnavailabl /** Wire a mock IdConverter, set up the router, PUT a named blob {@code /a/c/b}, and stub the * GET-path {@code convert(RestRequest, String)} to return the resolved blob ID. */ private MockServerLayout setUpNamedBlobAndPut(AtomicReference storedBlobId) throws Exception { + return setUpNamedBlobAndPut(storedBlobId, getNonBlockingRouterProperties(localDcName)); + } + + /** Variant of {@link #setUpNamedBlobAndPut(AtomicReference)} that accepts custom router properties. */ + private MockServerLayout setUpNamedBlobAndPut(AtomicReference storedBlobId, Properties props) + throws Exception { CountDownLatch putLatch = new CountDownLatch(1); // Wire a mock IdConverterFactory + IdConverter. IdConverterFactory factory = mock(IdConverterFactory.class); @@ -1308,8 +1332,7 @@ private MockServerLayout setUpNamedBlobAndPut(AtomicReference storedBlob }); // Build the router with the mock factory. MockServerLayout layout = new MockServerLayout(mockClusterMap); - setRouterWithIdConverterFactory(getNonBlockingRouterProperties(localDcName), layout, - new LoggingNotificationSystem(), factory); + setRouterWithIdConverterFactory(props, layout, new LoggingNotificationSystem(), factory); // PUT a named blob /a/c/b, then wait for the convert mock to have run. setOperationParams(); router.putBlob(createNamedBlobRestRequest(RestMethod.PUT, "a", "c", "b", false, false, false, false),