Skip to content

Commit

Permalink
Retry HGAP's extensionsArtifact requests on BAD_REQUEST status (Azure…
Browse files Browse the repository at this point in the history
…#2621)

* Retry HGAP's extensionsArtifact requests on BAD_REQUEST status

* python 2.6 compat

Co-authored-by: narrieta <narrieta>
  • Loading branch information
narrieta committed Jul 5, 2022
1 parent e13eb4f commit dbc82d3
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from azurelinuxagent.common.future import ustr
from azurelinuxagent.common.protocol.extensions_goal_state import ExtensionsGoalState, GoalStateChannel, GoalStateSource
from azurelinuxagent.common.protocol.restapi import ExtensionSettings, Extension, VMAgentManifest, ExtensionState, InVMGoalStateMetaData
from azurelinuxagent.common.utils import restutil
from azurelinuxagent.common.utils.textutil import parse_doc, parse_json, findall, find, findtext, getattrib, gettext, format_exception, \
is_str_none_or_whitespace, is_str_empty

Expand Down Expand Up @@ -99,7 +100,7 @@ def fetch_direct():
def fetch_through_host():
host = wire_client.get_host_plugin()
uri, headers = host.get_artifact_request(artifacts_profile_blob)
content, _ = wire_client.fetch(uri, headers, use_proxy=False)
content, _ = wire_client.fetch(uri, headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES)
return content

logger.verbose("Retrieving the artifacts profile")
Expand Down
17 changes: 10 additions & 7 deletions azurelinuxagent/common/protocol/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def get_goal_state(self):
def _download_ext_handler_pkg_through_host(self, uri, destination):
host = self.client.get_host_plugin()
uri, headers = host.get_artifact_request(uri, host.manifest_uri)
success = self.client.stream(uri, destination, headers=headers, use_proxy=False, max_retry=1)
success = self.client.stream(uri, destination, headers=headers, use_proxy=False, max_retry=1) # set max_retry to 1 because extension packages already have a retry loop (see ExtHandlerInstance.download())
return success

def download_ext_handler_pkg(self, uri, destination, headers=None, use_proxy=True): # pylint: disable=W0613
Expand Down Expand Up @@ -626,7 +626,7 @@ def call_storage_service(http_req, *args, **kwargs):
def fetch_manifest_through_host(self, uri):
host = self.get_host_plugin()
uri, headers = host.get_artifact_request(uri)
response, _ = self.fetch(uri, headers, use_proxy=False, max_retry=1)
response, _ = self.fetch(uri, headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES)
return response

def fetch_manifest(self, version_uris, timeout_in_minutes=5, timeout_in_ms=0):
Expand All @@ -649,9 +649,11 @@ def fetch_manifest(self, version_uris, timeout_in_minutes=5, timeout_in_ms=0):
logger.verbose('The specified manifest URL is empty, ignored.')
continue

direct_func = lambda: self.fetch(version_uri, max_retry=1)[0] # pylint: disable=W0640
# Disable W0640: OK to use version_uri in a lambda within the loop's body
direct_func = lambda: self.fetch(version_uri)[0] # pylint: disable=W0640
# NOTE: the host_func may be called after refreshing the goal state, be careful about any goal state data
# in the lambda.
# Disable W0640: OK to use version_uri in a lambda within the loop's body
host_func = lambda: self.fetch_manifest_through_host(version_uri) # pylint: disable=W0640

try:
Expand Down Expand Up @@ -690,7 +692,7 @@ def stream(self, uri, destination, headers=None, use_proxy=None, max_retry=None)

return success

def fetch(self, uri, headers=None, use_proxy=None, decode=True, max_retry=None, ok_codes=None):
def fetch(self, uri, headers=None, use_proxy=None, decode=True, max_retry=None, retry_codes=None, ok_codes=None):
"""
max_retry indicates the maximum number of retries for the HTTP request; None indicates that the default value should be used
Expand All @@ -699,14 +701,14 @@ def fetch(self, uri, headers=None, use_proxy=None, decode=True, max_retry=None,
logger.verbose("Fetch [{0}] with headers [{1}]", uri, headers)
content = None
response_headers = None
response = self._fetch_response(uri, headers, use_proxy, max_retry=max_retry, ok_codes=ok_codes)
response = self._fetch_response(uri, headers, use_proxy, max_retry=max_retry, retry_codes=retry_codes, ok_codes=ok_codes)
if response is not None and not restutil.request_failed(response, ok_codes=ok_codes):
response_content = response.read()
content = self.decode_config(response_content) if decode else response_content
response_headers = response.getheaders()
return content, response_headers

def _fetch_response(self, uri, headers=None, use_proxy=None, max_retry=None, ok_codes=None):
def _fetch_response(self, uri, headers=None, use_proxy=None, max_retry=None, retry_codes=None, ok_codes=None):
"""
max_retry indicates the maximum number of retries for the HTTP request; None indicates that the default value should be used
"""
Expand All @@ -717,7 +719,8 @@ def _fetch_response(self, uri, headers=None, use_proxy=None, max_retry=None, ok_
uri,
headers=headers,
use_proxy=use_proxy,
max_retry=max_retry)
max_retry=max_retry,
retry_codes=retry_codes)

host_plugin = self.get_host_plugin()

Expand Down
9 changes: 9 additions & 0 deletions azurelinuxagent/common/utils/restutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@
429, # Request Rate Limit Exceeded
]

#
# Currently the HostGAPlugin has an issue its cache that may produce a BAD_REQUEST failure for valid URIs when using the extensionArtifact API.
# Add this status to the retryable codes, but use it only when requesting downloads via the HostGAPlugin. The retry logic in the download code
# would give enough time to the HGAP to refresh its cache. Once the fix to address that issue is deployed, consider removing the use of
# HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES.
#
HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES = RETRY_CODES[:] # make a copy of RETRY_CODES
HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES.append(httpclient.BAD_REQUEST)

RESOURCE_GONE_CODES = [
httpclient.GONE
]
Expand Down
6 changes: 3 additions & 3 deletions azurelinuxagent/ga/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -1614,7 +1614,7 @@ def _download(self):

uri, headers = self.host.get_artifact_request(uri, self.host.manifest_uri)
try:
if self._fetch(uri, headers=headers, use_proxy=False):
if self._fetch(uri, headers=headers, use_proxy=False, retry_codes=restutil.HGAP_GET_EXTENSION_ARTIFACT_RETRY_CODES):
if not HostPluginProtocol.is_default_channel:
logger.verbose("Setting host plugin as default channel")
HostPluginProtocol.is_default_channel = True
Expand All @@ -1641,12 +1641,12 @@ def _download(self):
message=msg)
raise UpdateError(msg)

def _fetch(self, uri, headers=None, use_proxy=True):
def _fetch(self, uri, headers=None, use_proxy=True, retry_codes=None):
package = None
try:
is_healthy = True
error_response = ''
resp = restutil.http_get(uri, use_proxy=use_proxy, headers=headers, max_retry=1)
resp = restutil.http_get(uri, use_proxy=use_proxy, headers=headers, max_retry=3, retry_codes=retry_codes) # Use only 3 retries, since there are usually 5 or 6 URIs and we try all of them
if restutil.request_succeeded(resp):
package = resp.read()
fileutil.write_file(self.get_agent_pkg_path(),
Expand Down

0 comments on commit dbc82d3

Please sign in to comment.