From bffdf1e6c15f13a1cd1450f9d64681b7dbd7abe6 Mon Sep 17 00:00:00 2001 From: Thanh Ha Date: Thu, 4 Sep 2025 08:23:45 -0400 Subject: [PATCH] Retry runner checks 3 times before failing PyTorch HUD recently has been occasionally responding slowly. Let's add a retry function so that we avoid alerting due to HUD issues. This change will retry up to 3 times waiting 1 minute each before declaring an alert. Signed-off-by: Thanh Ha --- datadog-synthetics_tests.tf | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/datadog-synthetics_tests.tf b/datadog-synthetics_tests.tf index 53af083..bd25a49 100644 --- a/datadog-synthetics_tests.tf +++ b/datadog-synthetics_tests.tf @@ -330,6 +330,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET" @@ -359,6 +363,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET" @@ -388,6 +396,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET" @@ -417,6 +429,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET" @@ -446,6 +462,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET" @@ -475,6 +495,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET" @@ -504,6 +528,10 @@ EOT locations = ["aws:us-west-2"] options_list { tick_every = 900 + retry { + count = 3 + interval = 60000 + } } request_definition { method = "GET"