From e93f3f2db3bd1ae7492f8b80bcf796ee9ac7b59b Mon Sep 17 00:00:00 2001 From: leoporoli Date: Thu, 9 May 2024 07:46:55 -0300 Subject: [PATCH] fix: restart APICs containers when the engine is restarted (#2441) ## Description Fixes an issue that occurred when running `kurtosis engine restart --restart-apic-containers`. The issue happened because the Engine's REST API was trying to reconnect to all the enclaves while the APIC containers were still in the restart process. We are also increasing the engine's readiness check time because the restart process could take more than 1 minute if there are many APIC containers running. ## REMINDER: Tag Reviewers, so they get notified to review ## Is this change user facing? YES ## References (if applicable) This is part of the `upgrade kurtosis from the UI` project --- .../engine_functions/create_engine.go | 4 ++-- engine/server/engine/main.go | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/container-engine-lib/lib/backend_impls/docker/docker_kurtosis_backend/engine_functions/create_engine.go b/container-engine-lib/lib/backend_impls/docker/docker_kurtosis_backend/engine_functions/create_engine.go index 585c9e83cc..4b46974fbc 100644 --- a/container-engine-lib/lib/backend_impls/docker/docker_kurtosis_backend/engine_functions/create_engine.go +++ b/container-engine-lib/lib/backend_impls/docker/docker_kurtosis_backend/engine_functions/create_engine.go @@ -29,8 +29,8 @@ const ( enclaveManagerUIPort = 9711 enclaveManagerAPIPort = 8081 engineDebugServerPort = 50102 // in ClI this is 50101 and 50103 for the APIC - maxWaitForEngineAvailabilityRetries = 10 - timeBetweenWaitForEngineAvailabilityRetries = 1 * time.Second + maxWaitForEngineAvailabilityRetries = 40 + timeBetweenWaitForEngineAvailabilityRetries = 2 * time.Second logsStorageDirPath = "/var/log/kurtosis/" ) diff --git a/engine/server/engine/main.go b/engine/server/engine/main.go index 1f79948dab..262d38768f 100644 --- a/engine/server/engine/main.go +++ 
b/engine/server/engine/main.go @@ -261,6 +261,12 @@ func runMain() error { } }() + if serverArgs.RestartAPIContainers { + if err := enclaveManager.RestartAllEnclaveAPIContainers(ctx); err != nil { + return stacktrace.Propagate(err, "An error occurred restarting all API containers.") + } + } + go func() { err := restApiServer( ctx, @@ -292,12 +298,6 @@ func runMain() error { } }() - if serverArgs.RestartAPIContainers { - if err := enclaveManager.RestartAllEnclaveAPIContainers(ctx); err != nil { - return stacktrace.Propagate(err, "An error occurred restarting all API containers.") - } - } - engineHttpServer := connect_server.NewConnectServer(serverArgs.GrpcListenPortNum, grpcServerStopGracePeriod, handler, apiPath) if err := engineHttpServer.RunServerUntilInterruptedWithCors(cors.AllowAll()); err != nil { return stacktrace.Propagate(err, "An error occurred running the server.")