From 207ac3c08f37d0609e9301ae34863804632213ba Mon Sep 17 00:00:00 2001 From: Mat Kowalski Date: Wed, 26 May 2021 13:23:57 +0200 Subject: [PATCH] Bug 1964591: Remove AI Agent image in case of service failure This PR adds a handler for a failure scenario of `agent.service` which removes the `assisted-installer-agent` container image. This is a workaround for an issue where symlinks in `/var/lib/containers/` are corrupted. Deleting an image in `ExecStartPre` means that every time agent.service starts we make sure the image is available. If it's the very first attempt to start `agent.service`, then the image will be pulled as it would be in any other scenario. Any subsequent attempt to start `agent.service` will first check if the image is present and in case of errors will remove it so that it can be pulled again. We are not using the `OnFailure` directive because the unit defined there would only be started once all the restart attempts are exhausted, which is not a desired workflow in this scenario. 
Closes: OCPBUGSM-29583 --- internal/ignition/ignition.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/internal/ignition/ignition.go b/internal/ignition/ignition.go index de6d5f430f1..db2cbb6b75a 100644 --- a/internal/ignition/ignition.go +++ b/internal/ignition/ignition.go @@ -125,6 +125,12 @@ allow chronyd_t container_file_t:sock_file write; allow chronyd_t spc_t:unix_dgram_socket sendto; ` +const agentFixBZ1964591 = `#!/usr/bin/sh + +IMAGE=$(echo $1 | sed 's/:.*//') +podman images | grep $IMAGE || podman rmi --force $1 || true +` + const discoveryIgnitionConfigFormat = `{ "ignition": { "version": "3.1.0"{{if .PROXY_SETTINGS}}, @@ -139,7 +145,7 @@ const discoveryIgnitionConfigFormat = `{ "units": [{ "name": "agent.service", "enabled": true, - "contents": "[Service]\nType=simple\nRestart=always\nRestartSec=3\nStartLimitInterval=0\nEnvironment=HTTP_PROXY={{.HTTPProxy}}\nEnvironment=http_proxy={{.HTTPProxy}}\nEnvironment=HTTPS_PROXY={{.HTTPSProxy}}\nEnvironment=https_proxy={{.HTTPSProxy}}\nEnvironment=NO_PROXY={{.NoProxy}}\nEnvironment=no_proxy={{.NoProxy}}{{if .PullSecretToken}}\nEnvironment=PULL_SECRET_TOKEN={{.PullSecretToken}}{{end}}\nTimeoutStartSec={{.AgentTimeoutStartSec}}\nExecStartPre=podman run --privileged --rm -v /usr/local/bin:/hostbin {{.AgentDockerImg}} cp /usr/bin/agent /hostbin\nExecStart=/usr/local/bin/agent --url {{.ServiceBaseURL}} --cluster-id {{.clusterId}} --agent-version {{.AgentDockerImg}} --insecure={{.SkipCertVerification}} {{if .HostCACertPath}}--cacert {{.HostCACertPath}}{{end}}\n\n[Unit]\nWants=network-online.target\nAfter=network-online.target\n\n[Install]\nWantedBy=multi-user.target" + "contents": 
"[Service]\nType=simple\nRestart=always\nRestartSec=3\nStartLimitInterval=0\nEnvironment=HTTP_PROXY={{.HTTPProxy}}\nEnvironment=http_proxy={{.HTTPProxy}}\nEnvironment=HTTPS_PROXY={{.HTTPSProxy}}\nEnvironment=https_proxy={{.HTTPSProxy}}\nEnvironment=NO_PROXY={{.NoProxy}}\nEnvironment=no_proxy={{.NoProxy}}{{if .PullSecretToken}}\nEnvironment=PULL_SECRET_TOKEN={{.PullSecretToken}}{{end}}\nTimeoutStartSec={{.AgentTimeoutStartSec}}\nExecStartPre=/usr/local/bin/agent-fix-bz1964591 {{.AgentDockerImg}}\nExecStartPre=podman run --privileged --rm -v /usr/local/bin:/hostbin {{.AgentDockerImg}} cp /usr/bin/agent /hostbin\nExecStart=/usr/local/bin/agent --url {{.ServiceBaseURL}} --cluster-id {{.clusterId}} --agent-version {{.AgentDockerImg}} --insecure={{.SkipCertVerification}} {{if .HostCACertPath}}--cacert {{.HostCACertPath}}{{end}}\n\n[Unit]\nWants=network-online.target\nAfter=network-online.target\n\n[Install]\nWantedBy=multi-user.target" }, { "name": "selinux.service", @@ -155,6 +161,15 @@ const discoveryIgnitionConfigFormat = `{ }, "storage": { "files": [{ + "overwrite": true, + "path": "/usr/local/bin/agent-fix-bz1964591", + "mode": 755, + "user": { + "name": "root" + }, + "contents": { "source": "data:,{{.AGENT_FIX_BZ1964591}}" } + }, + { "overwrite": true, "path": "/etc/motd", "mode": 420, @@ -1251,6 +1266,7 @@ func (ib *ignitionBuilder) FormatDiscoveryIgnitionFile(cluster *common.Cluster, "clusterId": cluster.ID.String(), "PullSecretToken": pullSecretToken, "AGENT_MOTD": url.PathEscape(agentMessageOfTheDay), + "AGENT_FIX_BZ1964591": url.PathEscape(agentFixBZ1964591), "IPv6_CONF": url.PathEscape(common.Ipv6DuidDiscoveryConf), "PULL_SECRET": url.PathEscape(cluster.PullSecret), "RH_ROOT_CA": rhCa,