Skip to content

Commit

Permalink
MGMT-13586: Wait for ETCD Bootstrap to complete (#670) (#717)
Browse files Browse the repository at this point in the history
https://issues.redhat.com/browse/MGMT-13586
There have previously been issues where the bootstrap
node rebooted before ETCD was ready. This change waits
for the new status provided by the ETCD operator before
rebooting the bootstrap node.
  • Loading branch information
CrystalChun committed Sep 18, 2023
1 parent 6160d18 commit edf2542
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 0 deletions.
21 changes: 21 additions & 0 deletions src/installer/installer.go
Expand Up @@ -422,6 +422,10 @@ func (i *installer) waitForControlPlane(ctx context.Context) error {
}

i.waitForBootkube(ctx)
if err = i.waitForETCDBootstrap(ctx); err != nil {
i.log.Error(err)
return err
}

// waiting for controller pod to be running
if err = i.waitForController(kc); err != nil {
Expand All @@ -432,6 +436,23 @@ func (i *installer) waitForControlPlane(ctx context.Context) error {
return nil
}

// waitForETCDBootstrap blocks until `systemctl is-active progress.service`
// reports "inactive", which this installer treats as the signal that ETCD
// bootstrap has finished.
//
// It first reports the HostStageWaitingForBootkube stage with an ETCD-specific
// message, then polls through utils.WaitForPredicate using waitForeverTimeout
// and generalWaitInterval. The returned error is whatever WaitForPredicate
// returns.
//
// NOTE(review): ctx is accepted but never consulted in this function, so
// cancelling it does not stop the polling — confirm whether that is intended.
func (i *installer) waitForETCDBootstrap(ctx context.Context) error {
	i.UpdateHostInstallProgress(models.HostStageWaitingForBootkube, "waiting for ETCD bootstrap to be complete")
	i.log.Infof("Started waiting for ETCD bootstrap to complete")
	return utils.WaitForPredicate(waitForeverTimeout, generalWaitInterval, func() bool {
		// Polled once per generalWaitInterval by WaitForPredicate.
		result, err := i.ops.ExecPrivilegeCommand(nil, "systemctl", "is-active", "progress.service")
		if result != "inactive" {
			// Not finished yet; only surface the failure when the command itself errored.
			if err != nil {
				i.log.WithError(err).Warnf("error occurred checking ETCD bootstrap progress: %s", result)
			}
			return false
		}

		// err is intentionally not consulted once the unit reports "inactive":
		// `systemctl is-active` exits non-zero for every non-active state, so an
		// error may accompany a perfectly valid answer (presumably surfaced as err
		// by ExecPrivilegeCommand — verify).
		i.log.Infof("ETCD bootstrap progress service status: %s", result)

		// The detailed status is collected for diagnostics only; a failure here
		// must not block the installation, but it should not be silently dropped.
		out, statusErr := i.ops.ExecPrivilegeCommand(nil, "systemctl", "status", "progress.service")
		if statusErr != nil {
			i.log.WithError(statusErr).Warn("failed to collect progress.service status")
		}
		i.log.Info(out)
		return true
	})
}

func numDone(hosts models.HostList) int {
numDone := 0
for _, h := range hosts {
Expand Down
17 changes: 17 additions & 0 deletions src/installer/installer_test.go
Expand Up @@ -271,6 +271,15 @@ var _ = Describe("installer HostRoleMaster role", func() {
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", "bootkube.service").Return("1", nil).Times(1)
}

// waitForETCDBootstrapSuccess registers the mock expectations for one
// successful pass through the ETCD bootstrap wait: exactly one progress
// update carrying the ETCD message, and exactly one `systemctl is-active`
// query on progress.service that answers "inactive".
waitForETCDBootstrapSuccess := func() {
	mockbmclient.EXPECT().
		UpdateHostInstallProgress(gomock.Any(), infraEnvId, hostId, models.HostStageWaitingForBootkube, "waiting for ETCD bootstrap to be complete").
		Return(nil).
		Times(1)
	mockops.EXPECT().
		ExecPrivilegeCommand(gomock.Any(), "systemctl", "is-active", "progress.service").
		Return("inactive", nil).
		Times(1)
}

// bootstrapETCDStatusSuccess expects exactly one `systemctl status` query on
// progress.service and makes it succeed with the placeholder output "1".
bootstrapETCDStatusSuccess := func() {
	mockops.EXPECT().
		ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", "progress.service").
		Return("1", nil).
		Times(1)
}

extractSecretFromIgnitionSuccess := func() {
mockops.EXPECT().ExtractFromIgnition(filepath.Join(InstallDir, bootstrapIgn), dockerConfigFile).Return(nil).Times(1)
}
Expand Down Expand Up @@ -316,6 +325,8 @@ var _ = Describe("installer HostRoleMaster role", func() {
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
bootkubeStatusSuccess()
waitForETCDBootstrapSuccess()
bootstrapETCDStatusSuccess()
resolvConfSuccess()
waitForControllerSuccessfully(conf.ClusterID)
//HostRoleMaster flow:
Expand Down Expand Up @@ -345,6 +356,8 @@ var _ = Describe("installer HostRoleMaster role", func() {
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
bootkubeStatusSuccess()
waitForETCDBootstrapSuccess()
bootstrapETCDStatusSuccess()
resolvConfSuccess()
waitForControllerSuccessfully(conf.ClusterID)
//HostRoleMaster flow:
Expand Down Expand Up @@ -399,6 +412,8 @@ var _ = Describe("installer HostRoleMaster role", func() {
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
bootkubeStatusSuccess()
waitForETCDBootstrapSuccess()
bootstrapETCDStatusSuccess()
resolvConfSuccess()
waitForControllerSuccessfully(conf.ClusterID)
//HostRoleMaster flow:
Expand Down Expand Up @@ -483,6 +498,8 @@ var _ = Describe("installer HostRoleMaster role", func() {
// mockbmclient.EXPECT().UpdateHostInstallProgress(gomock.Any(), inventoryNamesHost["node0"].Host.InfraEnvID.String(), inventoryNamesHost["node0"].Host.ID.String(), models.HostStageJoined, "").Times(1)
waitForBootkubeSuccess()
bootkubeStatusSuccess()
waitForETCDBootstrapSuccess()
bootstrapETCDStatusSuccess()
resolvConfSuccess()
waitForControllerSuccessfully(conf.ClusterID)
//HostRoleMaster flow:
Expand Down

0 comments on commit edf2542

Please sign in to comment.