Skip to content

Commit

Permalink
MGMT-17241: [soft-timeout] Installation not timeout /aborted after 24…
Browse files Browse the repository at this point in the history
…Hours (openshift#6092)

Added the missing 24-hour state machine transition for soft timeouts.
  • Loading branch information
ori-amizur authored and paul-maidment committed Mar 25, 2024
1 parent d41226e commit 737e439
Show file tree
Hide file tree
Showing 214 changed files with 1,370 additions and 1,001 deletions.
17 changes: 10 additions & 7 deletions go.mod
Expand Up @@ -73,9 +73,9 @@ require (
github.com/thedevsaddam/retry v1.2.1
github.com/thoas/go-funk v0.9.3
github.com/vincent-petithory/dataurl v1.0.0
golang.org/x/crypto v0.14.0
golang.org/x/crypto v0.20.0
golang.org/x/sync v0.4.0
golang.org/x/sys v0.16.0
golang.org/x/sys v0.17.0
gopkg.in/ini.v1 v1.67.0
gopkg.in/square/go-jose.v2 v2.6.0
gopkg.in/yaml.v2 v2.4.0
Expand Down Expand Up @@ -175,10 +175,10 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/itchyny/timefmt-go v0.1.3 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
github.com/jackc/pgconn v1.14.1 // indirect
github.com/jackc/pgconn v1.14.3 // indirect
github.com/jackc/pgio v1.0.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgproto3/v2 v2.3.2 // indirect
github.com/jackc/pgproto3/v2 v2.3.3 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/pgtype v1.14.0 // indirect
github.com/jackc/pgx/v4 v4.16.0 // indirect
Expand Down Expand Up @@ -231,10 +231,10 @@ require (
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.24.0 // indirect
golang.org/x/net v0.17.0
golang.org/x/net v0.21.0
golang.org/x/oauth2 v0.12.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/term v0.17.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
Expand All @@ -256,6 +256,9 @@ require (
)

replace (
github.com/jackc/pgproto3/v2 => github.com/jackc/pgproto3/v2 v2.3.3
github.com/jackc/pgx/v4 => github.com/jackc/pgx/v4 v4.18.2
github.com/jackc/pgx/v5 => github.com/jackc/pgx/v5 v5.5.4
github.com/metal3-io/baremetal-operator => github.com/openshift/baremetal-operator v0.0.0-20231019133159-8643f32fea3e
github.com/metal3-io/baremetal-operator/apis => github.com/openshift/baremetal-operator/apis v0.0.0-20231019133159-8643f32fea3e
github.com/metal3-io/baremetal-operator/pkg/hardwareutils => github.com/openshift/baremetal-operator/pkg/hardwareutils v0.0.0-20231019133159-8643f32fea3e
Expand Down
101 changes: 22 additions & 79 deletions go.sum

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions internal/cluster/common.go
Expand Up @@ -39,6 +39,7 @@ const (
statusInfoPendingForInput = "User input required"
statusInfoError = "cluster has hosts in error"
statusInfoTimeout = "cluster installation timed out while pending user action (a manual booting from installation disk)"
statusInfoInstallationTimeout = "cluster installation timeout (%d minutes) has expired"
statusInfoAddingHosts = "cluster is adding hosts to existing OCP cluster"
statusInfoInstallingPendingUserAction = "Cluster has hosts pending user action"
statusInfoUnpreparingHostExists = "At least one host has stopped preparing for installation"
Expand Down
14 changes: 14 additions & 0 deletions internal/cluster/mock_transition.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions internal/cluster/statemachine.go
Expand Up @@ -292,6 +292,21 @@ func NewClusterStateMachine(th TransitionHandler) stateswitch.StateMachine {
},
})

// Refresh-status rule for the overall installation timeout: a cluster that is
// installing or finalizing is moved to the error state when the condition below holds.
sm.AddTransitionRule(stateswitch.TransitionRule{
TransitionType: TransitionTypeRefreshStatus,
SourceStates: []stateswitch.State{
stateswitch.State(models.ClusterStatusInstalling),
stateswitch.State(models.ClusterStatusFinalizing),
},
// The two predicates are combined with stateswitch.And, so presumably both
// must hold: the installation timed out and soft timeouts are enabled.
Condition: stateswitch.And(th.IsInstallationTimedOut, th.SoftTimeoutsEnabled),
DestinationState: stateswitch.State(models.ClusterStatusError),
// th.InstallationTimeoutMinutes is passed as a function value alongside the
// status-info template; presumably PostRefreshCluster calls it to fill in the
// template's argument (confirm against PostRefreshCluster).
PostTransition: th.PostRefreshCluster(statusInfoInstallationTimeout, th.InstallationTimeoutMinutes),
Documentation: stateswitch.TransitionRuleDoc{
Name: "Timed out while installing",
Description: "Cluster installation is taking too long, give up and display appropriate error",
},
})

sm.AddTransitionRule(stateswitch.TransitionRule{
TransitionType: TransitionTypeRefreshStatus,
SourceStates: []stateswitch.State{
Expand Down
5 changes: 5 additions & 0 deletions internal/cluster/transition.go
Expand Up @@ -72,6 +72,7 @@ type TransitionHandler interface {
PostRefreshFinalizingStageSoftTimedOut(sw stateswitch.StateSwitch, args stateswitch.TransitionArgs) error
SoftTimeoutsEnabled(_ stateswitch.StateSwitch, _ stateswitch.TransitionArgs) (bool, error)
FinalizingStageTimeoutMinutes(sCluster *stateCluster) interface{}
InstallationTimeoutMinutes(_ *stateCluster) interface{}
}

////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -654,6 +655,10 @@ func (th *transitionHandler) FinalizingStageTimeoutMinutes(sCluster *stateCluste
return th.finalizingStageTimeoutMinutes(sCluster)
}

// InstallationTimeoutMinutes reports the handler's configured installation
// timeout expressed in minutes, converted to int64 and boxed as interface{}.
// The state-cluster argument is ignored; the value comes only from the
// handler's installationTimeout field.
func (th *transitionHandler) InstallationTimeoutMinutes(_ *stateCluster) interface{} {
	timeoutMinutes := th.installationTimeout.Minutes()
	return int64(timeoutMinutes)
}

func (th *transitionHandler) PostRefreshFinalizingStageSoftTimedOut(sw stateswitch.StateSwitch, args stateswitch.TransitionArgs) error {
sCluster, ok := sw.(*stateCluster)
if !ok {
Expand Down
83 changes: 83 additions & 0 deletions internal/cluster/transition_test.go
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/golang/mock/gomock"
"github.com/google/uuid"
. "github.com/onsi/ginkgo"
. "github.com/onsi/ginkgo/extensions/table"
. "github.com/onsi/gomega"
"github.com/openshift/assisted-service/internal/common"
eventgen "github.com/openshift/assisted-service/internal/common/events"
Expand Down Expand Up @@ -4693,6 +4694,87 @@ var _ = Describe("ValidationResult sort", func() {
})
})

// Specs for the overall installation timeout: a cluster in installing or
// finalizing status is passed through RefreshStatus and is expected to move to
// the error status once its install start time is 24 hours in the past, and to
// keep its status when it is only 23 hours in the past.
var _ = Describe("installation timeout", func() {
var (
db *gorm.DB
clusterId strfmt.UUID
clusterApi *Manager
mockEvents *eventsapi.MockHandler
mockHostAPI *host.MockAPI
mockMetric *metrics.MockAPI
ctrl *gomock.Controller
dbName string
operatorsManager *operators.Manager
mockS3Api *s3wrapper.MockAPI
)

// Build a fresh test DB, gomock controller, mocks and cluster Manager before each spec.
BeforeEach(func() {
db, dbName = common.PrepareTestDB()
ctrl = gomock.NewController(GinkgoT())
mockEvents = eventsapi.NewMockHandler(ctrl)
mockHostAPI = host.NewMockAPI(ctrl)
mockMetric = metrics.NewMockAPI(ctrl)
mockS3Api = s3wrapper.NewMockAPI(ctrl)
operatorsManager = operators.NewManager(common.GetTestLog(), nil, operators.Options{}, nil, nil)
clusterId = strfmt.UUID(uuid.New().String())
// NOTE(review): the expected message below assumes getDefaultConfig() yields a
// 24-hour installation timeout; confirm against getDefaultConfig.
clusterApi = NewManager(getDefaultConfig(), common.GetTestLog().WithField("pkg", "cluster-monitor"), db, testing.GetDummyNotificationStream(ctrl),
mockEvents, nil, mockHostAPI, mockMetric, nil, nil, operatorsManager, nil, mockS3Api, nil, nil, nil, true)
})
// createCluster stores in the DB, and returns, a cluster with the given status,
// status info and install start time. The cluster has soft timeouts enabled and
// a single master host in the installing-in-progress status.
createCluster := func(status, statusInfo string, installStartedAt time.Time) *common.Cluster {
id := strfmt.UUID(uuid.NewString())
infraenvId := strfmt.UUID(uuid.NewString())
cls := &common.Cluster{
Cluster: models.Cluster{
ID: &clusterId,
Status: swag.String(status),
StatusInfo: swag.String(statusInfo),
InstallStartedAt: strfmt.DateTime(installStartedAt),
OpenshiftVersion: "4.15",
EmailDomain: "redhat.com",
OrgSoftTimeoutsEnabled: true,
HighAvailabilityMode: swag.String(models.ClusterHighAvailabilityModeNone),
Hosts: []*models.Host{
{
ID: &id,
InfraEnvID: infraenvId,
ClusterID: &clusterId,
Status: swag.String(models.HostStatusInstallingInProgress),
Role: models.HostRoleMaster,
},
},
},
}
Expect(db.Create(cls).Error).ToNot(HaveOccurred())
return cls
}
// Finish the gomock controller and delete the test DB after each spec.
AfterEach(func() {
ctrl.Finish()
common.DeleteTestDB(db, dbName)
})
DescribeTable("installation timeout cases",
func(status, statusInfo string, installStartedAt time.Time, timeoutExpected bool) {
cls := createCluster(status, statusInfo, installStartedAt)
// One cluster event and one installation-finished metric are expected only
// when the timeout is expected to fire.
if timeoutExpected {
mockEvents.EXPECT().SendClusterEvent(gomock.Any(), gomock.Any()).Times(1)
mockMetric.EXPECT().ClusterInstallationFinished(gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(1)
}
returnedCluster, err := clusterApi.RefreshStatus(context.Background(), cls, db)
Expect(err).ToNot(HaveOccurred())
if timeoutExpected {
// On timeout the cluster is in error and the status info is the timeout
// template formatted with 24 hours expressed in minutes.
Expect(swag.StringValue(returnedCluster.Status)).To(Equal(models.ClusterStatusError))
Expect(swag.StringValue(returnedCluster.StatusInfo)).To(Equal(fmt.Sprintf(statusInfoInstallationTimeout, int64((24 * time.Hour).Minutes()))))
} else {
// Without a timeout the status must be the one the cluster was created with.
Expect(swag.StringValue(returnedCluster.Status)).To(Equal(status))
}
},
// The install start times are computed with time.Now() when each Entry is
// constructed, not when the spec body runs.
Entry("timeout expired for installing status", models.ClusterStatusInstalling, statusInfoInstalling, time.Now().Add(-24*time.Hour), true),
Entry("timeout not expired for installing status", models.ClusterStatusInstalling, statusInfoInstalling, time.Now().Add(-23*time.Hour), false),
Entry("timeout expired for finalizing status", models.ClusterStatusFinalizing, statusInfoFinalizing, time.Now().Add(-24*time.Hour), true),
Entry("timeout not expired for finalizing status", models.ClusterStatusFinalizing, statusInfoFinalizing, time.Now().Add(-23*time.Hour), false),
)
})

var _ = Describe("finalizing timeouts", func() {
var (
ctx = context.Background()
Expand Down Expand Up @@ -4737,6 +4819,7 @@ var _ = Describe("finalizing timeouts", func() {
FinalizingStageStartedAt: strfmt.DateTime(stageTimestamp),
},
OrgSoftTimeoutsEnabled: true,
InstallStartedAt: strfmt.DateTime(time.Now()),
},
}
Expect(db.Create(cls).Error).ToNot(HaveOccurred())
Expand Down
10 changes: 10 additions & 0 deletions vendor/github.com/jackc/pgconn/CHANGELOG.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions vendor/github.com/jackc/pgconn/auth_scram.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion vendor/github.com/jackc/pgconn/krb5.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 737e439

Please sign in to comment.