Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
- Add field `Labels` (type `*map[string]string`) to structs `LoadBalancer`, `CreateLoadBalancerPayload`, `UpdateLoadBalancerPayload`
- `git`: [v0.8.0](services/git/CHANGELOG.md#v080)
- **Feature:** Add support for the instance patch operation
- `ske`
- [v1.4.1](services/ske/CHANGELOG.md#v141)
- **Feature:** Exit `CreateOrUpdateClusterWaitHandler` early when the cluster reports structured errors and is in a failure state.



## Release (2025-09-11)
- `cdn`: [v1.5.0](services/cdn/CHANGELOG.md#v150)
Expand Down
3 changes: 3 additions & 0 deletions services/ske/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## v1.4.1
- **Feature:** Exit `CreateOrUpdateClusterWaitHandler` early when the cluster reports structured errors and is in a failure state.

## v1.4.0
- **Feature:** Add new field `Kubernetes` to `Nodepool` model

Expand Down
2 changes: 1 addition & 1 deletion services/ske/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v1.4.0
v1.4.1
24 changes: 24 additions & 0 deletions services/ske/wait/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ const (
CredentialsRotationStateCompleted = "COMPLETED"
// Deprecated: InvalidArgusInstanceErrorCode is deprecated and will be removed after 14th November 2025. Use [ske.RUNTIMEERRORCODE_OBSERVABILITY_INSTANCE_NOT_FOUND] instead.
InvalidArgusInstanceErrorCode = "SKE_ARGUS_INSTANCE_NOT_FOUND"

ClusterNoValidHostFound = "SKE_NODE_NO_VALID_HOST_FOUND"
)

type APIClientClusterInterface interface {
Expand All @@ -42,6 +44,8 @@ type APIClientClusterInterface interface {

// CreateOrUpdateClusterWaitHandler will wait for cluster creation or update
func CreateOrUpdateClusterWaitHandler(ctx context.Context, a APIClientClusterInterface, projectId, region, name string) *wait.AsyncActionHandler[ske.Cluster] {
startTime := time.Now()

handler := wait.New(func() (waitFinished bool, response *ske.Cluster, err error) {
s, err := a.GetClusterExecute(ctx, projectId, region, name)
if err != nil {
Expand All @@ -56,6 +60,26 @@ func CreateOrUpdateClusterWaitHandler(ctx context.Context, a APIClientClusterInt
return true, s, nil
}

// If cluster is UNSPECIFIED or UNHEALTHY and has structured errors, exit early
hasStructuredErrors := s.Status.Errors != nil && len(*s.Status.Errors) > 0
if (state == ske.CLUSTERSTATUSSTATE_UNSPECIFIED || state == ske.CLUSTERSTATUSSTATE_UNHEALTHY) && hasStructuredErrors {
for _, clusterError := range *s.Status.Errors {
if clusterError.Code != nil && clusterError.Message != nil {
return true, s, nil
}
}
}

// Waiter has been running more than 15 minutes and cluster is still in CREATING or RECONCILING state with errors
if time.Since(startTime) > 15*time.Minute &&
(state == ske.CLUSTERSTATUSSTATE_CREATING || state == ske.CLUSTERSTATUSSTATE_RECONCILING) && hasStructuredErrors {
for _, clusterError := range *s.Status.Errors {
if clusterError.Code != nil && clusterError.Message != nil {
return true, s, nil
}
}
}

if state == ske.CLUSTERSTATUSSTATE_HEALTHY || state == ske.CLUSTERSTATUSSTATE_HIBERNATED {
return true, s, nil
}
Expand Down
53 changes: 52 additions & 1 deletion services/ske/wait/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type apiClientClusterMocked struct {
name string
resourceState ske.ClusterStatusState
invalidArgusInstance bool
errorList *[]ske.ClusterError
}

const testRegion = "eu01"
Expand Down Expand Up @@ -45,7 +46,17 @@ func (a *apiClientClusterMocked) GetClusterExecute(_ context.Context, _, _, _ st
return &ske.Cluster{
Name: utils.Ptr("cluster"),
Status: &ske.ClusterStatus{
Aggregated: &rs,
Aggregated: utils.Ptr(rs),
Error: func() *ske.RuntimeError {
if a.invalidArgusInstance {
return &ske.RuntimeError{
Code: utils.Ptr(ske.RUNTIMEERRORCODE_OBSERVABILITY_INSTANCE_NOT_FOUND),
Message: utils.Ptr("invalid argus instance"),
}
}
return nil
}(),
Errors: a.errorList,
},
}, nil
}
Expand Down Expand Up @@ -77,6 +88,7 @@ func TestCreateOrUpdateClusterWaitHandler(t *testing.T) {
invalidArgusInstance bool
wantErr bool
wantResp bool
errorList *[]ske.ClusterError
}{
{
desc: "create_succeeded",
Expand Down Expand Up @@ -120,6 +132,40 @@ func TestCreateOrUpdateClusterWaitHandler(t *testing.T) {
wantErr: true,
wantResp: false,
},
{
desc: "status_errors_present_state_unhealthy",
getFails: false,
resourceState: ske.CLUSTERSTATUSSTATE_UNHEALTHY,
errorList: &[]ske.ClusterError{
{
Code: utils.Ptr("ERR_CODE"),
Message: utils.Ptr("Error 1"),
},
{
Code: utils.Ptr("ERR_OTHER"),
Message: utils.Ptr("Error 2"),
},
},
wantErr: false,
wantResp: true,
},
{
desc: "status_errors_present_state_unspecified",
getFails: false,
resourceState: ske.CLUSTERSTATUSSTATE_UNSPECIFIED,
errorList: &[]ske.ClusterError{
{
Code: utils.Ptr("ERR_CODE"),
Message: utils.Ptr("Error 1"),
},
{
Code: utils.Ptr("ERR_OTHER"),
Message: utils.Ptr("Error 2"),
},
},
wantErr: false,
wantResp: true,
},
}
for _, tt := range tests {
t.Run(tt.desc, func(t *testing.T) {
Expand All @@ -130,6 +176,7 @@ func TestCreateOrUpdateClusterWaitHandler(t *testing.T) {
name: name,
resourceState: tt.resourceState,
invalidArgusInstance: tt.invalidArgusInstance,
errorList: tt.errorList,
}
var wantRes *ske.Cluster
rs := ske.ClusterStatusState(tt.resourceState)
Expand All @@ -147,6 +194,10 @@ func TestCreateOrUpdateClusterWaitHandler(t *testing.T) {
Message: utils.Ptr("invalid argus instance"),
}
}

if tt.errorList != nil && len(*tt.errorList) > 0 {
wantRes.Status.Errors = tt.errorList
}
}

handler := CreateOrUpdateClusterWaitHandler(context.Background(), apiClient, "", testRegion, name)
Expand Down
Loading