Skip to content

Commit

Permalink
manager: synchronize task fields with Manager 2.2 (#303)
Browse files Browse the repository at this point in the history
Intensity in Manager is float64, but k8s runtime controller doesn't support floats.
To workaround it, intensity becomes a string value, and validating webhook checks if
provided value is a correct float string.

Fixes #303
  • Loading branch information
zimnx committed Dec 17, 2020
1 parent 5fd3b4c commit 043eb7a
Show file tree
Hide file tree
Showing 9 changed files with 88 additions and 33 deletions.
20 changes: 12 additions & 8 deletions config/operator/crd/bases/scylla.scylladb.com_scyllaclusters.yaml
Expand Up @@ -1421,10 +1421,12 @@ spec:
failFast:
description: FailFast stop repair on first error.
type: boolean
host:
description: Host to repair, by default all hosts are repaired
type: string
intensity:
description: Intensity integer >= 1 or a decimal between (0,1), higher values may result in higher speed and cluster load. 0 value means repair at maximum intensity.
format: int64
type: integer
description: Intensity how many token ranges (per shard) to repair in a single Scylla repair job. By default this is 1. If you set it to 0 the number of token ranges is adjusted to the maximum supported by node (see max_repair_ranges_in_parallel in Scylla logs). Valid values are 0 and integers >= 1. Higher values will result in increased cluster load and slightly faster repairs. Changing the intensity impacts repair granularity if you need to resume it, the higher the value the more work on resume. For Scylla clusters that DO NOT SUPPORT ROW-LEVEL REPAIR, intensity can be a decimal between (0,1). In that case it specifies percent of shards that can be repaired in parallel on a repair master node. For Scylla clusters that are row-level repair enabled, setting intensity below 1 has the same effect as setting intensity 1.
type: string
interval:
description: Interval task schedule interval e.g. 3d2h10m, valid units are d, h, m, s (default "0").
type: string
Expand All @@ -1441,7 +1443,7 @@ spec:
format: int64
type: integer
parallel:
description: Parallel The maximum number of repair jobs to run in parallel, each node can participate in at most one repair at any given time. Default is means system will repair at maximum parallelism.
description: 'Parallel the maximum number of Scylla repair jobs that can run at the same time (on different token ranges and replicas). Each node can take part in at most one repair at any given moment. By default the maximum possible parallelism is used. The effective parallelism depends on a keyspace replication factor (RF) and the number of nodes. The formula to calculate it is as follows: number of nodes / RF, ex. for 6 node cluster with RF=3 the maximum parallelism is 2.'
format: int64
type: integer
smallTableThreshold:
Expand Down Expand Up @@ -1591,12 +1593,14 @@ spec:
failFast:
description: FailFast stop repair on first error.
type: boolean
host:
description: Host to repair, by default all hosts are repaired
type: string
id:
type: string
intensity:
description: Intensity integer >= 1 or a decimal between (0,1), higher values may result in higher speed and cluster load. 0 value means repair at maximum intensity.
format: int64
type: integer
description: Intensity how many token ranges (per shard) to repair in a single Scylla repair job. By default this is 1. If you set it to 0 the number of token ranges is adjusted to the maximum supported by node (see max_repair_ranges_in_parallel in Scylla logs). Valid values are 0 and integers >= 1. Higher values will result in increased cluster load and slightly faster repairs. Changing the intensity impacts repair granularity if you need to resume it, the higher the value the more work on resume. For Scylla clusters that DO NOT SUPPORT ROW-LEVEL REPAIR, intensity can be a decimal between (0,1). In that case it specifies percent of shards that can be repaired in parallel on a repair master node. For Scylla clusters that are row-level repair enabled, setting intensity below 1 has the same effect as setting intensity 1.
type: string
interval:
description: Interval task schedule interval e.g. 3d2h10m, valid units are d, h, m, s (default "0").
type: string
Expand All @@ -1613,7 +1617,7 @@ spec:
format: int64
type: integer
parallel:
description: Parallel The maximum number of repair jobs to run in parallel, each node can participate in at most one repair at any given time. Default is means system will repair at maximum parallelism.
description: 'Parallel the maximum number of Scylla repair jobs that can run at the same time (on different token ranges and replicas). Each node can take part in at most one repair at any given moment. By default the maximum possible parallelism is used. The effective parallelism depends on a keyspace replication factor (RF) and the number of nodes. The formula to calculate it is as follows: number of nodes / RF, ex. for 6 node cluster with RF=3 the maximum parallelism is 2.'
format: int64
type: integer
smallTableThreshold:
Expand Down
15 changes: 11 additions & 4 deletions docs/source/scylla_cluster_crd.md
Expand Up @@ -92,10 +92,17 @@ valid units are d, h, m, s (default "now").
* `numRetries` - the number of times a scheduled task will retry to run before failing (default 3).
* `dc` - list of datacenter glob patterns, e.g. `["dc1", "!otherdc*"]` used to specify the DCs to include or exclude from backup.
* `failFast` - stop repair on first error.
* `intensity` - integer >= 1 or a decimal between (0,1), higher values may result in higher speed and cluster load.
0 value means repair at maximum intensity.
* `parallel` - The maximum number of repair jobs to run in parallel, each node can participate in at most one repair
at any given time. Default is means system will repair at maximum parallelism.
* `intensity` - specifies how many token ranges (per shard) to repair in a single Scylla repair job. By default this is 1.
If you set it to 0 the number of token ranges is adjusted to the maximum supported by node (see max_repair_ranges_in_parallel in Scylla logs).
Valid values are 0 and integers >= 1. Higher values will result in increased cluster load and slightly faster repairs.
Changing the intensity impacts repair granularity if you need to resume it, the higher the value the more work on resume.
For Scylla clusters that **do not support row-level repair**, intensity can be a decimal between (0,1).
In that case it specifies percent of shards that can be repaired in parallel on a repair master node.
For Scylla clusters that are row-level repair enabled, setting intensity below 1 has the same effect as setting intensity 1.
* `parallel` - specifies the maximum number of Scylla repair jobs that can run at the same time (on different token ranges and replicas).
Each node can take part in at most one repair at any given moment. By default the maximum possible parallelism is used.
The effective parallelism depends on a keyspace replication factor (RF) and the number of nodes.
The formula to calculate it is as follows: number of nodes / RF, ex. for 6 node cluster with RF=3 the maximum parallelism is 2.
* `keyspace` - a list of keyspace/tables glob patterns, e.g. `["keyspace", "!keyspace.table_prefix_*"]`
used to include or exclude keyspaces from repair.
* `smallTableThreshold` - enable small table optimization for tables of size lower than given threshold.
Expand Down
20 changes: 12 additions & 8 deletions examples/common/operator.yaml
Expand Up @@ -1436,10 +1436,12 @@ spec:
failFast:
description: FailFast stop repair on first error.
type: boolean
host:
description: Host to repair, by default all hosts are repaired
type: string
intensity:
description: Intensity integer >= 1 or a decimal between (0,1), higher values may result in higher speed and cluster load. 0 value means repair at maximum intensity.
format: int64
type: integer
description: Intensity how many token ranges (per shard) to repair in a single Scylla repair job. By default this is 1. If you set it to 0 the number of token ranges is adjusted to the maximum supported by node (see max_repair_ranges_in_parallel in Scylla logs). Valid values are 0 and integers >= 1. Higher values will result in increased cluster load and slightly faster repairs. Changing the intensity impacts repair granularity if you need to resume it, the higher the value the more work on resume. For Scylla clusters that DO NOT SUPPORT ROW-LEVEL REPAIR, intensity can be a decimal between (0,1). In that case it specifies percent of shards that can be repaired in parallel on a repair master node. For Scylla clusters that are row-level repair enabled, setting intensity below 1 has the same effect as setting intensity 1.
type: string
interval:
description: Interval task schedule interval e.g. 3d2h10m, valid units are d, h, m, s (default "0").
type: string
Expand All @@ -1456,7 +1458,7 @@ spec:
format: int64
type: integer
parallel:
description: Parallel The maximum number of repair jobs to run in parallel, each node can participate in at most one repair at any given time. Default is means system will repair at maximum parallelism.
description: 'Parallel the maximum number of Scylla repair jobs that can run at the same time (on different token ranges and replicas). Each node can take part in at most one repair at any given moment. By default the maximum possible parallelism is used. The effective parallelism depends on a keyspace replication factor (RF) and the number of nodes. The formula to calculate it is as follows: number of nodes / RF, ex. for 6 node cluster with RF=3 the maximum parallelism is 2.'
format: int64
type: integer
smallTableThreshold:
Expand Down Expand Up @@ -1606,12 +1608,14 @@ spec:
failFast:
description: FailFast stop repair on first error.
type: boolean
host:
description: Host to repair, by default all hosts are repaired
type: string
id:
type: string
intensity:
description: Intensity integer >= 1 or a decimal between (0,1), higher values may result in higher speed and cluster load. 0 value means repair at maximum intensity.
format: int64
type: integer
description: Intensity how many token ranges (per shard) to repair in a single Scylla repair job. By default this is 1. If you set it to 0 the number of token ranges is adjusted to the maximum supported by node (see max_repair_ranges_in_parallel in Scylla logs). Valid values are 0 and integers >= 1. Higher values will result in increased cluster load and slightly faster repairs. Changing the intensity impacts repair granularity if you need to resume it, the higher the value the more work on resume. For Scylla clusters that DO NOT SUPPORT ROW-LEVEL REPAIR, intensity can be a decimal between (0,1). In that case it specifies percent of shards that can be repaired in parallel on a repair master node. For Scylla clusters that are row-level repair enabled, setting intensity below 1 has the same effect as setting intensity 1.
type: string
interval:
description: Interval task schedule interval e.g. 3d2h10m, valid units are d, h, m, s (default "0").
type: string
Expand All @@ -1628,7 +1632,7 @@ spec:
format: int64
type: integer
parallel:
description: Parallel The maximum number of repair jobs to run in parallel, each node can participate in at most one repair at any given time. Default is means system will repair at maximum parallelism.
description: 'Parallel the maximum number of Scylla repair jobs that can run at the same time (on different token ranges and replicas). Each node can take part in at most one repair at any given moment. By default the maximum possible parallelism is used. The effective parallelism depends on a keyspace replication factor (RF) and the number of nodes. The formula to calculate it is as follows: number of nodes / RF, ex. for 6 node cluster with RF=3 the maximum parallelism is 2.'
format: int64
type: integer
smallTableThreshold:
Expand Down
19 changes: 14 additions & 5 deletions pkg/api/v1alpha1/cluster_types.go
Expand Up @@ -83,18 +83,27 @@ type RepairTaskSpec struct {
DC []string `json:"dc,omitempty" mapstructure:"dc,omitempty"`
// FailFast stop repair on first error.
FailFast *bool `json:"failFast,omitempty" mapstructure:"fail_fast,omitempty"`
// Intensity integer >= 1 or a decimal between (0,1), higher values may result in higher speed and cluster load.
// 0 value means repair at maximum intensity.
Intensity *int64 `json:"intensity,omitempty" mapstructure:"intensity,omitempty"`
// Parallel The maximum number of repair jobs to run in parallel, each node can participate in at most one repair
// at any given time. Default is means system will repair at maximum parallelism.
// Intensity how many token ranges (per shard) to repair in a single Scylla repair job. By default this is 1.
// If you set it to 0 the number of token ranges is adjusted to the maximum supported by node (see max_repair_ranges_in_parallel in Scylla logs).
// Valid values are 0 and integers >= 1. Higher values will result in increased cluster load and slightly faster repairs.
// Changing the intensity impacts repair granularity if you need to resume it, the higher the value the more work on resume.
// For Scylla clusters that *do not support row-level repair*, intensity can be a decimal between (0,1).
// In that case it specifies percent of shards that can be repaired in parallel on a repair master node.
// For Scylla clusters that are row-level repair enabled, setting intensity below 1 has the same effect as setting intensity 1.
Intensity *string `json:"intensity,omitempty" mapstructure:"intensity,omitempty"`
// Parallel the maximum number of Scylla repair jobs that can run at the same time (on different token ranges and replicas).
// Each node can take part in at most one repair at any given moment. By default the maximum possible parallelism is used.
// The effective parallelism depends on a keyspace replication factor (RF) and the number of nodes.
// The formula to calculate it is as follows: number of nodes / RF, ex. for 6 node cluster with RF=3 the maximum parallelism is 2.
Parallel *int64 `json:"parallel,omitempty" mapstructure:"parallel,omitempty"`
// Keyspace a list of keyspace/tables glob patterns, e.g. 'keyspace,!keyspace.table_prefix_*'
// used to include or exclude keyspaces from repair.
Keyspace []string `json:"keyspace,omitempty" mapstructure:"keyspace,omitempty"`
// SmallTableThreshold enable small table optimization for tables of size lower than given threshold.
// Supported units [B, MiB, GiB, TiB] (default "1GiB").
SmallTableThreshold *string `json:"smallTableThreshold,omitempty" mapstructure:"small_table_threshold,omitempty"`
// Host to repair, by default all hosts are repaired
Host *string `json:"host,omitempty" mapstructure:"host,omitempty"`
}

type BackupTaskSpec struct {
Expand Down
10 changes: 10 additions & 0 deletions pkg/api/v1alpha1/cluster_validation.go
Expand Up @@ -2,6 +2,7 @@ package v1alpha1

import (
"reflect"
"strconv"

"github.com/blang/semver"
"github.com/pkg/errors"
Expand Down Expand Up @@ -84,6 +85,15 @@ func checkValues(c *ScyllaCluster) error {
}
}

for _, r := range c.Spec.Repairs {
if r.Intensity != nil {
_, err := strconv.ParseFloat(*r.Intensity, 64)
if err != nil {
return errors.Errorf("invalid intensity %q in %q repair task, it must be a float value", *r.Intensity, r.Name)
}
}
}

return nil
}

Expand Down
12 changes: 11 additions & 1 deletion pkg/api/v1alpha1/cluster_validation_test.go
Expand Up @@ -8,10 +8,10 @@ import (
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/pointer"
)

func TestCheckValues(t *testing.T) {

validCluster := unit.NewSingleRackCluster(3)
validCluster.Spec.Datacenter.Racks[0].Resources = corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
Expand All @@ -23,6 +23,11 @@ func TestCheckValues(t *testing.T) {
sameName := validCluster.DeepCopy()
sameName.Spec.Datacenter.Racks = append(sameName.Spec.Datacenter.Racks, sameName.Spec.Datacenter.Racks[0])

invalidIntensity := validCluster.DeepCopy()
invalidIntensity.Spec.Repairs = append(invalidIntensity.Spec.Repairs, v1alpha1.RepairTaskSpec{
Intensity: pointer.StringPtr("100Mib"),
})

tests := []struct {
name string
obj *v1alpha1.ScyllaCluster
Expand All @@ -38,6 +43,11 @@ func TestCheckValues(t *testing.T) {
obj: sameName,
allowed: false,
},
{
name: "invalid intensity in repair task spec",
obj: invalidIntensity,
allowed: false,
},
}

for _, test := range tests {
Expand Down
6 changes: 6 additions & 0 deletions pkg/api/v1alpha1/cluster_webhook.go
Expand Up @@ -73,6 +73,12 @@ func (c *ScyllaCluster) Default() {
if repairTask.SmallTableThreshold == nil {
c.Spec.Repairs[i].SmallTableThreshold = pointer.StringPtr("1GiB")
}
if repairTask.Intensity == nil {
c.Spec.Repairs[i].Intensity = pointer.StringPtr("1")
}
if repairTask.Parallel == nil {
c.Spec.Repairs[i].Parallel = pointer.Int64Ptr(0)
}
}

for i, backupTask := range c.Spec.Backups {
Expand Down
7 changes: 6 additions & 1 deletion pkg/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions pkg/controllers/manager/sync_test.go
Expand Up @@ -111,7 +111,7 @@ func TestManagerSynchronization(t *testing.T) {
},
DC: []string{"dc1"},
FailFast: pointer.BoolPtr(false),
Intensity: pointer.Int64Ptr(17),
Intensity: pointer.StringPtr("0.5"),
Keyspace: []string{"keyspace1"},
},
},
Expand Down Expand Up @@ -180,7 +180,7 @@ func TestManagerSynchronization(t *testing.T) {
SchedulerTaskSpec: v1alpha1.SchedulerTaskSpec{
Name: "repair",
},
Intensity: pointer.Int64Ptr(666),
Intensity: pointer.StringPtr("666"),
},
},
},
Expand All @@ -197,7 +197,7 @@ func TestManagerSynchronization(t *testing.T) {
SchedulerTaskSpec: v1alpha1.SchedulerTaskSpec{
Name: "repair",
},
Intensity: pointer.Int64Ptr(123),
Intensity: pointer.StringPtr("123"),
},
ID: "repair-id",
},
Expand All @@ -214,7 +214,7 @@ func TestManagerSynchronization(t *testing.T) {
SchedulerTaskSpec: v1alpha1.SchedulerTaskSpec{
Name: "repair",
},
Intensity: pointer.Int64Ptr(666),
Intensity: pointer.StringPtr("666"),
},
},
},
Expand All @@ -227,7 +227,7 @@ func TestManagerSynchronization(t *testing.T) {
SchedulerTaskSpec: v1alpha1.SchedulerTaskSpec{
Name: "repair",
},
Intensity: pointer.Int64Ptr(666),
Intensity: pointer.StringPtr("666"),
},
},
},
Expand All @@ -244,7 +244,7 @@ func TestManagerSynchronization(t *testing.T) {
SchedulerTaskSpec: v1alpha1.SchedulerTaskSpec{
Name: "repair",
},
Intensity: pointer.Int64Ptr(666),
Intensity: pointer.StringPtr("666"),
},
ID: "repair-id",
},
Expand Down

0 comments on commit 043eb7a

Please sign in to comment.