Skip to content

Commit

Permalink
cluster: major upgrade procedure (#51)
Browse files Browse the repository at this point in the history
The current patch version upgrade procedure is run when the cluster is being
upgraded to the next patch version.
The new major upgrade procedure handles all other upgrades.

Procedure:
* Check if the cluster has schema agreement (using API call)
* Take `system` and `system_schema` tables snapshot on all nodes in
parallel.

For each node:

* Drain node
* Backup the data - snapshot of all data keyspaces
* Update Scylla image by restarting Pod
* Validate if node is up and version is updated via API call
* Clear data snapshot

After last node:

* Delete `system` and `system_schema` table snapshots on all nodes in
parallel

Fixes #51
  • Loading branch information
zimnx committed Dec 11, 2020
1 parent 78a9f6f commit 4c88e11
Show file tree
Hide file tree
Showing 22 changed files with 1,761 additions and 151 deletions.
29 changes: 29 additions & 0 deletions config/operator/crd/bases/scylla.scylladb.com_scyllaclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1618,6 +1618,35 @@ spec:
- name
type: object
type: array
upgrade:
description: UpgradeStatus contains state of ongoing upgrade procedure.
properties:
currentNode:
description: CurrentNode node under upgrade.
type: string
currentRack:
description: CurrentRack rack under upgrade.
type: string
dataSnapshotTag:
description: DataSnapshotTag snapshot tag of data keyspaces.
type: string
fromVersion:
description: FromVersion reflects from which version ScyllaCluster is being upgraded.
type: string
state:
description: State reflect current state machine state.
type: string
systemSnapshotTag:
description: SystemSnapshotTag snapshot tag of system keyspaces.
type: string
toVersion:
description: ToVersion reflects to which version ScyllaCluster is being upgraded.
type: string
required:
- fromVersion
- state
- toVersion
type: object
type: object
type: object
version: v1alpha1
Expand Down
29 changes: 29 additions & 0 deletions examples/common/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,35 @@ spec:
- name
type: object
type: array
upgrade:
description: UpgradeStatus contains state of ongoing upgrade procedure.
properties:
currentNode:
description: CurrentNode node under upgrade.
type: string
currentRack:
description: CurrentRack rack under upgrade.
type: string
dataSnapshotTag:
description: DataSnapshotTag snapshot tag of data keyspaces.
type: string
fromVersion:
description: FromVersion reflects from which version ScyllaCluster is being upgraded.
type: string
state:
description: State reflect current state machine state.
type: string
systemSnapshotTag:
description: SystemSnapshotTag snapshot tag of system keyspaces.
type: string
toVersion:
description: ToVersion reflects to which version ScyllaCluster is being upgraded.
type: string
required:
- fromVersion
- state
- toVersion
type: object
type: object
type: object
version: v1alpha1
Expand Down
19 changes: 19 additions & 0 deletions pkg/api/v1alpha1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,25 @@ type ClusterStatus struct {
ManagerID *string `json:"managerId,omitempty"`
Repairs []RepairTaskStatus `json:"repairs,omitempty"`
Backups []BackupTaskStatus `json:"backups,omitempty"`
Upgrade *UpgradeStatus `json:"upgrade,omitempty"`
}

// UpgradeStatus contains state of ongoing upgrade procedure.
// It is stored under the "upgrade" key of the cluster status; State,
// FromVersion and ToVersion are always serialized, the remaining fields are
// omitted from the JSON output when empty.
type UpgradeStatus struct {
// State reflects the current state of the upgrade state machine.
State string `json:"state"`
// CurrentNode is the node currently under upgrade.
CurrentNode string `json:"currentNode,omitempty"`
// CurrentRack is the rack currently under upgrade.
CurrentRack string `json:"currentRack,omitempty"`
// FromVersion reflects from which version ScyllaCluster is being upgraded.
FromVersion string `json:"fromVersion"`
// ToVersion reflects to which version ScyllaCluster is being upgraded.
ToVersion string `json:"toVersion"`
// SystemSnapshotTag is the snapshot tag of system keyspaces.
SystemSnapshotTag string `json:"systemSnapshotTag,omitempty"`
// DataSnapshotTag is the snapshot tag of data keyspaces.
DataSnapshotTag string `json:"dataSnapshotTag,omitempty"`
}

// RackStatus is the status of a Scylla Rack
Expand Down
13 changes: 0 additions & 13 deletions pkg/api/v1alpha1/cluster_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,6 @@ func checkValues(c *ScyllaCluster) error {
}

func checkTransitions(old, new *ScyllaCluster) error {
oldVersion, err := semver.Parse(old.Spec.Version)
if err != nil {
return errors.Errorf("invalid old semantic version, err=%s", err)
}
newVersion, err := semver.Parse(new.Spec.Version)
if err != nil {
return errors.Errorf("invalid new semantic version, err=%s", err)
}
// Check that version remained the same
if newVersion.Major != oldVersion.Major || newVersion.Minor != oldVersion.Minor {
return errors.Errorf("only upgrading of patch versions are supported")
}

// Check that repository remained the same
if !reflect.DeepEqual(old.Spec.Repository, new.Spec.Repository) {
return errors.Errorf("repository change is currently not supported, old=%v, new=%v", *old.Spec.Repository, *new.Spec.Repository)
Expand Down
4 changes: 2 additions & 2 deletions pkg/api/v1alpha1/cluster_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ func TestCheckTransitions(t *testing.T) {
name: "major version changed",
old: unit.NewSingleRackCluster(3),
new: unit.NewDetailedSingleRackCluster("test-cluster", "test-ns", "repo", "3.3.1", "test-dc", "test-rack", 3),
allowed: false,
allowed: true,
},
{
name: "minor version changed",
old: unit.NewSingleRackCluster(3),
new: unit.NewDetailedSingleRackCluster("test-cluster", "test-ns", "repo", "2.4.2", "test-dc", "test-rack", 3),
allowed: false,
allowed: true,
},
{
name: "patch version changed",
Expand Down
5 changes: 5 additions & 0 deletions pkg/controllers/cluster/actions/export_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (C) 2017 ScyllaDB

package actions

// CQLSession is an exported alias of the unexported cqlSession type.
// It lives in a _test.go file, so it is only compiled for tests and lets
// code outside this package refer to the type without widening the
// package's regular API.
type CQLSession = cqlSession
99 changes: 99 additions & 0 deletions pkg/controllers/cluster/actions/main_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// +build integration

// Copyright (C) 2017 ScyllaDB

package actions_test

import (
"context"
"fmt"
"os"
"testing"
"time"

"github.com/go-logr/zapr"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
"github.com/scylladb/go-log"
"github.com/scylladb/scylla-operator/pkg/cmd/options"
"github.com/scylladb/scylla-operator/pkg/controllers/cluster"
"github.com/scylladb/scylla-operator/pkg/test/integration"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"k8s.io/klog"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/envtest/printer"
)

var (
// testEnv is the integration test environment shared by the suite.
// It is created in TestMain before any test runs and stopped there
// after the tests finish.
testEnv *integration.TestEnvironment
// ctx is a package-level background context.
ctx = context.Background()
)

const (
// retryInterval is the poll retry interval the test environment is
// configured with (integration.WithPollRetryInterval).
retryInterval = 200 * time.Millisecond
// timeout is the poll timeout the test environment is configured with
// (integration.WithPollTimeout).
timeout = 30 * time.Second
)

// TestMain prepares the shared integration environment, runs the test suite
// and tears the environment down again.
//
// It sets up logging, creates the test environment with the package's poll
// retry interval and timeout, starts the manager in the background, registers
// the cluster reconciler with that manager, and finally exits the process
// with the code returned by m.Run.
//
// Any failure during setup or teardown panics, as there is no meaningful way
// to continue without a working environment.
func TestMain(m *testing.M) {
	ctx := log.WithNewTraceID(context.Background())
	atom := zap.NewAtomicLevelAt(zapcore.DebugLevel)
	logger, err := log.NewProduction(log.Config{
		Level: atom,
	})
	if err != nil {
		panic(errors.Wrap(err, "create logger"))
	}
	zlogger, err := zap.NewDevelopment()
	if err != nil {
		panic(errors.Wrap(err, "create development logger"))
	}
	ctrl.SetLogger(zapr.NewLogger(zlogger))
	klog.InitFlags(nil)
	klog.SetOutput(os.Stdout)

	logger.Info(ctx, "Creating test environment")
	testEnv, err = integration.NewTestEnvironment(logger.Named("env"),
		integration.WithPollRetryInterval(retryInterval),
		integration.WithPollTimeout(timeout),
	)
	if err != nil {
		panic(err)
	}

	logger.Info(ctx, "Starting test manager")
	go func() {
		if err := testEnv.StartManager(ctx); err != nil {
			panic(fmt.Sprintf("Failed to start the envtest manager: %v", err))
		}
	}()

	options.GetOperatorOptions().Image = "scylladb/scylla-operator"

	reconciler, err := cluster.New(ctx, testEnv.Manager, logger)
	if err != nil {
		panic(errors.Wrap(err, "create cluster reconciler"))
	}
	logger.Info(ctx, "Reconciler setup")
	if err := reconciler.SetupWithManager(testEnv.Manager); err != nil {
		panic(errors.Wrap(err, "setup cluster reconciler"))
	}

	logger.Info(ctx, "Starting tests")
	// Run tests
	code := m.Run()
	logger.Info(ctx, "Tests done")

	// Restore the operator image option explicitly: os.Exit below terminates
	// the process without running deferred functions, so a defer would never
	// fire here.
	options.GetOperatorOptions().Image = ""

	// Tearing down the test environment
	if err := testEnv.Stop(); err != nil {
		panic(fmt.Sprintf("Failed to stop the envtest: %v", err))
	}

	os.Exit(code)
}

// TestAPIs is the go test entry point for the Ginkgo suite: it registers
// Ginkgo's Fail as the Gomega failure handler and then runs all specs of the
// "Controller Suite" with the default reporter plus a newline reporter.
func TestAPIs(t *testing.T) {
	RegisterFailHandler(Fail)

	const suiteName = "Controller Suite"
	customReporters := []Reporter{printer.NewlineReporter{}}

	RunSpecsWithDefaultAndCustomReporters(t, suiteName, customReporters)
}

0 comments on commit 4c88e11

Please sign in to comment.