Skip to content

Commit

Permalink
Add an HA test for session failover (#1209)
Browse files Browse the repository at this point in the history
* Add an HA test

* Fix linter errors

* Check the term before failure
  • Loading branch information
adibrastegarnia committed Sep 14, 2020
1 parent 801bf22 commit 5ec7962
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 3 deletions.
2 changes: 2 additions & 0 deletions cmd/onos-config-tests/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ import (
"github.com/onosproject/helmit/pkg/test"
"github.com/onosproject/onos-config/test/cli"
"github.com/onosproject/onos-config/test/gnmi"
"github.com/onosproject/onos-config/test/ha"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
)

// main registers the onos-config integration test suites under the names
// "cli", "gnmi" and "ha", then hands control to the helmit test entry point.
func main() {
registry.RegisterTestSuite("cli", &cli.TestSuite{})
registry.RegisterTestSuite("gnmi", &gnmi.TestSuite{})
registry.RegisterTestSuite("ha", &ha.TestSuite{})

// test.Main comes from helmit/pkg/test; presumably it selects and runs the
// registered suites — confirm against the helmit documentation.
test.Main()
}
1 change: 1 addition & 0 deletions pkg/southbound/synchronizer/device_update.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ func (s *Session) updateDevice(connectivity topodevice.ConnectivityState, channe
}

topoDevice.Attributes[mastershipTermKey] = strconv.FormatUint(uint64(s.mastershipState.Term), 10)
topoDevice.Attributes[mastershipMasterKey] = string(s.mastershipState.Master)
_, err = s.deviceStore.Update(topoDevice)
if err != nil {
log.Errorf("Device %s is not updated %s", id, err.Error())
Expand Down
7 changes: 4 additions & 3 deletions pkg/southbound/synchronizer/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ import (
)

const (
backoffInterval = 10 * time.Millisecond
maxBackoffTime = 5 * time.Second
mastershipTermKey = "onos-config.mastership.term"
backoffInterval = 10 * time.Millisecond
maxBackoffTime = 5 * time.Second
mastershipTermKey = "onos-config.mastership.term"
mastershipMasterKey = "onos-config.mastership.master"
)

// Session a gNMI session
Expand Down
67 changes: 67 additions & 0 deletions test/ha/session_failover.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2020-present Open Networking Foundation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ha

import (
"strconv"
"testing"
"time"

"github.com/onosproject/onos-config/test/utils/gnmi"
hautils "github.com/onosproject/onos-config/test/utils/ha"
"github.com/onosproject/onos-topo/api/device"
"github.com/stretchr/testify/assert"
)

// Device attribute keys read by the failover test. The values duplicate the
// constants of the same names declared in pkg/southbound/synchronizer/session.go
// and must be kept in sync with them.
const (
// mastershipTermKey is the attribute holding the mastership term as a decimal string.
mastershipTermKey = "onos-config.mastership.term"
// mastershipMasterKey is the attribute holding the identifier of the current master node.
mastershipMasterKey = "onos-config.mastership.master"
)

// TestSessionFailOver tests that the gNMI session to a device fails over when
// the onos-config node that is master for the device crashes.
//
// The test creates a device simulator, waits until the device is reported as
// connected under mastership term 1, deletes the pod named by the device's
// master attribute, and then waits until the device is reported as connected
// again under mastership term 2.
//
// Each step is a precondition for the next one, so the test stops at the first
// failed step instead of carrying on with invalid state (for example crashing
// an arbitrary pod because the master attribute was never read).
func (s *TestSuite) TestSessionFailOver(t *testing.T) {
	simulator := gnmi.CreateSimulator(t)
	assert.NotNil(t, simulator)

	var currentTerm int
	var masterNode string
	found := gnmi.WaitForDevice(t, func(d *device.Device) bool {
		// The parse error is deliberately ignored: a missing or malformed term
		// attribute yields 0, which never matches an expected term.
		currentTerm, _ = strconv.Atoi(d.Attributes[mastershipTermKey])
		masterNode = d.Attributes[mastershipMasterKey]
		return currentTerm == 1 && gnmiSessionReady(d)
	}, 5*time.Second)
	if !assert.True(t, found, "device did not become available in mastership term 1") {
		return
	}
	assert.Equal(t, 1, currentTerm)

	// An empty prefix matches every pod name, so without this guard an
	// arbitrary pod would be crashed when the master attribute is missing.
	if !assert.NotEmpty(t, masterNode, "device has no %s attribute", mastershipMasterKey) {
		return
	}
	masterPod := hautils.FindPodWithPrefix(t, masterNode)
	if masterPod == nil {
		// FindPodWithPrefix has already marked the test as failed.
		return
	}

	// Crash master pod
	hautils.CrashPodOrFail(t, masterPod)

	// Waits for a new master to be elected (i.e. the term will be increased), it establishes a connection to the device
	// and updates the device state
	found = gnmi.WaitForDevice(t, func(d *device.Device) bool {
		// As above, an unparsable term is treated as 0 and never matches.
		currentTerm, _ = strconv.Atoi(d.Attributes[mastershipTermKey])
		return currentTerm == 2 && gnmiSessionReady(d)
	}, 70*time.Second)
	assert.True(t, found, "device did not become available in mastership term 2 after the master crashed")
}

// gnmiSessionReady reports whether the first protocol entry of the device is
// gNMI and is reachable, connected and available.
func gnmiSessionReady(d *device.Device) bool {
	if len(d.Protocols) == 0 {
		return false
	}
	p := d.Protocols[0]
	return p.Protocol == device.Protocol_GNMI &&
		p.ConnectivityState == device.ConnectivityState_REACHABLE &&
		p.ChannelState == device.ChannelState_CONNECTED &&
		p.ServiceState == device.ServiceState_AVAILABLE
}
74 changes: 74 additions & 0 deletions test/ha/suite.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright 2020-present Open Networking Foundation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ha

import (
"github.com/onosproject/onos-test/pkg/onostest"

"github.com/onosproject/helmit/pkg/helm"
"github.com/onosproject/helmit/pkg/test"
)

// testSuite embeds the helmit test.Suite type so that TestSuite picks up its methods.
type testSuite struct {
test.Suite
}

// TestSuite is the onos-config HA (high availability) test suite
type TestSuite struct {
testSuite
}

// Names passed to the onostest helpers when deriving Atomix release and
// controller names for this suite.
const (
	// onosComponentName is the component the Atomix resources are created for.
	onosComponentName = "onos-config"
	// testName identifies this test suite.
	testName = "ha-test"
)

// SetupTestSuite sets up the onos-config HA test suite by installing, in
// order, the Atomix controller chart, the Raft storage controller chart,
// onos-topo, and onos-config with a replica count of 2. The first
// installation error aborts the setup and is returned unchanged.
func (s *TestSuite) SetupTestSuite() error {
	// Atomix controller, with its scope value set to "Namespace".
	if err := helm.Chart(onostest.ControllerChartName, onostest.AtomixChartRepo).
		Release(onostest.AtomixName(testName, onosComponentName)).
		Set("scope", "Namespace").
		Install(true); err != nil {
		return err
	}

	// Raft storage controller, with its scope value set to "Namespace".
	if err := helm.Chart(onostest.RaftStorageControllerChartName, onostest.AtomixChartRepo).
		Release(onostest.RaftReleaseName(onosComponentName)).
		Set("scope", "Namespace").
		Install(true); err != nil {
		return err
	}

	// onos-topo, pointed at the Atomix controller installed above.
	if err := helm.Chart("onos-topo", onostest.OnosChartRepo).
		Release("onos-topo").
		Set("image.tag", "latest").
		Set("storage.controller", onostest.AtomixController(testName, onosComponentName)).
		Install(true); err != nil {
		return err
	}

	// onos-config with two replicas, so that a second node is available to
	// take over mastership when the first one is crashed.
	return helm.Chart("onos-config", onostest.OnosChartRepo).
		Release("onos-config").
		Set("image.tag", "latest").
		Set("replicaCount", "2").
		Set("storage.controller", onostest.AtomixController(testName, onosComponentName)).
		Install(true)
}
61 changes: 61 additions & 0 deletions test/utils/ha/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2020-present Open Networking Foundation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ha

import (
"strings"
"testing"

"github.com/onosproject/helmit/pkg/helm"
"github.com/onosproject/helmit/pkg/kubernetes"
v1 "github.com/onosproject/helmit/pkg/kubernetes/core/v1"
"github.com/stretchr/testify/assert"
)

// onosComponentName is used as both the chart name and the release name when
// looking up the onos-config pods.
const onosComponentName = "onos-config"

// GetPodListOrFail lists the pods of the onos-config release. If listing
// returns an error the test is marked as failed; execution still continues and
// whatever the list call returned is handed back to the caller.
func GetPodListOrFail(t *testing.T) []*v1.Pod {
	pods, listErr := kubernetes.
		NewForReleaseOrDie(helm.Chart(onosComponentName).Release(onosComponentName)).
		CoreV1().
		Pods().
		List()
	assert.NoError(t, listErr)
	return pods
}

// CrashPodOrFail deletes the given pod and fails the test if there is an error.
//
// A nil pod is reported as a test failure instead of being dereferenced:
// FindPodWithPrefix returns nil when no pod matches, and callers commonly pass
// its result straight in.
func CrashPodOrFail(t *testing.T, pod *v1.Pod) {
	if pod == nil {
		assert.Fail(t, "cannot crash a nil pod")
		return
	}
	assert.NoError(t, pod.Delete())
}

// FindPodWithPrefix looks for the first pod of the onos-config release whose
// name starts with the given prefix string. The test is failed and nil is
// returned if no matching pod is found.
//
// Note that an empty prefix matches every pod name, so the first listed pod is
// returned in that case.
func FindPodWithPrefix(t *testing.T, prefix string) *v1.Pod {
	for _, pod := range GetPodListOrFail(t) {
		if strings.HasPrefix(pod.Name, prefix) {
			return pod
		}
	}
	// assert.Failf takes the failure message first and the format string
	// second; passing the format string as the failure message would leave the
	// %s verb unexpanded.
	assert.Failf(t, "pod not found", "No pod found matching %s", prefix)
	return nil
}

0 comments on commit 5ec7962

Please sign in to comment.