Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to suspend chaoskube at certain weekdays #56

Merged
merged 7 commits into from Jan 22, 2018
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Expand Up @@ -139,6 +139,22 @@ spec:
...
```

## Limiting the Chaos

You can limit the time when chaos is introduced. To turn on this feature, pass a comma-separated list of abbreviated weekdays via the `--excluded-weekdays` option (e.g. `--excluded-weekdays=Sat,Sun`) and specify a `--timezone` in which to interpret those weekdays. Use `UTC`, `Local`, or pick a timezone name from the [(IANA) tz database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). If you're testing `chaoskube` from your local machine, then `Local` makes the most sense. Once you deploy `chaoskube` to your cluster, you should deploy it with a specific timezone, e.g. the one where most of your team members live, so that both your team and `chaoskube` have a common understanding of when a particular weekday begins and ends. If your team is spread across multiple time zones, it's probably best to pick `UTC`, which is also the default. Picking the wrong timezone shifts the meaning of, e.g., Saturday by a couple of hours between you and the server.

## Flags

| Option | Description | Default |
|-----------------------|----------------------------------------------------------------------|------------------------|
| `--interval` | interval between pod terminations | 10m |
| `--labels` | label selector to filter pods by | (matches everything) |
| `--annotations` | annotation selector to filter pods by | (matches everything) |
| `--namespaces` | namespace selector to filter pods by | (all namespaces) |
| `--excluded-weekdays` | weekdays when chaos is to be suspended, e.g. "Sat,Sun" | (no weekday excluded) |
| `--timezone` | timezone from tz database, e.g. "America/New_York", "UTC" or "Local" | (UTC) |
| `--dry-run` | don't kill pods, only log what would have been done | true |

## Contributing

Feel free to create issues or submit pull requests.
36 changes: 28 additions & 8 deletions chaoskube/chaoskube.go
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"math/rand"
"time"

log "github.com/sirupsen/logrus"

Expand All @@ -24,12 +25,18 @@ type Chaoskube struct {
Annotations labels.Selector
// a namespace selector which restricts the pods to choose from
Namespaces labels.Selector
// a list of weekdays when termination is suspended
ExcludedWeekdays []time.Weekday
// the timezone to apply when detecting the current weekday
Timezone *time.Location
// an instance of logrus.StdLogger to write log messages to
Logger log.StdLogger
// dry run will not allow any pod terminations
DryRun bool
// seed value for the randomizer
Seed int64
// a function to retrieve the current time
Now func() time.Time
}

// ErrPodNotFound is returned when no victim could be found
Expand All @@ -38,18 +45,24 @@ var ErrPodNotFound = errors.New("pod not found")
// msgVictimNotFound is the log message when no victim was found
var msgVictimNotFound = "No victim could be found. If that's surprising double-check your selectors."

// msgWeekdayExcluded is the log message when termination is suspended due to the weekday filter
var msgWeekdayExcluded = "This day of the week is excluded from chaos."

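// New returns a new instance of Chaoskube. It expects a kubernetes client,
// label, annotation and namespace selectors to reduce the amount of affected
// pods, a list of weekdays on which termination is suspended together with the
// timezone used to determine the current weekday, a logger, whether to enable
// dryRun mode and a seed to seed the randomizer with.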
func New(client kubernetes.Interface, labels, annotations, namespaces labels.Selector, logger log.StdLogger, dryRun bool, seed int64) *Chaoskube {
func New(client kubernetes.Interface, labels, annotations, namespaces labels.Selector, excludedWeekdays []time.Weekday, timezone *time.Location, logger log.StdLogger, dryRun bool, seed int64) *Chaoskube {
c := &Chaoskube{
Client: client,
Labels: labels,
Annotations: annotations,
Namespaces: namespaces,
Logger: logger,
DryRun: dryRun,
Seed: seed,
Client: client,
Labels: labels,
Annotations: annotations,
Namespaces: namespaces,
ExcludedWeekdays: excludedWeekdays,
Timezone: timezone,
Logger: logger,
DryRun: dryRun,
Seed: seed,
Now: time.Now,
}

rand.Seed(c.Seed)
Expand Down Expand Up @@ -109,6 +122,13 @@ func (c *Chaoskube) DeletePod(victim v1.Pod) error {

// TerminateVictim picks and deletes a victim if found.
func (c *Chaoskube) TerminateVictim() error {
for _, wd := range c.ExcludedWeekdays {
if wd == c.Now().In(c.Timezone).Weekday() {
c.Logger.Printf(msgWeekdayExcluded)
return nil
}
}

victim, err := c.Victim()
if err == ErrPodNotFound {
c.Logger.Printf(msgVictimNotFound)
Expand Down
120 changes: 100 additions & 20 deletions chaoskube/chaoskube_test.go
Expand Up @@ -5,6 +5,7 @@ import (
"log"
"strings"
"testing"
"time"

"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes/fake"
Expand All @@ -22,8 +23,9 @@ func TestNew(t *testing.T) {
labelSelector, _ := labels.Parse("foo=bar")
annotations, _ := labels.Parse("baz=waldo")
namespaces, _ := labels.Parse("qux")
excludedWeekdays := []time.Weekday{time.Friday}

chaoskube := New(client, labelSelector, annotations, namespaces, logger, false, 42)
chaoskube := New(client, labelSelector, annotations, namespaces, excludedWeekdays, time.UTC, logger, false, 42)

if chaoskube == nil {
t.Errorf("expected Chaoskube but got nothing")
Expand All @@ -45,6 +47,18 @@ func TestNew(t *testing.T) {
t.Errorf("expected %s, got %s", "qux", chaoskube.Namespaces.String())
}

if len(chaoskube.ExcludedWeekdays) != 1 {
t.Fatalf("expected %d, got %d", 1, len(chaoskube.ExcludedWeekdays))
}

if chaoskube.ExcludedWeekdays[0] != time.Friday {
t.Errorf("expected %s, got %s", time.Friday.String(), chaoskube.ExcludedWeekdays[0].String())
}

if chaoskube.Timezone != time.UTC {
t.Errorf("expected %#v, got %#v", time.UTC, chaoskube.Timezone)
}

if chaoskube.Logger != logger {
t.Errorf("expected %#v, got %#v", logger, chaoskube.Logger)
}
Expand All @@ -60,7 +74,7 @@ func TestNew(t *testing.T) {

// TestCandidates tests the set of pods available for termination
func TestCandidates(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), false, 0)
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

validateCandidates(t, chaoskube, []map[string]string{
{"namespace": "default", "name": "foo"},
Expand All @@ -76,7 +90,7 @@ func TestCandidatesLabelSelector(t *testing.T) {
t.Fatal(err)
}

chaoskube := setup(t, selector, labels.Everything(), labels.Everything(), false, 0)
chaoskube := setup(t, selector, labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

validateCandidates(t, chaoskube, []map[string]string{
{"namespace": "default", "name": "foo"},
Expand All @@ -90,7 +104,7 @@ func TestCandidatesExcludingLabelSelector(t *testing.T) {
t.Fatal(err)
}

chaoskube := setup(t, selector, labels.Everything(), labels.Everything(), false, 0)
chaoskube := setup(t, selector, labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

validateCandidates(t, chaoskube, []map[string]string{
{"namespace": "testing", "name": "bar"},
Expand All @@ -105,7 +119,7 @@ func TestCandidatesAnnotationSelector(t *testing.T) {
t.Fatal(err)
}

chaoskube := setup(t, labels.Everything(), selector, labels.Everything(), false, 0)
chaoskube := setup(t, labels.Everything(), selector, labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

validateCandidates(t, chaoskube, []map[string]string{
{"namespace": "default", "name": "foo"},
Expand All @@ -119,7 +133,7 @@ func TestCandidatesExcludingAnnotationSelector(t *testing.T) {
t.Fatal(err)
}

chaoskube := setup(t, labels.Everything(), selector, labels.Everything(), false, 0)
chaoskube := setup(t, labels.Everything(), selector, labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

validateCandidates(t, chaoskube, []map[string]string{
{"namespace": "testing", "name": "bar"},
Expand Down Expand Up @@ -149,15 +163,15 @@ func TestCandidatesNamespaces(t *testing.T) {
t.Fatal(err)
}

chaoskube := setup(t, labels.Everything(), labels.Everything(), namespaces, false, 0)
chaoskube := setup(t, labels.Everything(), labels.Everything(), namespaces, []time.Weekday{}, time.UTC, false, 0)

validateCandidates(t, chaoskube, test.pods)
}
}

// TestVictim tests that a pod is chosen from the candidates
func TestVictim(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), false, 2000)
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 2000)

validateVictim(t, chaoskube, map[string]string{
"namespace": "default", "name": "foo",
Expand All @@ -166,7 +180,7 @@ func TestVictim(t *testing.T) {

// TestAnotherVictim tests that the chosen victim is different for another seed
func TestAnotherVictim(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), false, 4000)
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 4000)

validateVictim(t, chaoskube, map[string]string{
"namespace": "testing", "name": "bar",
Expand All @@ -181,7 +195,7 @@ func TestAnotherVictimRespectsLabelSelector(t *testing.T) {
t.Fatal(err)
}

chaoskube := setup(t, selector, labels.Everything(), labels.Everything(), false, 4000)
chaoskube := setup(t, selector, labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

validateVictim(t, chaoskube, map[string]string{
"namespace": "default", "name": "foo",
Expand All @@ -190,7 +204,7 @@ func TestAnotherVictimRespectsLabelSelector(t *testing.T) {

// TestNoVictimReturnsError tests that on missing victim it returns a known error
func TestNoVictimReturnsError(t *testing.T) {
chaoskube := New(fake.NewSimpleClientset(), labels.Everything(), labels.Everything(), labels.Everything(), logger, false, 2000)
chaoskube := New(fake.NewSimpleClientset(), labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, logger, false, 0)

if _, err := chaoskube.Victim(); err != ErrPodNotFound {
t.Errorf("expected %#v, got %#v", ErrPodNotFound, err)
Expand All @@ -199,7 +213,7 @@ func TestNoVictimReturnsError(t *testing.T) {

// TestDeletePod tests deleting a particular pod
func TestDeletePod(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), false, 0)
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

victim := util.NewPod("default", "foo")

Expand All @@ -216,7 +230,7 @@ func TestDeletePod(t *testing.T) {

// TestDeletePodDryRun tests that enabled dry run doesn't delete the pod
func TestDeletePodDryRun(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), true, 0)
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, true, 0)

victim := util.NewPod("default", "foo")

Expand All @@ -232,21 +246,67 @@ func TestDeletePodDryRun(t *testing.T) {

// TestTerminateVictim tests that the correct victim pod is chosen and deleted
func TestTerminateVictim(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), false, 2000)
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, false, 0)

if err := chaoskube.TerminateVictim(); err != nil {
t.Fatal(err)
}

validateCandidates(t, chaoskube, []map[string]string{
{"namespace": "testing", "name": "bar"},
})
validateCandidatesCount(t, chaoskube, 1)
}

// TestTerminateVictimRespectsExcludedWeekday tests that no victim is terminated when the current weekday is excluded.
func TestTerminateVictimRespectsExcludedWeekdays(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{time.Friday}, time.UTC, false, 0)

// simulate that it's a Friday in our test (UTC).
chaoskube.Now = ThankGodItsFriday{}.Now

if err := chaoskube.TerminateVictim(); err != nil {
t.Fatal(err)
}

validateCandidatesCount(t, chaoskube, 2)
validateLog(t, msgWeekdayExcluded)
}

// TestTerminateVictimOnNonExcludedWeekdays tests that victim is terminated when weekday filter doesn't match.
func TestTerminateVictimOnNonExcludedWeekdays(t *testing.T) {
chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{time.Friday}, time.UTC, false, 0)

// simulate that it's a Saturday in our test (UTC).
chaoskube.Now = func() time.Time { return ThankGodItsFriday{}.Now().Add(24 * time.Hour) }

if err := chaoskube.TerminateVictim(); err != nil {
t.Fatal(err)
}

validateCandidatesCount(t, chaoskube, 1)
}

// TestTerminateVictimRespectsTimezone tests that victim is terminated when weekday filter doesn't match due to different timezone.
func TestTerminateVictimRespectsTimezone(t *testing.T) {
timezone, err := time.LoadLocation("Australia/Brisbane")
if err != nil {
t.Fatal(err)
}

chaoskube := setup(t, labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{time.Friday}, timezone, false, 0)

// simulate that it's a Friday in our test (UTC). However, in Australia it's already Saturday.
chaoskube.Now = ThankGodItsFriday{}.Now

if err := chaoskube.TerminateVictim(); err != nil {
t.Fatal(err)
}

validateCandidatesCount(t, chaoskube, 1)
}

// TestTerminateNoVictimLogsInfo tests that missing victim prints a log message
func TestTerminateNoVictimLogsInfo(t *testing.T) {
logOutput.Reset()
chaoskube := New(fake.NewSimpleClientset(), labels.Everything(), labels.Everything(), labels.Everything(), logger, false, 0)
chaoskube := New(fake.NewSimpleClientset(), labels.Everything(), labels.Everything(), labels.Everything(), []time.Weekday{}, time.UTC, logger, false, 0)

if err := chaoskube.TerminateVictim(); err != nil {
t.Fatal(err)
Expand All @@ -257,6 +317,17 @@ func TestTerminateNoVictimLogsInfo(t *testing.T) {

// helper functions

// validateCandidatesCount reports a test error unless chaoskube.Candidates()
// returns exactly expected pods. An error returned by Candidates aborts the
// test immediately via t.Fatal.
func validateCandidatesCount(t *testing.T, chaoskube *Chaoskube, expected int) {
	// Mark this function as a helper so that failures are attributed to the
	// calling test's line instead of to a line inside this function.
	t.Helper()

	pods, err := chaoskube.Candidates()
	if err != nil {
		t.Fatal(err)
	}

	if got := len(pods); got != expected {
		t.Errorf("expected %d pods, got %d pods", expected, got)
	}
}

func validateCandidates(t *testing.T, chaoskube *Chaoskube, expected []map[string]string) {
pods, err := chaoskube.Candidates()
if err != nil {
Expand Down Expand Up @@ -301,7 +372,7 @@ func validateLog(t *testing.T, msg string) {
}
}

func setup(t *testing.T, labelSelector labels.Selector, annotations labels.Selector, namespaces labels.Selector, dryRun bool, seed int64) *Chaoskube {
func setup(t *testing.T, labelSelector labels.Selector, annotations labels.Selector, namespaces labels.Selector, excludedWeekdays []time.Weekday, timezone *time.Location, dryRun bool, seed int64) *Chaoskube {
pods := []v1.Pod{
util.NewPod("default", "foo"),
util.NewPod("testing", "bar"),
Expand All @@ -317,5 +388,14 @@ func setup(t *testing.T, labelSelector labels.Selector, annotations labels.Selec

logOutput.Reset()

return New(client, labelSelector, annotations, namespaces, logger, dryRun, seed)
return New(client, labelSelector, annotations, namespaces, excludedWeekdays, timezone, logger, dryRun, seed)
}

// ThankGodItsFriday is a helper struct that contains a Now() function that always returns a Friday.
type ThankGodItsFriday struct{}

// Now returns a particular Friday: 24 Sep 1869 15:04:05 UTC.
//
// The instant is constructed with time.Date instead of being parsed from a
// string, so there is no parse error that would have to be discarded.
func (t ThankGodItsFriday) Now() time.Time {
	return time.Date(1869, time.September, 24, 15, 4, 5, 0, time.UTC)
}