Skip to content

Commit

Permalink
Merge pull request #15313 from tlm/teardown-2.0
Browse files Browse the repository at this point in the history
#15313

With the introduction of sidecar charms and Pebble Juju had no logic to tear down charms. Specifically when a pod/unit inside of a Statefulset for Kubernetes was going away through the receiving of (SIGTERM) it would be given at most 5 seconds to run the charms tear down hooks. This also applied to cases of using `juju scale-application` as this subsequently just trigger a scale within Kubernetes.

Obviously in a lot of situations this isn't enough time for a charm to clean up after itself and push the state of the application into one that can be safely shut down. As part of this PR we are introducing the ability to use `juju scale-application` to now take individual units of an application into a "zombie" state by where tear down hooks are run and given an infinite amount of time to execute.

Once the desired number of units have entered a zombie/dead state then Juju will command the Kubernetes deployment resource to scale down.

To make sure that the zombie pod starts to be de-considered for traffic after tear down the charms readiness probe is set to failing.

This PR also fixes storage reattaching for sidecar applications.

Depends on juju/description#128

## Checklist

- [x] Code style: imports ordered, good names, simple structure, etc
- [x] Comments saying why design decisions were made
- [x] Go unit tests, with comments saying what you're testing
- [x] [Integration tests](https://github.com/juju/juju/tree/develop/tests), with comments saying what you're testing
- [x] [doc.go](https://discourse.charmhub.io/t/readme-in-packages/451) added or updated in changed packages

## QA steps

- create a sidecar charm that sleeps on remove hook
- bootstrap k8s
- `juju deploy mycharm`
- `juju scale-application 3`
- wait for units to settle
- `juju scale-application 0`
- wait for units to start running the remove hook
- `juju scale-application 3`
- observe that the units get removed/pods get removed, then the pods come back and new units come back.
- verify storage is reattached to the same unit
- verify all hooks have run before the unit and pod are removed

Things to also try:
- during scale down, delete a pod from the application, it should come back and continue the hooks.
- add a sleep in storage-attached or storage-detached hook and ensure the pod can be deleted and comes back during that hook and it reruns successfully.
- manually scale the stateful set in kubernetes, notice that the unwanted pods block in the init container.

## Documentation changes

N/A

## Bug reference

https://bugs.launchpad.net/juju/+bug/1951415
https://bugs.launchpad.net/juju/+bug/1995466
https://bugs.launchpad.net/juju/+bug/2018147
  • Loading branch information
jujubot committed May 29, 2023
2 parents c14bcb3 + 0ab55fd commit 243fd66
Show file tree
Hide file tree
Showing 128 changed files with 4,962 additions and 2,926 deletions.
6 changes: 6 additions & 0 deletions api/agent/caasapplication/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package caasapplication

import (
"github.com/juju/errors"
"github.com/juju/names/v4"

"github.com/juju/juju/api/base"
Expand Down Expand Up @@ -42,6 +43,11 @@ func (c *Client) UnitIntroduction(podName string, podUUID string) (*UnitConfig,
return nil, err
}
if err := result.Error; err != nil {
if params.IsCodeAlreadyExists(err) {
return nil, errors.AlreadyExists
} else if params.IsCodeNotAssigned(err) {
return nil, errors.NotAssigned
}
return nil, err
}
return &UnitConfig{
Expand Down
47 changes: 45 additions & 2 deletions api/agent/caasapplication/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
package caasapplication_test

import (
"errors"

"github.com/juju/errors"
"github.com/juju/names/v4"
"github.com/juju/testing"
jc "github.com/juju/testing/checkers"
Expand Down Expand Up @@ -76,6 +75,50 @@ func (s *provisionerSuite) TestUnitIntroductionFail(c *gc.C) {
c.Assert(called, jc.IsTrue)
}

func (s *provisionerSuite) TestUnitIntroductionFailAlreadyExists(c *gc.C) {
var called bool
client := newClient(func(objType string, version int, id, request string, a, result interface{}) error {
called = true
c.Assert(objType, gc.Equals, "CAASApplication")
c.Assert(id, gc.Equals, "")
c.Assert(request, gc.Equals, "UnitIntroduction")
c.Assert(a, gc.FitsTypeOf, params.CAASUnitIntroductionArgs{})
args := a.(params.CAASUnitIntroductionArgs)
c.Assert(args.PodName, gc.Equals, "pod-name")
c.Assert(args.PodUUID, gc.Equals, "pod-uuid")
c.Assert(result, gc.FitsTypeOf, &params.CAASUnitIntroductionResult{})
*(result.(*params.CAASUnitIntroductionResult)) = params.CAASUnitIntroductionResult{
Error: &params.Error{Code: params.CodeAlreadyExists},
}
return nil
})
_, err := client.UnitIntroduction("pod-name", "pod-uuid")
c.Assert(errors.Is(err, errors.AlreadyExists), jc.IsTrue)
c.Assert(called, jc.IsTrue)
}

func (s *provisionerSuite) TestUnitIntroductionFailNotAssigned(c *gc.C) {
var called bool
client := newClient(func(objType string, version int, id, request string, a, result interface{}) error {
called = true
c.Assert(objType, gc.Equals, "CAASApplication")
c.Assert(id, gc.Equals, "")
c.Assert(request, gc.Equals, "UnitIntroduction")
c.Assert(a, gc.FitsTypeOf, params.CAASUnitIntroductionArgs{})
args := a.(params.CAASUnitIntroductionArgs)
c.Assert(args.PodName, gc.Equals, "pod-name")
c.Assert(args.PodUUID, gc.Equals, "pod-uuid")
c.Assert(result, gc.FitsTypeOf, &params.CAASUnitIntroductionResult{})
*(result.(*params.CAASUnitIntroductionResult)) = params.CAASUnitIntroductionResult{
Error: &params.Error{Code: params.CodeNotAssigned},
}
return nil
})
_, err := client.UnitIntroduction("pod-name", "pod-uuid")
c.Assert(errors.Is(err, errors.NotAssigned), jc.IsTrue)
c.Assert(called, jc.IsTrue)
}

func (s *provisionerSuite) TestUnitTerminating(c *gc.C) {
tests := []struct {
willRestart bool
Expand Down
24 changes: 24 additions & 0 deletions api/agent/lifeflag/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2023 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package lifeflag

import (
"github.com/juju/names/v4"

"github.com/juju/juju/api/base"
"github.com/juju/juju/api/common/lifeflag"
"github.com/juju/juju/core/life"
"github.com/juju/juju/core/watcher"
)

// Client is the client used for connecting to the life flag facade.
type Client interface {
Life(names.Tag) (life.Value, error)
Watch(names.Tag) (watcher.NotifyWatcher, error)
}

// NewClient creates a new life flag client.
func NewClient(caller base.APICaller) Client {
return lifeflag.NewClient(caller, "AgentLifeFlag")
}
37 changes: 37 additions & 0 deletions api/agent/uniter/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,40 @@ func (s *stateSuite) TestOpenedMachinePortRangesByEndpoint(c *gc.C) {
},
})
}

func (s *stateSuite) TestUnitWorkloadVersion(c *gc.C) {
apiCaller := testing.APICallerFunc(func(objType string, version int, id, request string, arg, result interface{}) error {
c.Assert(objType, gc.Equals, "Uniter")
c.Assert(request, gc.Equals, "WorkloadVersion")
c.Assert(arg, gc.DeepEquals, params.Entities{Entities: []params.Entity{{Tag: "unit-mysql-0"}}})
c.Assert(result, gc.FitsTypeOf, &params.StringResults{})
*(result.(*params.StringResults)) = params.StringResults{
Results: []params.StringResult{{Result: "mysql-1.2.3"}},
}
return nil
})
caller := testing.BestVersionCaller{apiCaller, 17}
client := uniter.NewState(caller, names.NewUnitTag("mysql/0"))

workloadVersion, err := client.UnitWorkloadVersion(names.NewUnitTag("mysql/0"))
c.Assert(err, jc.ErrorIsNil)
c.Assert(workloadVersion, gc.Equals, "mysql-1.2.3")
}

func (s *stateSuite) TestSetUnitWorkloadVersion(c *gc.C) {
apiCaller := testing.APICallerFunc(func(objType string, version int, id, request string, arg, result interface{}) error {
c.Assert(objType, gc.Equals, "Uniter")
c.Assert(request, gc.Equals, "SetWorkloadVersion")
c.Assert(arg, gc.DeepEquals, params.EntityWorkloadVersions{Entities: []params.EntityWorkloadVersion{{Tag: "unit-mysql-0", WorkloadVersion: "mysql-1.2.3"}}})
c.Assert(result, gc.FitsTypeOf, &params.ErrorResults{})
*(result.(*params.ErrorResults)) = params.ErrorResults{
Results: []params.ErrorResult{{}},
}
return nil
})
caller := testing.BestVersionCaller{apiCaller, 17}
client := uniter.NewState(caller, names.NewUnitTag("mysql/0"))

err := client.SetUnitWorkloadVersion(names.NewUnitTag("mysql/0"), "mysql-1.2.3")
c.Assert(err, jc.ErrorIsNil)
}
3 changes: 2 additions & 1 deletion api/agent/uniter/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/juju/juju/api/base"
apiwatcher "github.com/juju/juju/api/watcher"
apiservererrors "github.com/juju/juju/apiserver/errors"
"github.com/juju/juju/core/watcher"
"github.com/juju/juju/rpc/params"
)
Expand Down Expand Up @@ -115,7 +116,7 @@ func (sa *StorageAccessor) StorageAttachment(storageTag names.StorageTag, unitTa
}
result := results.Results[0]
if result.Error != nil {
return params.StorageAttachment{}, result.Error
return params.StorageAttachment{}, apiservererrors.RestoreError(result.Error)
}
return result.Result, nil
}
Expand Down
37 changes: 37 additions & 0 deletions api/agent/uniter/uniter.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,3 +644,40 @@ func (st *State) CloudSpec() (*params.CloudSpec, error) {
}
return result.Result, nil
}

// UnitWorkloadVersion returns the version of the workload reported by
// the specified unit.
func (st *State) UnitWorkloadVersion(tag names.UnitTag) (string, error) {
var results params.StringResults
args := params.Entities{
Entities: []params.Entity{{Tag: tag.String()}},
}
err := st.facade.FacadeCall("WorkloadVersion", args, &results)
if err != nil {
return "", err
}
if len(results.Results) != 1 {
return "", fmt.Errorf("expected 1 result, got %d", len(results.Results))
}
result := results.Results[0]
if result.Error != nil {
return "", result.Error
}
return result.Result, nil
}

// SetUnitWorkloadVersion sets the specified unit's workload version to
// the provided value.
func (st *State) SetUnitWorkloadVersion(tag names.UnitTag, version string) error {
var result params.ErrorResults
args := params.EntityWorkloadVersions{
Entities: []params.EntityWorkloadVersion{
{Tag: tag.String(), WorkloadVersion: version},
},
}
err := st.facade.FacadeCall("SetWorkloadVersion", args, &result)
if err != nil {
return err
}
return result.OneError()
}
41 changes: 20 additions & 21 deletions api/controller/lifeflag/facade.go → api/common/lifeflag/facade.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2016 Canonical Ltd.
// Copyright 2023 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package lifeflag
Expand All @@ -8,6 +8,7 @@ import (
"github.com/juju/names/v4"

"github.com/juju/juju/api/base"
apiwatcher "github.com/juju/juju/api/watcher"
"github.com/juju/juju/core/life"
"github.com/juju/juju/core/watcher"
"github.com/juju/juju/rpc/params"
Expand All @@ -16,21 +17,19 @@ import (
// NewWatcherFunc exists to let us test Watch properly.
type NewWatcherFunc func(base.APICaller, params.NotifyWatchResult) watcher.NotifyWatcher

// Facade makes calls to the LifeFlag facade.
type Facade struct {
caller base.FacadeCaller
newWatcher NewWatcherFunc
// Client makes calls to the LifeFlag facade.
type Client struct {
caller base.FacadeCaller
}

// NewFacade returns a new Facade using the supplied caller.
func NewFacade(caller base.APICaller, newWatcher NewWatcherFunc) *Facade {
return &Facade{
caller: base.NewFacadeCaller(caller, "LifeFlag"),
newWatcher: newWatcher,
// NewClient returns a new Facade using the supplied caller.
func NewClient(caller base.APICaller, facadeName string) *Client {
return &Client{
caller: base.NewFacadeCaller(caller, facadeName),
}
}

// ErrNotFound indicates that the requested entity no longer exists.
// ErrEntityNotFound indicates that the requested entity no longer exists.
//
// We avoid errors.NotFound, because errors.NotFound is non-specific, and
// it's our job to communicate *this specific condition*. There are many
Expand All @@ -41,17 +40,17 @@ func NewFacade(caller base.APICaller, newWatcher NewWatcherFunc) *Facade {
// We're still vulnerable to apiservers returning unjustified CodeNotFound
// but at least we're safe from accidental errors.NotFound injection in
// the api client mechanism.
var ErrNotFound = errors.New("entity not found")
const ErrEntityNotFound = errors.ConstError("entity not found")

// Watch returns a NotifyWatcher that sends a value whenever the
// entity's life value may have changed; or ErrNotFound; or some
// entity's life value may have changed; or ErrEntityNotFound; or some
// other error.
func (facade *Facade) Watch(entity names.Tag) (watcher.NotifyWatcher, error) {
func (c *Client) Watch(entity names.Tag) (watcher.NotifyWatcher, error) {
args := params.Entities{
Entities: []params.Entity{{Tag: entity.String()}},
}
var results params.NotifyWatchResults
err := facade.caller.FacadeCall("Watch", args, &results)
err := c.caller.FacadeCall("Watch", args, &results)
if err != nil {
return nil, errors.Trace(err)
}
Expand All @@ -61,22 +60,22 @@ func (facade *Facade) Watch(entity names.Tag) (watcher.NotifyWatcher, error) {
result := results.Results[0]
if err := result.Error; err != nil {
if params.IsCodeNotFound(err) {
return nil, ErrNotFound
return nil, ErrEntityNotFound
}
return nil, errors.Trace(result.Error)
}
w := facade.newWatcher(facade.caller.RawAPICaller(), result)
w := apiwatcher.NewNotifyWatcher(c.caller.RawAPICaller(), result)
return w, nil
}

// Life returns the entity's life value; or ErrNotFound; or some
// Life returns the entity's life value; or ErrEntityNotFound; or some
// other error.
func (facade *Facade) Life(entity names.Tag) (life.Value, error) {
func (c *Client) Life(entity names.Tag) (life.Value, error) {
args := params.Entities{
Entities: []params.Entity{{Tag: entity.String()}},
}
var results params.LifeResults
err := facade.caller.FacadeCall("Life", args, &results)
err := c.caller.FacadeCall("Life", args, &results)
if err != nil {
return "", errors.Trace(err)
}
Expand All @@ -86,7 +85,7 @@ func (facade *Facade) Life(entity names.Tag) (life.Value, error) {
result := results.Results[0]
if err := result.Error; err != nil {
if params.IsCodeNotFound(err) {
return "", ErrNotFound
return "", ErrEntityNotFound
}
return "", errors.Trace(result.Error)
}
Expand Down
Loading

0 comments on commit 243fd66

Please sign in to comment.