Skip to content

Commit

Permalink
Remove KeepaliveTimeout from entities (#2045)
Browse files Browse the repository at this point in the history
Signed-off-by: Simon Plourde <simon@sensu.io>
  • Loading branch information
palourde committed Sep 12, 2018
1 parent 0de35c2 commit c26adb8
Show file tree
Hide file tree
Showing 14 changed files with 122 additions and 148 deletions.
11 changes: 8 additions & 3 deletions CHANGELOG.md
Expand Up @@ -12,23 +12,28 @@ Versioning](http://semver.org/spec/v2.0.0.html).

### Changed
- Changed sensuctl title colour to use terminal's configured default for bold
text.
text.
- The backend no longer forcibly binds to localhost.
- Keepalive intervals and timeouts are now configured in the check object of
keepalive events.
- The sensu-agent binary is now located at ./cmd/sensu-agent.
- Sensuctl no longer uses auto text wrapping.

### Fixed
- Fixed a bug in `sensuctl cluster health` so the correct error is handled.
- Fixed a bug where assets could not extract git tarballs.
- Fixed a bug where assets would not install if given cache directory was a
relative path.
relative path.
- Fixed a bug where an agent's collection of system information could delay
sending of keepalive messages.
sending of keepalive messages.
- Fixed a bug in nagios perfdata parsing.
- Etcd client URLs can now be a comma-separated list.
- Fixed a bug where output metric format could not be unset.
- Fixed a bug where the agent did not validate the ID at startup.

### Breaking Changes
- Removed the KeepaliveTimeout attribute from entities.

## [2.0.0-beta.4] - 2018-08-14

### Added
Expand Down
19 changes: 11 additions & 8 deletions agent/agent.go
Expand Up @@ -46,8 +46,6 @@ const (
DefaultEnvironment = "default"
// DefaultKeepaliveInterval specifies the default keepalive interval
DefaultKeepaliveInterval = 20
// DefaultKeepaliveTimeout specifies the default keepalive timeout
DefaultKeepaliveTimeout = 120
// DefaultOrganization specifies the default organization
DefaultOrganization = "default"
// DefaultPassword specifies the default password
Expand Down Expand Up @@ -92,10 +90,11 @@ type Config struct {
// start
ExtendedAttributes []byte
// KeepaliveInterval is the interval, in seconds, when agents will send a
// keepalive to sensu-backend. Default: 60
KeepaliveInterval int
// keepalive to sensu-backend.
KeepaliveInterval uint32
// KeepaliveTimeout is the time after which a sensu-agent is considered dead
// by the backend.
// by the backend. See DefaultKeepaliveTimeout in types package for default
// value.
KeepaliveTimeout uint32
// Organization sets the Agent's RBAC organization identifier
Organization string
Expand Down Expand Up @@ -143,7 +142,7 @@ func FixtureConfig() *Config {
CacheDir: path.SystemCacheDir("sensu-agent"),
Environment: DefaultEnvironment,
KeepaliveInterval: DefaultKeepaliveInterval,
KeepaliveTimeout: DefaultKeepaliveTimeout,
KeepaliveTimeout: types.DefaultKeepaliveTimeout,
Organization: DefaultOrganization,
Password: DefaultPassword,
Socket: &SocketConfig{
Expand Down Expand Up @@ -381,15 +380,19 @@ func (a *Agent) sendKeepalive() error {
}
keepalive := &types.Event{}

keepalive.Check = &types.Check{
Name: "keepalive",
Interval: a.config.KeepaliveInterval,
Timeout: a.config.KeepaliveTimeout,
}
keepalive.Entity = a.getAgentEntity()

keepalive.Timestamp = time.Now().Unix()

msgBytes, err := json.Marshal(keepalive)
if err != nil {
return err
}
msg.Payload = msgBytes

a.sendq <- msg

return nil
Expand Down
19 changes: 9 additions & 10 deletions agent/entity.go
Expand Up @@ -11,16 +11,15 @@ import (
func (a *Agent) getAgentEntity() *types.Entity {
if a.entity == nil {
e := &types.Entity{
Class: types.EntityAgentClass,
Deregister: a.config.Deregister,
Environment: a.config.Environment,
ID: a.config.AgentID,
KeepaliveTimeout: a.config.KeepaliveTimeout,
LastSeen: time.Now().Unix(),
Organization: a.config.Organization,
Redact: a.config.Redact,
Subscriptions: a.config.Subscriptions,
User: a.config.User,
Class: types.EntityAgentClass,
Deregister: a.config.Deregister,
Environment: a.config.Environment,
ID: a.config.AgentID,
LastSeen: time.Now().Unix(),
Organization: a.config.Organization,
Redact: a.config.Redact,
Subscriptions: a.config.Subscriptions,
User: a.config.User,
}

if a.config.DeregistrationHandler != "" {
Expand Down
2 changes: 1 addition & 1 deletion backend/keepalived/deregisterer.go
Expand Up @@ -61,7 +61,7 @@ func (adapterPtr *Deregistration) Deregister(entity *types.Entity) error {
if entity.Deregistration.Handler != "" {
deregistrationCheck := &types.Check{
Name: "deregistration",
Interval: entity.KeepaliveTimeout,
Interval: 1,
Subscriptions: []string{""},
Command: "",
Handlers: []string{entity.Deregistration.Handler},
Expand Down
2 changes: 1 addition & 1 deletion backend/keepalived/integration_test.go
Expand Up @@ -65,9 +65,9 @@ func TestKeepaliveMonitor(t *testing.T) {
}

entity := types.FixtureEntity("entity1")
entity.KeepaliveTimeout = 1

keepalive := &types.Event{
Check: &types.Check{Timeout: 1},
Entity: entity,
Timestamp: time.Now().Unix(),
}
Expand Down
31 changes: 17 additions & 14 deletions backend/keepalived/keepalived.go
Expand Up @@ -16,10 +16,6 @@ const (
// handling keepalive events.
DefaultHandlerCount = 10

// DefaultKeepaliveTimeout is the amount of time we consider a Keepalive
// valid for.
DefaultKeepaliveTimeout = 120 // seconds

// KeepaliveCheckName is the name of the check that is created when a
// keepalive timeout occurs.
KeepaliveCheckName = "keepalive"
Expand Down Expand Up @@ -217,7 +213,13 @@ func (k *Keepalived) processKeepalives() {
logger.WithError(err).Error("error handling entity registration")
}

timeout := int64(entity.KeepaliveTimeout)
// Retrieve the keepalive timeout from the event's check, or fall back to the
// default value when an older agent version sent the keepalive, since
// entity.KeepaliveTimeout no longer exists.
timeout := int64(types.DefaultKeepaliveTimeout)
if event.Check != nil {
timeout = int64(event.Check.Timeout)
}

supervisor := k.monitorFactory(k)
if err := supervisor.Monitor(context.TODO(), entity.ID, event, timeout); err != nil {
logger.WithError(err).Error("error monitoring entity")
Expand Down Expand Up @@ -253,13 +255,14 @@ func (k *Keepalived) handleEntityRegistration(entity *types.Entity) error {
return err
}

func createKeepaliveEvent(entity *types.Entity) *types.Event {
func createKeepaliveEvent(rawEvent *types.Event) *types.Event {
keepaliveCheck := &types.Check{
Name: KeepaliveCheckName,
Interval: entity.KeepaliveTimeout,
Interval: rawEvent.Check.Interval,
Timeout: rawEvent.Check.Timeout,
Handlers: []string{KeepaliveHandlerName},
Environment: entity.Environment,
Organization: entity.Organization,
Environment: rawEvent.Entity.Environment,
Organization: rawEvent.Entity.Organization,
Status: 1,
Issued: time.Now().Unix(),
History: []types.CheckHistory{
Expand All @@ -271,7 +274,7 @@ func createKeepaliveEvent(entity *types.Entity) *types.Event {
}
keepaliveEvent := &types.Event{
Timestamp: time.Now().Unix(),
Entity: entity,
Entity: rawEvent.Entity,
Check: keepaliveCheck,
}

Expand All @@ -281,7 +284,7 @@ func createKeepaliveEvent(entity *types.Entity) *types.Event {
func createRegistrationEvent(entity *types.Entity) *types.Event {
registrationCheck := &types.Check{
Name: RegistrationCheckName,
Interval: entity.KeepaliveTimeout,
Interval: 1,
Handlers: []string{RegistrationHandlerName},
Environment: entity.Environment,
Organization: entity.Organization,
Expand Down Expand Up @@ -312,7 +315,7 @@ func (k *Keepalived) handleUpdate(e *types.Event) error {
logger.WithError(err).Error("error updating entity in store")
return err
}
event := createKeepaliveEvent(entity)
event := createKeepaliveEvent(e)
event.Check.Status = 0
return k.bus.Publish(messaging.TopicEventRaw, event)
}
Expand All @@ -335,13 +338,13 @@ func (k *Keepalived) HandleFailure(e *types.Event) error {
}

// this is a real keepalive event, emit it.
event := createKeepaliveEvent(entity)
event := createKeepaliveEvent(e)
event.Check.Status = 1
if err := k.bus.Publish(messaging.TopicEventRaw, event); err != nil {
return err
}

logger.WithField("entity", entity.GetID()).Info("keepalive timed out, creating keepalive event for entity")
timeout := time.Now().Unix() + int64(entity.KeepaliveTimeout)
timeout := time.Now().Unix() + int64(e.Check.Timeout)
return k.store.UpdateFailingKeepalive(ctx, entity, timeout)
}
6 changes: 3 additions & 3 deletions backend/keepalived/keepalived_test.go
Expand Up @@ -242,10 +242,10 @@ func TestProcessRegistration(t *testing.T) {
}

func TestCreateKeepaliveEvent(t *testing.T) {
entity := types.FixtureEntity("entity1")
keepaliveEvent := createKeepaliveEvent(entity)
event := types.FixtureEvent("entity1", "keepalive")
keepaliveEvent := createKeepaliveEvent(event)
assert.Equal(t, "keepalive", keepaliveEvent.Check.Name)
assert.Equal(t, uint32(120), keepaliveEvent.Check.Interval)
assert.Equal(t, uint32(60), keepaliveEvent.Check.Interval)
assert.Equal(t, []string{"keepalive"}, keepaliveEvent.Check.Handlers)
assert.Equal(t, uint32(1), keepaliveEvent.Check.Status)
assert.NotEqual(t, int64(0), keepaliveEvent.Check.Issued)
Expand Down
3 changes: 2 additions & 1 deletion benchmark/cmd/main.go
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/google/uuid"
"github.com/sensu/sensu-go/agent"
"github.com/sensu/sensu-go/types"
"github.com/sensu/sensu-go/util/path"
)

Expand Down Expand Up @@ -40,7 +41,7 @@ func main() {
cfg.Environment = agent.DefaultEnvironment
cfg.ExtendedAttributes = []byte{}
cfg.KeepaliveInterval = agent.DefaultKeepaliveInterval
cfg.KeepaliveTimeout = agent.DefaultKeepaliveTimeout
cfg.KeepaliveTimeout = types.DefaultKeepaliveTimeout
cfg.Organization = agent.DefaultOrganization
cfg.Password = agent.DefaultPassword
cfg.Socket.Host = agent.DefaultAPIHost
Expand Down
5 changes: 3 additions & 2 deletions cmd/sensu-agent/start.go
Expand Up @@ -11,6 +11,7 @@ import (
"syscall"

"github.com/sensu/sensu-go/agent"
"github.com/sensu/sensu-go/types"
"github.com/sensu/sensu-go/types/dynamic"
"github.com/sensu/sensu-go/util/path"
"github.com/sensu/sensu-go/util/url"
Expand Down Expand Up @@ -126,7 +127,7 @@ func newStartCommand() *cobra.Command {
cfg.DeregistrationHandler = viper.GetString(flagDeregistrationHandler)
cfg.Environment = viper.GetString(flagEnvironment)
cfg.ExtendedAttributes = []byte(viper.GetString(flagExtendedAttributes))
cfg.KeepaliveInterval = viper.GetInt(flagKeepaliveInterval)
cfg.KeepaliveInterval = uint32(viper.GetInt(flagKeepaliveInterval))
cfg.KeepaliveTimeout = uint32(viper.GetInt(flagKeepaliveTimeout))
cfg.Organization = viper.GetString(flagOrganization)
cfg.Password = viper.GetString(flagPassword)
Expand Down Expand Up @@ -228,7 +229,7 @@ func newStartCommand() *cobra.Command {
viper.SetDefault(flagDeregistrationHandler, "")
viper.SetDefault(flagEnvironment, agent.DefaultEnvironment)
viper.SetDefault(flagKeepaliveInterval, agent.DefaultKeepaliveInterval)
viper.SetDefault(flagKeepaliveTimeout, agent.DefaultKeepaliveTimeout)
viper.SetDefault(flagKeepaliveTimeout, types.DefaultKeepaliveTimeout)
viper.SetDefault(flagOrganization, agent.DefaultOrganization)
viper.SetDefault(flagPassword, agent.DefaultPassword)
viper.SetDefault(flagRedact, dynamic.DefaultRedactFields)
Expand Down
11 changes: 5 additions & 6 deletions types/entity.go
Expand Up @@ -74,12 +74,11 @@ func GetEntitySubscription(entityID string) string {
// FixtureEntity returns a testing fixture for an Entity object.
func FixtureEntity(id string) *Entity {
return &Entity{
ID: id,
Class: "host",
Subscriptions: []string{"linux"},
Environment: "default",
Organization: "default",
KeepaliveTimeout: 120,
ID: id,
Class: "host",
Subscriptions: []string{"linux"},
Environment: "default",
Organization: "default",
}
}

Expand Down

0 comments on commit c26adb8

Please sign in to comment.