Skip to content

Commit

Permalink
fix: restart loop in windows service
Browse files Browse the repository at this point in the history
  • Loading branch information
cristianciutea committed Aug 25, 2021
1 parent d6b8a52 commit dd35e33
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 63 deletions.
77 changes: 66 additions & 11 deletions build/.goreleaser_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ builds:
main: ./cmd/newrelic-infra
binary: newrelic-infra
env:
- CGO_ENABLED=0
- CGO_ENABLED=1
goos:
- linux
ldflags:
Expand All @@ -22,7 +22,7 @@ builds:
main: ./cmd/newrelic-infra-ctl
binary: newrelic-infra-ctl
env:
- CGO_ENABLED=0
- CGO_ENABLED=1
goos:
- linux
ldflags:
Expand All @@ -35,7 +35,7 @@ builds:
main: ./cmd/newrelic-infra-service
binary: newrelic-infra-service
env:
- CGO_ENABLED=0
- CGO_ENABLED=1
goos:
- linux
ldflags:
Expand Down Expand Up @@ -133,17 +133,64 @@ builds:
main: ./cmd/newrelic-infra
binary: newrelic-infra
env:
- CGO_ENABLED=0
- CGO_ENABLED=1
goos:
- linux
ldflags:
- -s -w -X main.buildVersion={{.Version}}
- -s -w -X main.gitCommit={{.Commit}}
goarch:
- amd64
gomips:
- hardfloat

- id: linux-newrelic-infra-ctl
main: ./cmd/newrelic-infra-ctl
binary: newrelic-infra-ctl
env:
- CGO_ENABLED=1
goos:
- linux
ldflags:
- -s -w -X main.buildVersion={{.Version}}
- -s -w -X main.gitCommit={{.Commit}}
goarch:
- amd64
gomips:
- hardfloat

- id: linux-newrelic-infra-service
main: ./cmd/newrelic-infra-service
binary: newrelic-infra-service
env:
- CGO_ENABLED=1
goos:
- linux
ldflags:
- -s -w -X main.buildVersion={{.Version}}
- -s -w -X main.gitCommit={{.Commit}}
goarch:
- amd64
gomips:
- hardfloat

###
### architectures where CGO is not supported
###
- id: linux-newrelic-infra-no-cgo
main: ./cmd/newrelic-infra
binary: newrelic-infra
env:
- CGO_ENABLED=0
goos:
- linux
ldflags:
- -s -w -X main.buildVersion={{.Version}}
- -s -w -X main.gitCommit={{.Commit}}
goarch:
- 386
- arm
- arm64
- 386
- mips
- mips64
- mipsle
Expand All @@ -153,7 +200,7 @@ builds:
gomips:
- hardfloat

- id: linux-newrelic-infra-ctl
- id: linux-newrelic-infra-ctl-no-cgo
main: ./cmd/newrelic-infra-ctl
binary: newrelic-infra-ctl
env:
Expand All @@ -164,10 +211,9 @@ builds:
- -s -w -X main.buildVersion={{.Version}}
- -s -w -X main.gitCommit={{.Commit}}
goarch:
- amd64
- 386
- arm
- arm64
- 386
- mips
- mips64
- mipsle
Expand All @@ -177,7 +223,7 @@ builds:
gomips:
- hardfloat

- id: linux-newrelic-infra-service
- id: linux-newrelic-infra-service-no-cgo
main: ./cmd/newrelic-infra-service
binary: newrelic-infra-service
env:
Expand All @@ -188,10 +234,9 @@ builds:
- -s -w -X main.buildVersion={{.Version}}
- -s -w -X main.gitCommit={{.Commit}}
goarch:
- amd64
- 386
- arm
- arm64
- 386
- mips
- mips64
- mipsle
Expand All @@ -212,6 +257,16 @@ archives:
format: tar.gz
files:
- none*
- id: linux-tarball-infrastructure-agent-no-cgo
builds:
- linux-newrelic-infra-no-cgo
- linux-newrelic-infra-ctl-no-cgo
- linux-newrelic-infra-service-no-cgo
name_template: "newrelic-infra_{{.Os}}_{{ .Env.TAG }}_{{ .Arch }}_dirty"
wrap_in_directory: false
format: tar.gz
files:
- none*

nfpms:
###############################################################################
Expand Down
13 changes: 10 additions & 3 deletions build/container/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,16 @@ USE_BUILDX ?= false
DOCKER_PUBLISH ?= false

AGENT_ARCH ?= $(DOCKER_ARCH)
CGO_ENABLED := "-no-cgo"

ifeq ($(AGENT_ARCH), amd64)
CGO_ENABLED := ""

endif

ifeq ($(AGENT_ARCH), arm)
AGENT_ARCH := $(AGENT_ARCH)_6

endif

ifeq ($(DOCKER_PUBLISH), true)
Expand Down Expand Up @@ -125,9 +132,9 @@ base/get-integrations : embed-integrations
.PHONY : base/get-infra-agent
base/get-infra-agent : workspace
base/get-infra-agent:
@(cp $(AGENT_BUILD_TARGET_DIR)/$(GOOS)-newrelic-infra-ctl_$(GOOS)_$(AGENT_ARCH)/* $(PROJECT_WORKSPACE)/)
@(cp $(AGENT_BUILD_TARGET_DIR)/$(GOOS)-newrelic-infra-service_$(GOOS)_$(AGENT_ARCH)/* $(PROJECT_WORKSPACE)/)
@(cp $(AGENT_BUILD_TARGET_DIR)/$(GOOS)-newrelic-infra_$(GOOS)_$(AGENT_ARCH)/* $(PROJECT_WORKSPACE)/)
@(cp $(AGENT_BUILD_TARGET_DIR)/$(GOOS)-newrelic-infra-ctl$(CGO_ENABLED)_$(GOOS)_$(AGENT_ARCH)/* $(PROJECT_WORKSPACE)/)
@(cp $(AGENT_BUILD_TARGET_DIR)/$(GOOS)-newrelic-infra-service$(CGO_ENABLED)_$(GOOS)_$(AGENT_ARCH)/* $(PROJECT_WORKSPACE)/)
@(cp $(AGENT_BUILD_TARGET_DIR)/$(GOOS)-newrelic-infra$(CGO_ENABLED)_$(GOOS)_$(AGENT_ARCH)/* $(PROJECT_WORKSPACE)/)

.PHONY : build/base
build/base : workspace/assets
Expand Down
19 changes: 1 addition & 18 deletions cmd/newrelic-infra-service/newrelic-infra-service.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"os"

"github.com/newrelic/infrastructure-agent/internal/agent/service"
"github.com/newrelic/infrastructure-agent/internal/os/api"
"github.com/newrelic/infrastructure-agent/pkg/log"
)

Expand All @@ -17,8 +16,7 @@ func main() {
log.Info("Creating service...")

// Create a native service wrapper for the agent and start it up.
exitCodeC := make(chan int, 1)
agentSvc, err := service.New(exitCodeC, os.Args...)
agentSvc, err := service.New(os.Args...)

if err != nil {
log.WithError(err).Error("Initializing service manager support...")
Expand All @@ -28,19 +26,4 @@ func main() {
if err = agentSvc.Run(); err != nil {
log.WithError(err).Warn("Service exiting abnormally.")
}

err = service.WaitForExitOrTimeout(exitCodeC)
if err == nil {
log.Info("Service exited.")
return
}

// This might not be an error: child may have already exited.
if errCode, ok := err.(*api.ExitCodeErr); ok {
log.WithError(err).Warn("Service exiting with child process status code.")
os.Exit(errCode.ExitCode())
}

log.WithError(err).Warn("Service exiting with child process error message.")
os.Exit(1)
}
59 changes: 31 additions & 28 deletions internal/agent/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ type Service struct {
daemon daemon
}

func New(exitCodeC chan int, arg ...string) (service.Service, error) {
func New(arg ...string) (service.Service, error) {
svc := &Service{
daemon: daemon{
args: arg,
exitCodeC: exitCodeC,
exited: new(atomBool),
args: arg,
exitCodeC: make(chan int, 1),
exited: new(atomBool),
stopRequested: new(atomBool),
},
}

Expand All @@ -49,11 +50,12 @@ func New(exitCodeC chan int, arg ...string) (service.Service, error) {
}

type daemon struct {
sync.Mutex // daemon can be accessed from different routines.
args []string
cmd *exec.Cmd
exitCodeC chan int // exit status handed off to the service for its own exit
exited *atomBool
sync.Mutex // daemon can be accessed from different routines.
args []string
cmd *exec.Cmd
exitCodeC chan int // exit status handed off to the service for its own exit
exited *atomBool
stopRequested *atomBool
}

type atomBool struct {
Expand All @@ -69,10 +71,7 @@ func (b *atomBool) Set(value bool) {
}

func (b *atomBool) Get() bool {
if atomic.LoadInt32(&(b.flag)) != 0 {
return true
}
return false
return atomic.LoadInt32(&(b.flag)) != 0
}

// GetCommandPath returns the absolute path of the agent binary that should be run.
Expand Down Expand Up @@ -107,24 +106,28 @@ func WaitForExitOrTimeout(exitCode <-chan int) error {
}
}

// There are 2 scenarios in how the child process can exit:
// 1. OS requested service stop
// 2. child process decided to exit on it's own.
// In the second scenario, there's no other way to make interface.Run to stop
// we have to call os.Exit.
func (d *daemon) exitWithChildStatus(s service.Service, exitCode int) {
log.WithField("exit_code", exitCode).
Info("child process exited")
var err error
var st service.Status
if st, err = s.Status(); err == nil && st == service.StatusRunning {
d.exited.Set(true)

d.exited.Set(true)

// OS requested service stop.
if d.stopRequested.Get() {
d.exitCodeC <- exitCode
log.Debug("signaling service stop...")
// TODO: investigate this, seems that stopping the service will affect
// the service recovery policy on both Windows / Linux
//s.Stop()
}
if st != service.StatusUnknown {
log.WithError(err).Debug("retrieving service status")
} else { // child process decided to exit on it's own.

// If newrelic-infra-service was not started manually from cmd line (interactive mode).
if !service.Interactive() && exitCode == 0 {
s.Stop()
return
}
// In case of error we have to call os.Exit and rely on os service recovery policy.
os.Exit(exitCode)
}
// interface.Stop cannot be called as service is not running
// and there's no other way to make interface.Run to stop.
// Case for foreground execution, AKA interactive mode.
os.Exit(exitCode)
}
8 changes: 8 additions & 0 deletions internal/agent/service/service_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func (svc *Service) Stop(_ service.Service) (err error) {

log.Info("Service is stopping. waiting for agent process to terminate...")

svc.daemon.Lock()
defer svc.daemon.Unlock()

svc.daemon.stopRequested.Set(true)

err = svc.daemon.cmd.Process.Signal(signals.GracefulStop)
if err != nil {
log.WithError(err).Debug("Failed to send graceful stop signal to process.")
Expand All @@ -48,9 +53,11 @@ func (svc *Service) Shutdown(_ service.Service) (err error) {

func (d *daemon) run(s service.Service) {
for {
d.Lock()
d.cmd = exec.Command(GetCommandPath(d.args[0]), d.args[1:]...)
d.cmd.Stdout = os.Stdout
d.cmd.Stderr = os.Stderr
d.Unlock()

exitCode := api.CheckExitCode(d.cmd.Run())

Expand All @@ -60,6 +67,7 @@ func (d *daemon) run(s service.Service) {
continue
default:
d.exitWithChildStatus(s, exitCode)
return
}
}
}
11 changes: 11 additions & 0 deletions internal/agent/service/service_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,16 @@ func (svc *Service) Start(s service.Service) (err error) {
// - if we start and stop the service immediately, the agent may not stop properly
// and so we have to kill it forcefully
func (svc *Service) Stop(s service.Service) (err error) {
if svc.daemon.exited.Get() {
return nil
}
log.Info("service is stopping. notifying agent process.")

svc.daemon.Lock()
defer svc.daemon.Unlock()

svc.daemon.stopRequested.Set(true)

// notify the agent to gracefully stop
windows.PostNotificationMessage(windows.GetPipeName(svcName), ipc.Stop)

Expand All @@ -40,11 +45,16 @@ func (svc *Service) Stop(s service.Service) (err error) {
// - if we start the service and shutdown the host immediately, the agent may not stop properly
// and so we have to kill it forcefully
func (svc *Service) Shutdown(s service.Service) (err error) {
if svc.daemon.exited.Get() {
return nil
}
log.Debug("host is shutting down. notifying agent process.")

svc.daemon.Lock()
defer svc.daemon.Unlock()

svc.daemon.stopRequested.Set(true)

// notify the agent to update the shutdown status and then stop gracefully
windows.PostNotificationMessage(windows.GetPipeName(svcName), ipc.Shutdown)

Expand All @@ -68,6 +78,7 @@ func (d *daemon) run(s service.Service) {
continue
default:
d.exitWithChildStatus(s, exitCode)
return
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion internal/integrations/v4/runner/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func Test_runner_Run(t *testing.T) {
e := &testemit.RecordEmitter{}
r := NewRunner(def, e, nil, nil, cmdrequest.NoopHandleFn, configrequest.NoopHandleFn, nil)

ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()

r.Run(ctx, nil, nil)
Expand Down
Loading

0 comments on commit dd35e33

Please sign in to comment.