Skip to content

Commit

Permalink
Add feature for graceful shutdown of HTTP server
Browse files Browse the repository at this point in the history
If the watchdog is sent SIGTERM from an external process then it
should stop accepting new connections and attempt to finish the
work in progress. This change makes use of the new ability in Go
1.9 and onwards to cancel a HTTP server gracefully.

The write_timeout duration is used as a grace period to allow all
in-flight requests to complete. The pattern is taken directly from
the official example in the Golang documentation. [1]

Further tuning and testing may be needed for Windows containers which
have a different set of signals for closing work. This change aims
to cover the majority use-case for Linux containers.

The HTTP health-check is also invalidated during shutdown: it now passes only
when the watchdog is accepting connections AND the existing lock file is present.

Tested with Kubernetes by deploying a custom watchdog and the
fprocess of `env`. Log message was observed when scaling down and
connections stopped being accepted on terminating replica.

Also corrects some typos from previous PR.

[1] https://golang.org/pkg/net/http/#Server.Shutdown

Signed-off-by: Alex Ellis (VMware) <alexellis2@gmail.com>
  • Loading branch information
alexellis committed Mar 24, 2018
1 parent 6921e4f commit de2c74f
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 19 deletions.
61 changes: 46 additions & 15 deletions watchdog/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@ package main

import (
"bytes"
"context"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"os/exec"
"os/signal"
"path/filepath"
"strings"
"sync"
"syscall"
"time"

"github.com/openfaas/faas/watchdog/types"
)

// acceptingConnections is the in-process readiness flag checked by the health
// handler alongside the lock file. It is reset to false at the start of main,
// set to true once the lock file has been written (or when suppress_lock is
// enabled), and set back to false when SIGTERM is received.
// NOTE(review): it is written from the signal-handling goroutine and read from
// HTTP handlers as a plain bool, with no synchronization visible here.
var acceptingConnections bool

// buildFunctionInput for a GET method this is an empty byte array.
func buildFunctionInput(config *WatchdogConfig, r *http.Request) ([]byte, error) {
var res []byte
Expand Down Expand Up @@ -260,25 +265,20 @@ func lockFilePresent() bool {
return true
}

func createLockFile() error {
func createLockFile() (string, error) {
path := filepath.Join(os.TempDir(), ".lock")
log.Printf("Writing lock-file to: %s\n", path)
writeErr := ioutil.WriteFile(path, []byte{}, 0660)
return writeErr
}
acceptingConnections = true

func removeLockFile() error {
path := filepath.Join(os.TempDir(), ".lock")
log.Printf("Removing lock-file : %s\n", path)
removeErr := os.Remove(path)
return removeErr
return path, writeErr
}

func makeHealthHandler() func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
switch r.Method {
case http.MethodGet:
if lockFilePresent() == false {
if acceptingConnections == false || lockFilePresent() == false {
w.WriteHeader(http.StatusInternalServerError)
return
}
Expand All @@ -288,9 +288,7 @@ func makeHealthHandler() func(http.ResponseWriter, *http.Request) {
break
default:
w.WriteHeader(http.StatusMethodNotAllowed)

}

}
}

Expand All @@ -312,6 +310,8 @@ func makeRequestHandler(config *WatchdogConfig) func(http.ResponseWriter, *http.
}

func main() {
acceptingConnections = false

osEnv := types.OsEnv{}
readConfig := ReadConfig{}
config := readConfig.Read(osEnv)
Expand All @@ -335,15 +335,46 @@ func main() {
http.HandleFunc("/", makeRequestHandler(&config))

if config.suppressLock == false {
path := filepath.Join(os.TempDir(), ".lock")
log.Printf("Writing lock-file to: %s\n", path)
writeErr := ioutil.WriteFile(path, []byte{}, 0660)
path, writeErr := createLockFile()

if writeErr != nil {
log.Panicf("Cannot write %s. To disable lock-file set env suppress_lock=true.\n Error: %s.\n", path, writeErr.Error())
}
} else {
log.Println("Warning: \"suppress_lock\" is enabled. No automated health-checks will be in place for your function.")
acceptingConnections = true
}

listenUntilShutdown(config.writeTimeout, s)
}

// listenUntilShutdown serves HTTP on s until the process receives SIGTERM,
// then shuts the server down gracefully.
//
// shutdownTimeout is the grace period given to in-flight requests: it bounds
// the call to s.Shutdown, so the function returns as soon as all connections
// are idle or the period expires, whichever comes first. A non-positive
// shutdownTimeout means no grace period.
//
// If the server fails to start (ListenAndServe returns anything other than
// http.ErrServerClosed) the error is logged and the function returns at once.
func listenUntilShutdown(shutdownTimeout time.Duration, s *http.Server) {
	idleConnsClosed := make(chan struct{})

	go func() {
		sig := make(chan os.Signal, 1)
		signal.Notify(sig, syscall.SIGTERM)
		defer signal.Stop(sig)

		<-sig

		log.Printf("SIGTERM received.. shutting down server")

		// Fail the health check first so no new traffic is routed here.
		// NOTE(review): plain bool shared with the HTTP handlers; consider
		// an atomic if the race detector flags it.
		acceptingConnections = false

		// Bound the graceful shutdown by the grace period instead of waiting
		// indefinitely and then sleeping for a fixed interval afterwards.
		ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
		defer cancel()

		if err := s.Shutdown(ctx); err != nil {
			// Error from closing listeners, or context timeout:
			log.Printf("Error in Shutdown: %v", err)
		}

		// Only this goroutine closes the channel, so it is closed once.
		close(idleConnsClosed)
	}()

	if err := s.ListenAndServe(); err != http.ErrServerClosed {
		// The server never started or died unexpectedly; there is nothing
		// to drain, so do not wait for a shutdown signal.
		log.Printf("Error ListenAndServe: %v", err)
		return
	}

	// ListenAndServe returns immediately once Shutdown is called; wait here
	// until Shutdown itself has finished.
	<-idleConnsClosed
}
18 changes: 14 additions & 4 deletions watchdog/requesthandler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -361,11 +362,13 @@ func TestHandler_StatusOKForGETAndNoBody(t *testing.T) {
}
}

func TestHealthHandler_SatusOK_LockFilePresent(t *testing.T) {
func TestHealthHandler_StatusOK_LockFilePresent(t *testing.T) {
rr := httptest.NewRecorder()

if lockFilePresent() == false {
if err := createLockFile(); err != nil {
present := lockFilePresent()

if present == false {
if _, err := createLockFile(); err != nil {
t.Fatal(err)
}
}
Expand Down Expand Up @@ -402,7 +405,7 @@ func TestHealthHandler_StatusInternalServerError_LockFileNotPresent(t *testing.T

required := http.StatusInternalServerError
if status := rr.Code; status != required {
t.Errorf("handler retruned wrong status code: got %v, but wanted %v", status, required)
t.Errorf("handler returned wrong status code - got: %v, want: %v", status, required)
}
}

Expand All @@ -426,3 +429,10 @@ func TestHealthHandler_SatusMethoNotAllowed_ForWriteableVerbs(t *testing.T) {
}
}
}

// removeLockFile deletes the ".lock" file from the OS temp directory, logging
// the path first. It returns whatever error os.Remove reports (for example
// when the file does not exist), or nil on success.
func removeLockFile() error {
	lockPath := filepath.Join(os.TempDir(), ".lock")
	log.Printf("Removing lock-file : %s\n", lockPath)

	return os.Remove(lockPath)
}

0 comments on commit de2c74f

Please sign in to comment.