From de2c74fcdb88675f9e1e46e307d8a9852eca262d Mon Sep 17 00:00:00 2001 From: "Alex Ellis (VMware)" Date: Tue, 20 Mar 2018 09:45:31 +0000 Subject: [PATCH] Add feature for graceful shutdown of HTTP server If the watchdog is sent SIGTERM from an external process then it should stop accepting new connections and attempt to finish the work in progress. This change makes use of the new ability in Go 1.9 and onwards to shut down an HTTP server gracefully. The write_timeout duration is used as a grace period to allow all in-flight requests to complete. The pattern is taken directly from the official example in the Golang documentation. [1] Further tuning and testing may be needed for Windows containers, which have a different set of signals for closing work. This change aims to cover the majority use-case for Linux containers. The HTTP health-check is also invalidated by creating an AND expression with the existing lock-file check. Tested with Kubernetes by deploying a custom watchdog and the fprocess of `env`. Log message was observed when scaling down and connections stopped being accepted on the terminating replica. Also corrects some typos from previous PR. [1] https://golang.org/pkg/net/http/#Server.Shutdown Signed-off-by: Alex Ellis (VMware) --- watchdog/main.go | 61 +++++++++++++++++++++++++-------- watchdog/requesthandler_test.go | 18 +++++++--- 2 files changed, 60 insertions(+), 19 deletions(-) diff --git a/watchdog/main.go b/watchdog/main.go index d4cc756cb..fa61b2fb2 100644 --- a/watchdog/main.go +++ b/watchdog/main.go @@ -5,20 +5,25 @@ package main import ( "bytes" + "context" "fmt" "io/ioutil" "log" "net/http" "os" "os/exec" + "os/signal" "path/filepath" "strings" "sync" + "syscall" "time" "github.com/openfaas/faas/watchdog/types" ) +var acceptingConnections bool + // buildFunctionInput for a GET method this is an empty byte array. 
func buildFunctionInput(config *WatchdogConfig, r *http.Request) ([]byte, error) { var res []byte @@ -260,25 +265,20 @@ func lockFilePresent() bool { return true } -func createLockFile() error { +func createLockFile() (string, error) { path := filepath.Join(os.TempDir(), ".lock") log.Printf("Writing lock-file to: %s\n", path) writeErr := ioutil.WriteFile(path, []byte{}, 0660) - return writeErr -} + acceptingConnections = true -func removeLockFile() error { - path := filepath.Join(os.TempDir(), ".lock") - log.Printf("Removing lock-file : %s\n", path) - removeErr := os.Remove(path) - return removeErr + return path, writeErr } func makeHealthHandler() func(http.ResponseWriter, *http.Request) { return func(w http.ResponseWriter, r *http.Request) { switch r.Method { case http.MethodGet: - if lockFilePresent() == false { + if acceptingConnections == false || lockFilePresent() == false { w.WriteHeader(http.StatusInternalServerError) return } @@ -288,9 +288,7 @@ func makeHealthHandler() func(http.ResponseWriter, *http.Request) { break default: w.WriteHeader(http.StatusMethodNotAllowed) - } - } } @@ -312,6 +310,8 @@ func makeRequestHandler(config *WatchdogConfig) func(http.ResponseWriter, *http. } func main() { + acceptingConnections = false + osEnv := types.OsEnv{} readConfig := ReadConfig{} config := readConfig.Read(osEnv) @@ -335,15 +335,46 @@ func main() { http.HandleFunc("/", makeRequestHandler(&config)) if config.suppressLock == false { - path := filepath.Join(os.TempDir(), ".lock") - log.Printf("Writing lock-file to: %s\n", path) - writeErr := ioutil.WriteFile(path, []byte{}, 0660) + path, writeErr := createLockFile() + if writeErr != nil { log.Panicf("Cannot write %s. To disable lock-file set env suppress_lock=true.\n Error: %s.\n", path, writeErr.Error()) } } else { log.Println("Warning: \"suppress_lock\" is enabled. 
No automated health-checks will be in place for your function.") + acceptingConnections = true + } + + listenUntilShutdown(config.writeTimeout, s) +} + +func listenUntilShutdown(shutdownTimeout time.Duration, s *http.Server) { + + idleConnsClosed := make(chan struct{}) + go func() { + sig := make(chan os.Signal, 1) + signal.Notify(sig, syscall.SIGTERM) + + <-sig + + log.Printf("SIGTERM received.. shutting down server") + + acceptingConnections = false + + if err := s.Shutdown(context.Background()); err != nil { + // Error from closing listeners, or context timeout: + log.Printf("Error in Shutdown: %v", err) + } + + <-time.Tick(shutdownTimeout) + + close(idleConnsClosed) + }() + + if err := s.ListenAndServe(); err != http.ErrServerClosed { + log.Printf("Error ListenAndServe: %v", err) + close(idleConnsClosed) } - log.Fatal(s.ListenAndServe()) + <-idleConnsClosed } diff --git a/watchdog/requesthandler_test.go b/watchdog/requesthandler_test.go index 5fb82be91..4ff4fd3e8 100644 --- a/watchdog/requesthandler_test.go +++ b/watchdog/requesthandler_test.go @@ -11,6 +11,7 @@ import ( "net/http" "net/http/httptest" "os" + "path/filepath" "strings" "testing" "time" @@ -361,11 +362,13 @@ func TestHandler_StatusOKForGETAndNoBody(t *testing.T) { } } -func TestHealthHandler_SatusOK_LockFilePresent(t *testing.T) { +func TestHealthHandler_StatusOK_LockFilePresent(t *testing.T) { rr := httptest.NewRecorder() - if lockFilePresent() == false { - if err := createLockFile(); err != nil { + present := lockFilePresent() + + if present == false { + if _, err := createLockFile(); err != nil { t.Fatal(err) } } @@ -402,7 +405,7 @@ func TestHealthHandler_StatusInternalServerError_LockFileNotPresent(t *testing.T required := http.StatusInternalServerError if status := rr.Code; status != required { - t.Errorf("handler retruned wrong status code: got %v, but wanted %v", status, required) + t.Errorf("handler returned wrong status code - got: %v, want: %v", status, required) } } @@ -426,3 +429,10 @@ 
func TestHealthHandler_SatusMethoNotAllowed_ForWriteableVerbs(t *testing.T) { } } } + +func removeLockFile() error { + path := filepath.Join(os.TempDir(), ".lock") + log.Printf("Removing lock-file : %s\n", path) + removeErr := os.Remove(path) + return removeErr +}