Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add quit request timeout #18

Merged
merged 2 commits into from
Oct 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ When the application exits, unless `NEVER_KILL_ISTIO_ON_FAILURE` has been set an
| `WAIT_FOR_ENVOY_TIMEOUT` | If provided and set to a valid `time.Duration` string greater than 0 seconds, `scuttle` will wait for that amount of time before starting the main application. By default, it will wait indefinitely. If `QUIT_WITHOUT_ENVOY_TIMEOUT` is set as well, it will take precedence over this variable |
| `ISTIO_QUIT_API` | If provided, `scuttle` will send a POST to `/quitquitquit` at the given API. Should be in the format `http://127.0.0.1:15020`. This is intended for Istio v1.3 and higher. When not given, Istio will be stopped using a `pkill` command. |
| `GENERIC_QUIT_ENDPOINTS` | If provided `scuttle` will send a POST to the URL given. Multiple URLs are supported and must be provided as a CSV string. Should be in format `http://myendpoint.com` or `http://myendpoint.com,https://myotherendpoint.com`. The status code response is logged (if logging is enabled) but is not used. A 200 is treated the same as a 404 or 500. `GENERIC_QUIT_ENDPOINTS` is handled before Istio is stopped. |
| `QUIT_REQUEST_TIMEOUT` | A deadline, given as a valid `time.Duration` string, for requests to the `/quitquitquit` endpoint and/or the generic quit endpoints. If the deadline is exceeded, `scuttle` gives up on the request and exits cleanly. The default value is `5s`. |
| `QUIT_WITHOUT_ENVOY_TIMEOUT` | If provided and set to a valid duration, `scuttle` will exit, without running the passed-in executable, if Envoy does not become available before the timeout elapses. If `START_WITHOUT_ENVOY` is also set, this variable will not be taken into account. Also, if `WAIT_FOR_ENVOY_TIMEOUT` is set, this variable will take precedence. |

## How Scuttle stops Istio
Expand Down
27 changes: 19 additions & 8 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os/exec"
"os/signal"
"strings"
"sync"
"syscall"
"time"

Expand Down Expand Up @@ -144,23 +145,33 @@ func killGenericEndpoints() {
return
}

var wg sync.WaitGroup
ctx, cancel := context.WithTimeout(context.Background(), config.QuitRequestTimeout)
defer cancel()
for _, genericEndpoint := range config.GenericQuitEndpoints {
genericEndpoint = strings.Trim(genericEndpoint, " ")
code, err := postKill(context.TODO(), genericEndpoint)
if err != nil {
log(fmt.Sprintf("Sent POST to '%s', error: %s", genericEndpoint, err))
continue
}
log(fmt.Sprintf("Sent POST to '%s', status code: %d", genericEndpoint, code))
func(ctx context.Context, genericEndpoint string) {
wg.Add(1)
defer wg.Done()
genericEndpoint = strings.Trim(genericEndpoint, " ")
code, err := postKill(ctx, genericEndpoint)
if err != nil {
log(fmt.Sprintf("Sent POST to '%s', error: %s", genericEndpoint, err))
return
}
log(fmt.Sprintf("Sent POST to '%s', status code: %d", genericEndpoint, code))
}(ctx, genericEndpoint)
}
wg.Wait()
}

func killIstioWithAPI() {
log(fmt.Sprintf("Stopping Istio using Istio API '%s' (intended for Istio >v1.2)", config.IstioQuitAPI))

responseSuccess := false
ctx, cancel := context.WithTimeout(context.Background(), config.QuitRequestTimeout)
defer cancel()
url := fmt.Sprintf("%s/quitquitquit", config.IstioQuitAPI)
code, err := postKill(context.TODO(), url)
code, err := postKill(ctx, url)
if err != nil {
log(fmt.Sprintf("Sent quitquitquit to Istio, error: %d", err))
} else {
Expand Down
60 changes: 60 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"net/http"
"net/http/httptest"
"os"
"strings"
"sync/atomic"
"testing"
"time"
)
Expand All @@ -16,10 +18,23 @@ var (
goodEventuallyServer *httptest.Server
badServer *httptest.Server
genericQuitServer *httptest.Server
slowQuitServer *httptest.Server
callCount counter
envoyDelayTimestamp int64 = 0
envoyDelayMax int64 = 15
)

// counter counts calls made to the test HTTP servers. The server handlers
// call increment while the tests read the value, so every access goes
// through sync/atomic.
type counter int64

// increment atomically adds one to the counter and returns the value produced
// by this particular increment. Returning the result of the atomic add avoids
// a separate re-read that could observe increments made by other goroutines.
func (c *counter) increment() int {
	return int(atomic.AddInt64((*int64)(c), 1))
}

// int returns the current value of the counter, loaded atomically.
func (c *counter) int() int {
	return int(atomic.LoadInt64((*int64)(c)))
}

func TestMain(m *testing.M) {
initTestHTTPServers()
os.Exit(m.Run())
Expand All @@ -30,11 +45,13 @@ func initTestHTTPServers() {

// Always 200 and live envoy state
goodServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
callCount.increment()
w.Write([]byte("{\"state\": \"LIVE\"}")) // Envoy live response
}))

// 503 for 5 requests, then 200 + live envoy state
goodEventuallyServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
callCount.increment()
timeSinceStarted := time.Now().Unix() - envoyDelayTimestamp
if timeSinceStarted < envoyDelayMax {
fmt.Println("Status Unavailable")
Expand All @@ -46,11 +63,20 @@ func initTestHTTPServers() {

// Always 503
badServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
callCount.increment()
fmt.Println("Status Unavailable")
w.WriteHeader(http.StatusServiceUnavailable)
}))

genericQuitServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
callCount.increment()
fmt.Println("Status Ok")
w.WriteHeader(http.StatusOK)
}))

slowQuitServer = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
callCount.increment()
time.Sleep(2 * time.Second)
fmt.Println("Status Ok")
w.WriteHeader(http.StatusOK)
}))
Expand All @@ -70,6 +96,8 @@ func clearTestingEnv() {
os.Setenv("QUIT_WITHOUT_ENVOY_TIMEOUT", "")
os.Setenv("WAIT_FOR_ENVOY_TIMEOUT", "")
os.Setenv("GENERIC_QUIT_ENDPOINTS", "")
os.Setenv("QUIT_REQUEST_TIMEOUT", "")
callCount = 0
}

// Inits the test environment and starts the blocking
Expand Down Expand Up @@ -131,6 +159,9 @@ func TestGenericQuitEndpoints(t *testing.T) {
os.Setenv("GENERIC_QUIT_ENDPOINTS", genericQuitServer.URL+", https://google.com/, https://github.com/, 127.0.0.1:1111/idontexist, notaurl^^ ")
initTestingEnv()
killGenericEndpoints()
if callCount != 1 {
t.Errorf("Expected 1 call to genericQuitServer got %d", callCount)
}
clearTestingEnv()
}

Expand All @@ -156,6 +187,35 @@ func TestNoQuitQuitQuitMalformedUrl(t *testing.T) {
clearTestingEnv()
}

func TestQuitTimeout(t *testing.T) {
fmt.Println("Starting TestQuitTimeout")
os.Setenv("START_WITHOUT_ENVOY", "false")
os.Setenv("ENVOY_ADMIN_API", goodServer.URL)
os.Setenv("ISTIO_QUIT_API", slowQuitServer.URL)
os.Setenv(
"GENERIC_QUIT_ENDPOINTS",
strings.Join([]string{slowQuitServer.URL, slowQuitServer.URL, genericQuitServer.URL, slowQuitServer.URL}, ", "),
)
os.Setenv("QUIT_REQUEST_TIMEOUT", "100ms")

measureCheckFunc := func(targetFunc func(), errorPrefix string) {
startCallCount := callCount
startTime := time.Now()
targetFunc()
elapsedTime := time.Now().Sub(startTime)
if elapsedTime > 500*time.Millisecond {
t.Errorf("%s: took %dms, this exceeds the timeout significantly", errorPrefix, elapsedTime/time.Millisecond)
}
if callCount-startCallCount < 1 {
t.Errorf("%s: quit endpoint was not called", errorPrefix)
}
}
initTestingEnv()
measureCheckFunc(killIstioWithAPI, "killIstioWithAPI()")
measureCheckFunc(killGenericEndpoints, "killGenericEndpoints()")
clearTestingEnv()
}

// Tests scuttle waits
func TestWaitTillTimeoutForEnvoy(t *testing.T) {
fmt.Println("Starting TestWaitTillTimeoutForEnvoy")
Expand Down
2 changes: 2 additions & 0 deletions scuttle_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type ScuttleConfig struct {
IstioFallbackPkill bool
NeverKillIstioOnFailure bool
GenericQuitEndpoints []string
QuitRequestTimeout time.Duration
QuitWithoutEnvoyTimeout time.Duration
}

Expand All @@ -40,6 +41,7 @@ func getConfig() ScuttleConfig {
IstioFallbackPkill: getBoolFromEnv("ISTIO_FALLBACK_PKILL", false, loggingEnabled),
NeverKillIstioOnFailure: getBoolFromEnv("NEVER_KILL_ISTIO_ON_FAILURE", false, loggingEnabled),
GenericQuitEndpoints: getStringArrayFromEnv("GENERIC_QUIT_ENDPOINTS", make([]string, 0), loggingEnabled),
QuitRequestTimeout: getDurationFromEnv("QUIT_REQUEST_TIMEOUT", time.Second*5, loggingEnabled),
QuitWithoutEnvoyTimeout: getDurationFromEnv("QUIT_WITHOUT_ENVOY_TIMEOUT", time.Duration(0), loggingEnabled),
}

Expand Down
Loading