Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop eating panics #28800

Merged
merged 1 commit into from
Jul 13, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion contrib/mesos/pkg/executor/executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,8 @@ func TestExecutorInitializeStaticPodsSource(t *testing.T) {
// its state. When a Kamikaze message is received, the executor should
// attempt suicide.
func TestExecutorFrameworkMessage(t *testing.T) {
// create and start executor
// TODO(jdef): Fix the unexpected call in the mocking system.
t.Skip("This test started failing when panic catching was disabled.")
var (
mockDriver = &MockExecutorDriver{}
kubeletFinished = make(chan struct{})
Expand Down
6 changes: 6 additions & 0 deletions contrib/mesos/pkg/proc/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ func stateRun(ps *processState, a *scheduledAction) stateFn {
close(a.errCh) // signal that action was scheduled
func() {
// we don't trust clients of this package
defer func() {
if x := recover(); x != nil {
// HandleCrash has already logged this, so
// nothing to do.
}
}()
defer runtime.HandleCrash()
a.action()
}()
Expand Down
11 changes: 5 additions & 6 deletions pkg/apiserver/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"k8s.io/kubernetes/pkg/auth/user"
"k8s.io/kubernetes/pkg/httplog"
"k8s.io/kubernetes/pkg/serviceaccount"
"k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/sets"
)

Expand Down Expand Up @@ -137,12 +138,10 @@ func tooManyRequests(req *http.Request, w http.ResponseWriter) {
// RecoverPanics wraps an http Handler to recover and log panics.
func RecoverPanics(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
defer func() {
if x := recover(); x != nil {
http.Error(w, "apis panic. Look in log for details.", http.StatusInternalServerError)
glog.Errorf("APIServer panic'd on %v %v: %v\n%s\n", req.Method, req.RequestURI, x, debug.Stack())
}
}()
defer runtime.HandleCrash(func(err interface{}) {
http.Error(w, "This request caused apisever to panic. Look in log for details.", http.StatusInternalServerError)
glog.Errorf("APIServer panic'd on %v %v: %v\n%s\n", req.Method, req.RequestURI, err, debug.Stack())
})
defer httplog.NewLogged(req, &w).StacktraceWhen(
httplog.StatusIsNot(
http.StatusOK,
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/prober/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ func (w *worker) stop() {
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func (w *worker) doProbe() (keepGoing bool) {
defer func() { recover() }() // Actually eat panics (HandleCrash takes care of logging)
defer runtime.HandleCrash(func(_ interface{}) { keepGoing = true })

status, ok := w.probeManager.statusManager.GetPodStatus(w.pod.UID)
Expand Down
30 changes: 21 additions & 9 deletions pkg/util/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,38 @@ import (
"runtime"
)

// For testing, bypass HandleCrash.
var ReallyCrash bool
var (
// ReallyCrash controls the behavior of HandleCrash and now defaults
// true. It's still exposed so components can optionally set to false
// to restore prior behavior.
ReallyCrash = true
)

// PanicHandlers is a list of functions which will be invoked when a panic happens.
var PanicHandlers = []func(interface{}){logPanic}

//TODO search the public functions
// HandleCrash simply catches a crash and logs an error. Meant to be called via defer.
// Additional context-specific handlers can be provided, and will be called in case of panic
// HandleCrash simply catches a crash and logs an error. Meant to be called via
// defer. Additional context-specific handlers can be provided, and will be
// called in case of panic. HandleCrash actually crashes, after calling the
// handlers and logging the panic message.
//
// TODO: remove this function. We are switching to a world where it's safe for
// apiserver to panic, since it will be restarted by kubelet. At the beginning
// of the Kubernetes project, nothing was going to restart apiserver and so
// catching panics was important. But it's actually much simpler for montoring
// software if we just exit when an unexpected panic happens.
func HandleCrash(additionalHandlers ...func(interface{})) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, we want the handlers to be called. The handlers allow us to report to external systems (on my long delayed list is to upstream the sentry code that reports all panics to a remote system).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The handlers are still called.

if ReallyCrash {
return
}
if r := recover(); r != nil {
for _, fn := range PanicHandlers {
fn(r)
}
for _, fn := range additionalHandlers {
fn(r)
}
if ReallyCrash {
// Actually proceed to panic.
panic(r)
}
}
}

Expand All @@ -55,7 +67,7 @@ func logPanic(r interface{}) {
}
callers = callers + fmt.Sprintf("%v:%v\n", file, line)
}
glog.Errorf("Recovered from panic: %#v (%v)\n%v", r, r, callers)
glog.Errorf("Observed a panic: %#v (%v)\n%v", r, r, callers)
}

// ErrorHandlers is a list of functions which will be invoked when an unreturnable
Expand Down
24 changes: 13 additions & 11 deletions pkg/util/runtime/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,15 @@ import (
)

func TestHandleCrash(t *testing.T) {
count := 0
expect := 10
for i := 0; i < expect; i = i + 1 {
defer HandleCrash()
if i%2 == 0 {
panic("Test Panic")
defer func() {
if x := recover(); x == nil {
t.Errorf("Expected a panic to recover from")
}
count = count + 1
}
if count != expect {
t.Errorf("Expected %d iterations, found %d", expect, count)
}
}()
defer HandleCrash()
panic("Test Panic")
}

func TestCustomHandleCrash(t *testing.T) {
old := PanicHandlers
defer func() { PanicHandlers = old }()
Expand All @@ -45,13 +41,19 @@ func TestCustomHandleCrash(t *testing.T) {
},
}
func() {
defer func() {
if x := recover(); x == nil {
t.Errorf("Expected a panic to recover from")
}
}()
defer HandleCrash()
panic("test")
}()
if result != "test" {
t.Errorf("did not receive custom handler")
}
}

func TestCustomHandleError(t *testing.T) {
old := ErrorHandlers
defer func() { ErrorHandlers = old }()
Expand Down