Permalink
Browse files

more robust backtraces for syscalls on x86

 * new optimization policy: ALIEN-FUNCALL-SAVES-FP-AND-PC Set to 3 for
   self-build on x86 to get reliable more backtraces there, and 0 for
   other platforms. (1 matches the old SPEED <= DEBUG behaviour.)

 * When using a saved FP, and an interrupt context has a bogus
   FP, assume it is an interrupted syscall frame.
  • Loading branch information...
1 parent 913cf0c commit e7b2c507c364da9395ad1f9591210dac44f24afd @sb-studio sb-studio committed Aug 1, 2011
Showing with 114 additions and 85 deletions.
  1. +2 −0 NEWS
  2. +3 −1 make-host-2.lisp
  3. +1 −0 package-data-list.lisp-expr
  4. +89 −83 src/code/debug-int.lisp
  5. +1 −1 src/compiler/aliencomp.lisp
  6. +6 −0 src/compiler/policies.lisp
  7. +12 −0 tests/debug.impure.lisp
View
2 NEWS
@@ -14,6 +14,8 @@ changes relative to sbcl-1.0.50:
(lp#811386)
* bug fix: using GCC >= 4.6 to build SBCL on x86 no longer breaks
backtraces. (lp#818460)
+ * bug fix: better backtraces for interrupted syscall frames on x86.
+ (lp#549673)
changes in sbcl-1.0.50 relative to sbcl-1.0.49:
* enhancement: errors from FD handlers now provide a restart to remove
View
4 make-host-2.lisp
@@ -27,7 +27,9 @@
;; sbcl-internal optimization declarations:
;;
;; never insert stepper conditions
- (sb!c:insert-step-conditions 0)))))
+ (sb!c:insert-step-conditions 0)
+ ;; save FP and PC for alien calls -- or not
+ (sb!c:alien-funcall-saves-fp-and-pc #!+x86 3 #!-x86 0)))))
(compile 'proclaim-target-optimization)
(defun in-target-cross-compilation-mode (fun)
View
1 package-data-list.lisp-expr
@@ -219,6 +219,7 @@ of SBCL which maintained the CMU-CL-style split into two packages.)"
#!+x86 "SET-FPU-WORD-FOR-C"
#!+x86 "SET-FPU-WORD-FOR-LISP"
"ALIGN-STACK-POINTER"
+ "ALIEN-FUNCALL-SAVES-FP-AND-PC"
"ALLOC-ALIEN-STACK-SPACE" "ALLOC-NUMBER-STACK-SPACE"
"ALLOCATE-CODE-OBJECT" "ALLOCATE-FRAME"
"ALLOCATE-DYNAMIC-CODE-OBJECT" "ALLOCATE-FULL-CALL-FRAME"
View
172 src/code/debug-int.lisp
@@ -634,7 +634,8 @@
(when saved-fp
(compute-calling-frame (descriptor-sap saved-fp)
(descriptor-sap saved-pc)
- up-frame))))
+ up-frame
+ t))))
;;; Return the frame immediately below FRAME on the stack; or when
;;; FRAME is the bottom of the stack, return NIL.
@@ -788,13 +789,14 @@
escaped))))))
#!+(or x86 x86-64)
-(defun compute-calling-frame (caller ra up-frame)
+(defun compute-calling-frame (caller ra up-frame &optional savedp)
(declare (type system-area-pointer caller ra))
(/noshow0 "entering COMPUTE-CALLING-FRAME")
(when (control-stack-pointer-valid-p caller)
(/noshow0 "in WHEN")
;; First check for an escaped frame.
- (multiple-value-bind (code pc-offset escaped) (find-escaped-frame caller)
+ (multiple-value-bind (code pc-offset escaped off-stack)
+ (find-escaped-frame caller)
(/noshow0 "at COND")
(cond (code
;; If it's escaped it may be a function end breakpoint trap.
@@ -828,7 +830,11 @@
(code-location-from-pc d-fun pc-offset
escaped)
(if up-frame (1+ (frame-number up-frame)) 0)
- escaped)))))
+ ;; If we have an interrupt-context that's not on
+ ;; our stack at all, and we're computing the
+ ;; from from a saved FP, we're probably looking
+ ;; at an interrupted syscall.
+ (or escaped (and savedp off-stack)))))))
(defun nth-interrupt-context (n)
(declare (type (unsigned-byte 32) n)
@@ -844,101 +850,101 @@
(declare (type system-area-pointer frame-pointer))
(/noshow0 "entering FIND-ESCAPED-FRAME")
(dotimes (index *free-interrupt-context-index* (values nil 0 nil))
- (/noshow0 "at head of WITH-ALIEN")
- (let ((context (nth-interrupt-context index)))
- (/noshow0 "got CONTEXT")
- (when (= (sap-int frame-pointer)
- (sb!vm:context-register context sb!vm::cfp-offset))
- (without-gcing
- (/noshow0 "in WITHOUT-GCING")
- (let* ((component-ptr (component-ptr-from-pc
- (sb!vm:context-pc context)))
- (code (unless (sap= component-ptr (int-sap #x0))
- (component-from-component-ptr component-ptr))))
- (/noshow0 "got CODE")
- (when (null code)
- (return (values code 0 context)))
- (let* ((code-header-len (* (get-header-data code)
- sb!vm:n-word-bytes))
- (pc-offset
+ (let* ((context (nth-interrupt-context index))
+ (cfp (int-sap (sb!vm:context-register context sb!vm::cfp-offset))))
+ (/noshow0 "got CONTEXT")
+ (unless (control-stack-pointer-valid-p cfp)
+ (return (values nil nil nil t)))
+ (when (sap= frame-pointer cfp)
+ (without-gcing
+ (/noshow0 "in WITHOUT-GCING")
+ (let* ((component-ptr (component-ptr-from-pc
+ (sb!vm:context-pc context)))
+ (code (unless (sap= component-ptr (int-sap #x0))
+ (component-from-component-ptr component-ptr))))
+ (/noshow0 "got CODE")
+ (when (null code)
+ (return (values code 0 context)))
+ (let* ((code-header-len (* (get-header-data code)
+ sb!vm:n-word-bytes))
+ (pc-offset
(- (sap-int (sb!vm:context-pc context))
(- (get-lisp-obj-address code)
sb!vm:other-pointer-lowtag)
code-header-len)))
- (/noshow "got PC-OFFSET")
- (unless (<= 0 pc-offset
- (* (code-header-ref code sb!vm:code-code-size-slot)
- sb!vm:n-word-bytes))
- ;; We were in an assembly routine. Therefore, use the
- ;; LRA as the pc.
- ;;
- ;; FIXME: Should this be WARN or ERROR or what?
- (format t "** pc-offset ~S not in code obj ~S?~%"
- pc-offset code))
- (/noshow0 "returning from FIND-ESCAPED-FRAME")
- (return
- (values code pc-offset context)))))))))
+ (/noshow "got PC-OFFSET")
+ (unless (<= 0 pc-offset
+ (* (code-header-ref code sb!vm:code-code-size-slot)
+ sb!vm:n-word-bytes))
+ ;; We were in an assembly routine. Therefore, use the
+ ;; LRA as the pc.
+ ;;
+ ;; FIXME: Should this be WARN or ERROR or what?
+ (format t "** pc-offset ~S not in code obj ~S?~%"
+ pc-offset code))
+ (/noshow0 "returning from FIND-ESCAPED-FRAME")
+ (return
+ (values code pc-offset context)))))))))
#!-(or x86 x86-64)
(defun find-escaped-frame (frame-pointer)
(declare (type system-area-pointer frame-pointer))
(/noshow0 "entering FIND-ESCAPED-FRAME")
(dotimes (index *free-interrupt-context-index* (values nil 0 nil))
- (/noshow0 "at head of WITH-ALIEN")
(let ((scp (nth-interrupt-context index)))
- (/noshow0 "got SCP")
+ (/noshow0 "got SCP")
(when (= (sap-int frame-pointer)
(sb!vm:context-register scp sb!vm::cfp-offset))
(without-gcing
- (/noshow0 "in WITHOUT-GCING")
- (let ((code (code-object-from-bits
- (sb!vm:context-register scp sb!vm::code-offset))))
- (/noshow0 "got CODE")
- (when (symbolp code)
- (return (values code 0 scp)))
- (let* ((code-header-len (* (get-header-data code)
- sb!vm:n-word-bytes))
- (pc-offset
- (- (sap-int (sb!vm:context-pc scp))
- (- (get-lisp-obj-address code)
- sb!vm:other-pointer-lowtag)
- code-header-len)))
- (let ((code-size (* (code-header-ref code
- sb!vm:code-code-size-slot)
- sb!vm:n-word-bytes)))
- (unless (<= 0 pc-offset code-size)
- ;; We were in an assembly routine.
- (multiple-value-bind (new-pc-offset computed-return)
- (find-pc-from-assembly-fun code scp)
- (setf pc-offset new-pc-offset)
- (unless (<= 0 pc-offset code-size)
- (cerror
- "Set PC-OFFSET to zero and continue backtrace."
- 'bug
- :format-control
- "~@<PC-OFFSET (~D) not in code object. Frame details:~
+ (/noshow0 "in WITHOUT-GCING")
+ (let ((code (code-object-from-bits
+ (sb!vm:context-register scp sb!vm::code-offset))))
+ (/noshow0 "got CODE")
+ (when (symbolp code)
+ (return (values code 0 scp)))
+ (let* ((code-header-len (* (get-header-data code)
+ sb!vm:n-word-bytes))
+ (pc-offset
+ (- (sap-int (sb!vm:context-pc scp))
+ (- (get-lisp-obj-address code)
+ sb!vm:other-pointer-lowtag)
+ code-header-len)))
+ (let ((code-size (* (code-header-ref code
+ sb!vm:code-code-size-slot)
+ sb!vm:n-word-bytes)))
+ (unless (<= 0 pc-offset code-size)
+ ;; We were in an assembly routine.
+ (multiple-value-bind (new-pc-offset computed-return)
+ (find-pc-from-assembly-fun code scp)
+ (setf pc-offset new-pc-offset)
+ (unless (<= 0 pc-offset code-size)
+ (cerror
+ "Set PC-OFFSET to zero and continue backtrace."
+ 'bug
+ :format-control
+ "~@<PC-OFFSET (~D) not in code object. Frame details:~
~2I~:@_PC: #X~X~:@_CODE: ~S~:@_CODE FUN: ~S~:@_LRA: ~
#X~X~:@_COMPUTED RETURN: #X~X.~:>"
- :format-arguments
- (list pc-offset
- (sap-int (sb!vm:context-pc scp))
- code
- (%code-entry-points code)
- (sb!vm:context-register scp sb!vm::lra-offset)
- computed-return))
- ;; We failed to pinpoint where PC is, but set
- ;; pc-offset to 0 to keep the backtrace from
- ;; exploding.
- (setf pc-offset 0)))))
- (/noshow0 "returning from FIND-ESCAPED-FRAME")
- (return
- (if (eq (%code-debug-info code) :bogus-lra)
- (let ((real-lra (code-header-ref code
- real-lra-slot)))
- (values (lra-code-header real-lra)
- (get-header-data real-lra)
- nil))
- (values code pc-offset scp))))))))))
+ :format-arguments
+ (list pc-offset
+ (sap-int (sb!vm:context-pc scp))
+ code
+ (%code-entry-points code)
+ (sb!vm:context-register scp sb!vm::lra-offset)
+ computed-return))
+ ;; We failed to pinpoint where PC is, but set
+ ;; pc-offset to 0 to keep the backtrace from
+ ;; exploding.
+ (setf pc-offset 0)))))
+ (/noshow0 "returning from FIND-ESCAPED-FRAME")
+ (return
+ (if (eq (%code-debug-info code) :bogus-lra)
+ (let ((real-lra (code-header-ref code
+ real-lra-slot)))
+ (values (lra-code-header real-lra)
+ (get-header-data real-lra)
+ nil))
+ (values code pc-offset scp))))))))))
#!-(or x86 x86-64)
(defun find-pc-from-assembly-fun (code scp)
View
2 src/compiler/aliencomp.lisp
@@ -702,7 +702,7 @@
;; to it later regardless of how the foreign stack looks
;; like.
#!+:c-stack-is-control-stack
- (when (policy node (<= speed debug))
+ (when (policy node (= 3 alien-funcall-saves-fp-and-pc))
(setf body `(invoke-with-saved-fp-and-pc (lambda () ,body))))
(/noshow "returning from DEFTRANSFORM ALIEN-FUNCALL" (params) body)
`(lambda (function ,@(params))
View
6 src/compiler/policies.lisp
@@ -51,6 +51,12 @@ Enabling this option can increase heap consing of closures.")
"Control conversion of &REST argments to &MORE arguments when
only used as the final argument to APPLY.")
+(define-optimization-quality alien-funcall-saves-fp-and-pc
+ (if (<= speed debug) 3 0)
+ ("no" "maybe" "yes" "yes")
+ "Control ALIEN-FUNCALL saving frame-pointer and program counter for
+more reliable bactracing across foreign calls.")
+
(define-optimization-quality verify-arg-count
(if (zerop safety) 0 3)
("no" "maybe" "yes" "yes"))
View
12 tests/debug.impure.lisp
@@ -173,6 +173,18 @@
(list '(flet not-optimized))
(list '(flet test) #'not-optimized))))))
+(with-test (:name :interrupted-syscall)
+ (let ((m (sb-thread:make-mutex))
+ (q (sb-thread:make-waitqueue)))
+ (assert (verify-backtrace
+ (lambda ()
+ (sb-thread:with-mutex (m)
+ (handler-bind ((timeout (lambda (c)
+ (error "foo"))))
+ (with-timeout 0.1
+ (sb-thread:condition-wait q m)))))
+ `((sb-thread:condition-wait ,q ,m))))))
+
;;; Division by zero was a common error on PPC. It depended on the
;;; return function either being before INTEGER-/-INTEGER in memory,
;;; or more than MOST-POSITIVE-FIXNUM bytes ahead. It also depends on

0 comments on commit e7b2c50

Please sign in to comment.