Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

more robust backtraces for syscalls on x86

 * new optimization policy: ALIEN-FUNCALL-SAVES-FP-AND-PC. Set to 3 for
   self-build on x86 to get more reliable backtraces there, and to 0 for
   other platforms. (1 matches the old SPEED <= DEBUG behaviour.)

 * When using a saved FP, and an interrupt context has a bogus
   FP, assume it is an interrupted syscall frame.
  • Loading branch information...
commit e7b2c507c364da9395ad1f9591210dac44f24afd 1 parent 913cf0c
sb-studio sb-studio authored
2  NEWS
@@ -14,6 +14,8 @@ changes relative to sbcl-1.0.50:
14 14 (lp#811386)
15 15 * bug fix: using GCC >= 4.6 to build SBCL on x86 no longer breaks
16 16 backtraces. (lp#818460)
  17 + * bug fix: better backtraces for interrupted syscall frames on x86.
  18 + (lp#549673)
17 19
18 20 changes in sbcl-1.0.50 relative to sbcl-1.0.49:
19 21 * enhancement: errors from FD handlers now provide a restart to remove
4 make-host-2.lisp
@@ -27,7 +27,9 @@
27 27 ;; sbcl-internal optimization declarations:
28 28 ;;
29 29 ;; never insert stepper conditions
30   - (sb!c:insert-step-conditions 0)))))
  30 + (sb!c:insert-step-conditions 0)
  31 + ;; save FP and PC for alien calls -- or not
  32 + (sb!c:alien-funcall-saves-fp-and-pc #!+x86 3 #!-x86 0)))))
31 33 (compile 'proclaim-target-optimization)
32 34
33 35 (defun in-target-cross-compilation-mode (fun)
1  package-data-list.lisp-expr
@@ -219,6 +219,7 @@ of SBCL which maintained the CMU-CL-style split into two packages.)"
219 219 #!+x86 "SET-FPU-WORD-FOR-C"
220 220 #!+x86 "SET-FPU-WORD-FOR-LISP"
221 221 "ALIGN-STACK-POINTER"
  222 + "ALIEN-FUNCALL-SAVES-FP-AND-PC"
222 223 "ALLOC-ALIEN-STACK-SPACE" "ALLOC-NUMBER-STACK-SPACE"
223 224 "ALLOCATE-CODE-OBJECT" "ALLOCATE-FRAME"
224 225 "ALLOCATE-DYNAMIC-CODE-OBJECT" "ALLOCATE-FULL-CALL-FRAME"
172 src/code/debug-int.lisp
@@ -634,7 +634,8 @@
634 634 (when saved-fp
635 635 (compute-calling-frame (descriptor-sap saved-fp)
636 636 (descriptor-sap saved-pc)
637   - up-frame))))
  637 + up-frame
  638 + t))))
638 639
639 640 ;;; Return the frame immediately below FRAME on the stack; or when
640 641 ;;; FRAME is the bottom of the stack, return NIL.
@@ -788,13 +789,14 @@
788 789 escaped))))))
789 790
790 791 #!+(or x86 x86-64)
791   -(defun compute-calling-frame (caller ra up-frame)
  792 +(defun compute-calling-frame (caller ra up-frame &optional savedp)
792 793 (declare (type system-area-pointer caller ra))
793 794 (/noshow0 "entering COMPUTE-CALLING-FRAME")
794 795 (when (control-stack-pointer-valid-p caller)
795 796 (/noshow0 "in WHEN")
796 797 ;; First check for an escaped frame.
797   - (multiple-value-bind (code pc-offset escaped) (find-escaped-frame caller)
  798 + (multiple-value-bind (code pc-offset escaped off-stack)
  799 + (find-escaped-frame caller)
798 800 (/noshow0 "at COND")
799 801 (cond (code
800 802 ;; If it's escaped it may be a function end breakpoint trap.
@@ -828,7 +830,11 @@
828 830 (code-location-from-pc d-fun pc-offset
829 831 escaped)
830 832 (if up-frame (1+ (frame-number up-frame)) 0)
831   - escaped)))))
  833 + ;; If we have an interrupt-context that's not on
  834 + ;; our stack at all, and we're computing the
  835 + ;; frame from a saved FP, we're probably looking
  836 + ;; at an interrupted syscall.
  837 + (or escaped (and savedp off-stack)))))))
832 838
833 839 (defun nth-interrupt-context (n)
834 840 (declare (type (unsigned-byte 32) n)
@@ -844,101 +850,101 @@
844 850 (declare (type system-area-pointer frame-pointer))
845 851 (/noshow0 "entering FIND-ESCAPED-FRAME")
846 852 (dotimes (index *free-interrupt-context-index* (values nil 0 nil))
847   - (/noshow0 "at head of WITH-ALIEN")
848   - (let ((context (nth-interrupt-context index)))
849   - (/noshow0 "got CONTEXT")
850   - (when (= (sap-int frame-pointer)
851   - (sb!vm:context-register context sb!vm::cfp-offset))
852   - (without-gcing
853   - (/noshow0 "in WITHOUT-GCING")
854   - (let* ((component-ptr (component-ptr-from-pc
855   - (sb!vm:context-pc context)))
856   - (code (unless (sap= component-ptr (int-sap #x0))
857   - (component-from-component-ptr component-ptr))))
858   - (/noshow0 "got CODE")
859   - (when (null code)
860   - (return (values code 0 context)))
861   - (let* ((code-header-len (* (get-header-data code)
862   - sb!vm:n-word-bytes))
863   - (pc-offset
  853 + (let* ((context (nth-interrupt-context index))
  854 + (cfp (int-sap (sb!vm:context-register context sb!vm::cfp-offset))))
  855 + (/noshow0 "got CONTEXT")
  856 + (unless (control-stack-pointer-valid-p cfp)
  857 + (return (values nil nil nil t)))
  858 + (when (sap= frame-pointer cfp)
  859 + (without-gcing
  860 + (/noshow0 "in WITHOUT-GCING")
  861 + (let* ((component-ptr (component-ptr-from-pc
  862 + (sb!vm:context-pc context)))
  863 + (code (unless (sap= component-ptr (int-sap #x0))
  864 + (component-from-component-ptr component-ptr))))
  865 + (/noshow0 "got CODE")
  866 + (when (null code)
  867 + (return (values code 0 context)))
  868 + (let* ((code-header-len (* (get-header-data code)
  869 + sb!vm:n-word-bytes))
  870 + (pc-offset
864 871 (- (sap-int (sb!vm:context-pc context))
865 872 (- (get-lisp-obj-address code)
866 873 sb!vm:other-pointer-lowtag)
867 874 code-header-len)))
868   - (/noshow "got PC-OFFSET")
869   - (unless (<= 0 pc-offset
870   - (* (code-header-ref code sb!vm:code-code-size-slot)
871   - sb!vm:n-word-bytes))
872   - ;; We were in an assembly routine. Therefore, use the
873   - ;; LRA as the pc.
874   - ;;
875   - ;; FIXME: Should this be WARN or ERROR or what?
876   - (format t "** pc-offset ~S not in code obj ~S?~%"
877   - pc-offset code))
878   - (/noshow0 "returning from FIND-ESCAPED-FRAME")
879   - (return
880   - (values code pc-offset context)))))))))
  875 + (/noshow "got PC-OFFSET")
  876 + (unless (<= 0 pc-offset
  877 + (* (code-header-ref code sb!vm:code-code-size-slot)
  878 + sb!vm:n-word-bytes))
  879 + ;; We were in an assembly routine. Therefore, use the
  880 + ;; LRA as the pc.
  881 + ;;
  882 + ;; FIXME: Should this be WARN or ERROR or what?
  883 + (format t "** pc-offset ~S not in code obj ~S?~%"
  884 + pc-offset code))
  885 + (/noshow0 "returning from FIND-ESCAPED-FRAME")
  886 + (return
  887 + (values code pc-offset context)))))))))
881 888
882 889 #!-(or x86 x86-64)
883 890 (defun find-escaped-frame (frame-pointer)
884 891 (declare (type system-area-pointer frame-pointer))
885 892 (/noshow0 "entering FIND-ESCAPED-FRAME")
886 893 (dotimes (index *free-interrupt-context-index* (values nil 0 nil))
887   - (/noshow0 "at head of WITH-ALIEN")
888 894 (let ((scp (nth-interrupt-context index)))
889   - (/noshow0 "got SCP")
  895 + (/noshow0 "got SCP")
890 896 (when (= (sap-int frame-pointer)
891 897 (sb!vm:context-register scp sb!vm::cfp-offset))
892 898 (without-gcing
893   - (/noshow0 "in WITHOUT-GCING")
894   - (let ((code (code-object-from-bits
895   - (sb!vm:context-register scp sb!vm::code-offset))))
896   - (/noshow0 "got CODE")
897   - (when (symbolp code)
898   - (return (values code 0 scp)))
899   - (let* ((code-header-len (* (get-header-data code)
900   - sb!vm:n-word-bytes))
901   - (pc-offset
902   - (- (sap-int (sb!vm:context-pc scp))
903   - (- (get-lisp-obj-address code)
904   - sb!vm:other-pointer-lowtag)
905   - code-header-len)))
906   - (let ((code-size (* (code-header-ref code
907   - sb!vm:code-code-size-slot)
908   - sb!vm:n-word-bytes)))
909   - (unless (<= 0 pc-offset code-size)
910   - ;; We were in an assembly routine.
911   - (multiple-value-bind (new-pc-offset computed-return)
912   - (find-pc-from-assembly-fun code scp)
913   - (setf pc-offset new-pc-offset)
914   - (unless (<= 0 pc-offset code-size)
915   - (cerror
916   - "Set PC-OFFSET to zero and continue backtrace."
917   - 'bug
918   - :format-control
919   - "~@<PC-OFFSET (~D) not in code object. Frame details:~
  899 + (/noshow0 "in WITHOUT-GCING")
  900 + (let ((code (code-object-from-bits
  901 + (sb!vm:context-register scp sb!vm::code-offset))))
  902 + (/noshow0 "got CODE")
  903 + (when (symbolp code)
  904 + (return (values code 0 scp)))
  905 + (let* ((code-header-len (* (get-header-data code)
  906 + sb!vm:n-word-bytes))
  907 + (pc-offset
  908 + (- (sap-int (sb!vm:context-pc scp))
  909 + (- (get-lisp-obj-address code)
  910 + sb!vm:other-pointer-lowtag)
  911 + code-header-len)))
  912 + (let ((code-size (* (code-header-ref code
  913 + sb!vm:code-code-size-slot)
  914 + sb!vm:n-word-bytes)))
  915 + (unless (<= 0 pc-offset code-size)
  916 + ;; We were in an assembly routine.
  917 + (multiple-value-bind (new-pc-offset computed-return)
  918 + (find-pc-from-assembly-fun code scp)
  919 + (setf pc-offset new-pc-offset)
  920 + (unless (<= 0 pc-offset code-size)
  921 + (cerror
  922 + "Set PC-OFFSET to zero and continue backtrace."
  923 + 'bug
  924 + :format-control
  925 + "~@<PC-OFFSET (~D) not in code object. Frame details:~
920 926 ~2I~:@_PC: #X~X~:@_CODE: ~S~:@_CODE FUN: ~S~:@_LRA: ~
921 927 #X~X~:@_COMPUTED RETURN: #X~X.~:>"
922   - :format-arguments
923   - (list pc-offset
924   - (sap-int (sb!vm:context-pc scp))
925   - code
926   - (%code-entry-points code)
927   - (sb!vm:context-register scp sb!vm::lra-offset)
928   - computed-return))
929   - ;; We failed to pinpoint where PC is, but set
930   - ;; pc-offset to 0 to keep the backtrace from
931   - ;; exploding.
932   - (setf pc-offset 0)))))
933   - (/noshow0 "returning from FIND-ESCAPED-FRAME")
934   - (return
935   - (if (eq (%code-debug-info code) :bogus-lra)
936   - (let ((real-lra (code-header-ref code
937   - real-lra-slot)))
938   - (values (lra-code-header real-lra)
939   - (get-header-data real-lra)
940   - nil))
941   - (values code pc-offset scp))))))))))
  928 + :format-arguments
  929 + (list pc-offset
  930 + (sap-int (sb!vm:context-pc scp))
  931 + code
  932 + (%code-entry-points code)
  933 + (sb!vm:context-register scp sb!vm::lra-offset)
  934 + computed-return))
  935 + ;; We failed to pinpoint where PC is, but set
  936 + ;; pc-offset to 0 to keep the backtrace from
  937 + ;; exploding.
  938 + (setf pc-offset 0)))))
  939 + (/noshow0 "returning from FIND-ESCAPED-FRAME")
  940 + (return
  941 + (if (eq (%code-debug-info code) :bogus-lra)
  942 + (let ((real-lra (code-header-ref code
  943 + real-lra-slot)))
  944 + (values (lra-code-header real-lra)
  945 + (get-header-data real-lra)
  946 + nil))
  947 + (values code pc-offset scp))))))))))
942 948
943 949 #!-(or x86 x86-64)
944 950 (defun find-pc-from-assembly-fun (code scp)
2  src/compiler/aliencomp.lisp
@@ -702,7 +702,7 @@
702 702 ;; to it later regardless of what the foreign stack looks
703 703 ;; like.
704 704 #!+:c-stack-is-control-stack
705   - (when (policy node (<= speed debug))
  705 + (when (policy node (= 3 alien-funcall-saves-fp-and-pc))
706 706 (setf body `(invoke-with-saved-fp-and-pc (lambda () ,body))))
707 707 (/noshow "returning from DEFTRANSFORM ALIEN-FUNCALL" (params) body)
708 708 `(lambda (function ,@(params))
6 src/compiler/policies.lisp
@@ -51,6 +51,12 @@ Enabling this option can increase heap consing of closures.")
51 51 "Control conversion of &REST arguments to &MORE arguments when
52 52 only used as the final argument to APPLY.")
53 53
  54 +(define-optimization-quality alien-funcall-saves-fp-and-pc
  55 + (if (<= speed debug) 3 0)
  56 + ("no" "maybe" "yes" "yes")
  57 + "Control ALIEN-FUNCALL saving frame-pointer and program counter for
  58 +more reliable backtracing across foreign calls.")
  59 +
54 60 (define-optimization-quality verify-arg-count
55 61 (if (zerop safety) 0 3)
56 62 ("no" "maybe" "yes" "yes"))
12 tests/debug.impure.lisp
@@ -173,6 +173,18 @@
173 173 (list '(flet not-optimized))
174 174 (list '(flet test) #'not-optimized))))))
175 175
  176 +(with-test (:name :interrupted-syscall)
  177 + (let ((m (sb-thread:make-mutex))
  178 + (q (sb-thread:make-waitqueue)))
  179 + (assert (verify-backtrace
  180 + (lambda ()
  181 + (sb-thread:with-mutex (m)
  182 + (handler-bind ((timeout (lambda (c)
  183 + (error "foo"))))
  184 + (with-timeout 0.1
  185 + (sb-thread:condition-wait q m)))))
  186 + `((sb-thread:condition-wait ,q ,m))))))
  187 +
176 188 ;;; Division by zero was a common error on PPC. It depended on the
177 189 ;;; return function either being before INTEGER-/-INTEGER in memory,
178 190 ;;; or more than MOST-POSITIVE-FIXNUM bytes ahead. It also depends on

0 comments on commit e7b2c50

Please sign in to comment.
Something went wrong with that request. Please try again.