Add translations to min/max SSE instructions

shamazmazum · Jan 11, 2023 · 41958ca · 41958ca
1 parent 683540e
commit 41958ca
Show file tree

Hide file tree

Showing 8 changed files with 205 additions and 34 deletions.
diff --git a/README.md b/README.md
@@ -2,20 +2,21 @@
 
 This system tells SBCL how to use math functions from libm for single-float
 type. In other words rather than converting single-float to double-float,
-calling `sin` and converting back, SBCL will simply call `sinf`.
+calling `sin` and converting back, SBCL will simply call `sinf`. Also it makes
+use of some SSE instructions like `min(max)ss(d)` or `sqrtss`.
 
 Here is an example. Consider this code:
 
-~~~~{.lisp}
+``` lisp
 (defun foo (x)
   (declare (optimize (speed 3))
            (type single-float x))
   (sinh (1+ x)))
-~~~~
+```
 
 This is the produced assembly before loading `sbcl-single-float-tran`:
 
-~~~~
+``` lisp
 CL-USER> (disassemble 'foo)
 ; disassembly for FOO
 ; Size: 65 bytes. Origin: #x226390EF                          ; FOO
@@ -38,11 +39,11 @@ CL-USER> (disassemble 'foo)
 ; 12D:       C3               RET
 ; 12E:       CC10             INT3 16                         ; Invalid argument count trap
 NIL
-~~~~
+```
 
 and this is after loading `sbcl-single-float-tran` and recompilation:
 
-~~~~
+``` lisp
 CL-USER> (disassemble 'foo)
 ; disassembly for FOO
 ; Size: 46 bytes. Origin: #x2263B0BF                          ; FOO
@@ -60,18 +61,18 @@ CL-USER> (disassemble 'foo)
 ; EA:       C3               RET
 ; EB:       CC10             INT3 16                          ; Invalid argument count trap
 NIL
-~~~~
+```
 
 Note, that `sbcl-single-float-tran` correctly handles domain of a function:
 
-~~~~{.lisp}
+``` lisp
 (defun foo (x)
   (declare (optimize (speed 3))
            (type (single-float -1f0) x))
   (+ (log x) (log (1+ x))))
-~~~~
+```
 
-~~~~
+``` lisp
 CL-USER> (disassemble 'foo)
 ; disassembly for FOO
 ; Size: 109 bytes. Origin: #x2263B1A0                         ; FOO
@@ -105,6 +106,19 @@ CL-USER> (disassemble 'foo)
 ; 20A:       C3               RET
 ; 20B:       CC10             INT3 16                         ; Invalid argument count trap
 NIL
+```
+
+## MIN and MAX
+
+`sbcl-single-float-tran` converts calls to `min` and `max` with two or three
+arguments to SSE instructions where possible. Despite the name, this works for
+double float numbers too. The result of such call is converted to the largest
+format of floating point arguments if one of the arguments is a floating point
+value or otherwise remains as is. E.g.
+
+~~~~
+(MIN SINGLE-FLOAT DOUBLE-FLOAT FIXNUM) -> DOUBLE-FLOAT
+(MIN FIXNUM RATIO) -> FIXNUM or RATIO
 ~~~~
 
 ## Portability

diff --git a/sbcl-single-float-tran.asd b/sbcl-single-float-tran.asd
@@ -6,9 +6,12 @@
   :licence "2-clause BSD"
   :serial t
   :pathname "src/"
-  :components ((:file "expt-fix"        :if-feature :sbcl)
-               (:file "package"         :if-feature :sbcl)
-               (:file "fndb"            :if-feature :sbcl)
-               (:file "vop"             :if-feature (:and :sbcl :x86-64))
-               (:file "sbcl-transforms" :if-feature :sbcl)
-               (:file "no-sbcl"         :if-feature (:not :sbcl))))
+  :components ((:file "expt-fix"           :if-feature :sbcl)
+               (:file "package"            :if-feature :sbcl)
+               (:file "fndb"               :if-feature :sbcl)
+               (:file "vop"                :if-feature (:and :sbcl :x86-64))
+               (:file "transforms"         :if-feature :sbcl)
+               (:file "irrat-transforms"   :if-feature :sbcl)
+               (:file "min-max-transforms" :if-feature (:and :sbcl :x86-64))
+               (:file "no-sbcl"            :if-feature (:not :sbcl)))
+  :depends-on (:alexandria))
diff --git a/src/fndb.lisp b/src/fndb.lisp
@@ -24,3 +24,17 @@
     (single-float single-float) single-float
     (sb-c:movable sb-c:foldable sb-c:flushable)
   :overwrite-fndb-silently t)
+
+;; Min and max functions which translate to SSE instructions
+
+#+x86-64
+(progn
+  (sb-c:defknown (%minf %maxf)
+      (single-float single-float) single-float
+      (sb-c:movable sb-c:foldable sb-c:flushable)
+    :overwrite-fndb-silently t)
+
+  (sb-c:defknown (%mind %maxd)
+      (double-float double-float) double-float
+      (sb-c:movable sb-c:foldable sb-c:flushable)
+    :overwrite-fndb-silently t))
diff --git a/src/sbcl-transforms.lisp → src/irrat-transforms.lisp b/src/sbcl-transforms.lisp → src/irrat-transforms.lisp
@@ -52,16 +52,15 @@
 (defun %sqrtf (x)
   (%sqrtf x))
 
-(handler-bind
-    ((sb-kernel:redefinition-with-deftransform #'muffle-warning))
-  ;; Define IR1 transformations from EXP to %EXP and so on.
-  ;; (look at src/compiler/float-tran.lisp in SBCL source code).
-  (macrolet
-      ((def-trans (name ret-type)
-         (let ((trans-name (symbolicate "%" name "f"))
-               (arg (gensym)))
-           `(sb-c:deftransform ,name ((,arg) (single-float) ,ret-type)
-              '(,trans-name ,arg)))))
+;; Define IR1 transformations from EXP to %EXP and so on.
+;; (look at src/compiler/float-tran.lisp in SBCL source code).
+(macrolet
+    ((def-trans (name ret-type)
+       (let ((trans-name (symbolicate "%" name "f"))
+             (arg (gensym)))
+         `(sb-c:deftransform ,name ((,arg) (single-float) ,ret-type)
+            '(,trans-name ,arg)))))
+  (with-silent-transform-overwrite
     (def-trans exp  *)
     (def-trans log  float)
     (def-trans sqrt float)
@@ -70,10 +69,10 @@
     (def-trans tan  *)
     (def-trans sinh *)
     (def-trans cosh *)
-    (def-trans tanh *))
+    (def-trans tanh *)))
 
+(with-silent-transform-overwrite
   (sb-c:deftransform expt ((x y) (single-float single-float) single-float)
     '(%powf x y))
-
   (sb-c:deftransform expt ((x y) (single-float integer) single-float)
     '(%powf x (coerce y 'single-float))))
diff --git a/src/min-max-transforms.lisp b/src/min-max-transforms.lisp
@@ -0,0 +1,93 @@
+(in-package :sbcl-single-float-tran)
+
+;; Define stubs which translate to VOPS
+(macrolet ((def-stubs (names)
+             `(progn
+                ,@(loop for name in names collect
+                        `(defun ,name (x y)
+                           (,name x y))))))
+  (def-stubs (%minf %mind %maxf %maxd)))
+
+;; Remove source transforms for min and max
+(setf (sb-int:info :function :source-transform 'min) nil
+      (sb-int:info :function :source-transform 'max) nil)
+;; MIN and MAX share the same FUN-INFO for some reason,
+;; so we need to redefine it
+(sb-c:defknown (max min) (real &rest real) real
+  (sb-c:movable sb-c:foldable sb-c:flushable)
+  :overwrite-fndb-silently t)
+
+;; NB: Order matters
+
+;; Two arg transforms
+(defun two-arg-min-max-transform-body (compare-op)
+  (alex:with-gensyms (arg1 arg2)
+    `(let ((,arg1 x)
+           (,arg2 y))
+       (if (,compare-op ,arg1 ,arg2)
+           ,arg1 ,arg2))))
+
+(macrolet ((def-min-max-transform (op compare-op)
+             `(sb-c:deftransform ,op ((x y) (real real) *)
+                (two-arg-min-max-transform-body ',compare-op))))
+  (with-silent-transform-overwrite
+    (def-min-max-transform min <=)
+    (def-min-max-transform max >=)))
+
+(macrolet ((def-min-max-transform (op single-op double-op)
+             `(progn
+                (sb-c:deftransform ,op ((x y) (single-float real) *)
+                  '(,single-op x (coerce y 'single-float)))
+                (sb-c:deftransform ,op ((x y) (real single-float) *)
+                  '(,single-op (coerce x 'single-float) y))
+                (sb-c:deftransform ,op ((x y) (double-float real) *)
+                  '(,double-op x (coerce y 'double-float)))
+                (sb-c:deftransform ,op ((x y) (real double-float) *)
+                  '(,double-op (coerce x 'double-float) y)))))
+  (with-silent-transform-overwrite
+    (def-min-max-transform min %minf %mind)
+    (def-min-max-transform max %maxf %maxd)))
+
+;; Three arg transforms.
+;;
+;; FIXME: MIN and MAX on floating point numbers are not commutative
+;; and associative (because of NaNs), but MIN and MAX on (AND REAL
+;; (NOT FLOAT)) should be.
+(defun three-arg-min-max-transform-body (compare-op)
+  (alex:with-gensyms (arg1 arg2 arg3 tmp)
+    `(let* ((,arg1 x)
+            (,arg2 y)
+            (,arg3 z)
+
+            (,tmp (if (,compare-op ,arg1 ,arg2)
+                      ,arg1 ,arg2)))
+       (if (,compare-op ,tmp ,arg3)
+           ,tmp ,arg3))))
+
+(macrolet ((def-min-max-transform (op compare-op)
+             `(sb-c:deftransform ,op ((x y z) (real real real) *)
+                (three-arg-min-max-transform-body ',compare-op))))
+  (with-silent-transform-overwrite
+    (def-min-max-transform min <=)
+    (def-min-max-transform max >=)))
+
+(macrolet ((def-min-max-transform (op single-op double-op)
+             `(progn
+                (sb-c:deftransform ,op ((x y z) (single-float real real) *)
+                  '(,single-op (,single-op x (coerce y 'single-float)) (coerce z 'single-float)))
+                (sb-c:deftransform ,op ((x y z) (real single-float real) *)
+                  '(,single-op (,single-op (coerce x 'single-float) y) (coerce z 'single-float)))
+                (sb-c:deftransform ,op ((x y z) (real real single-float) *)
+                  '(,single-op (,single-op (coerce x 'single-float) (coerce y 'single-float)) z))
+
+                (sb-c:deftransform ,op ((x y z) (double-float real real) *)
+                  '(,double-op (,double-op x (coerce y 'double-float)) (coerce z 'double-float)))
+                (sb-c:deftransform ,op ((x y z) (real double-float real) *)
+                  '(,double-op (,double-op (coerce x 'double-float) y) (coerce z 'double-float)))
+                (sb-c:deftransform ,op ((x y z) (real real double-float) *)
+                  '(,double-op (,double-op (coerce x 'double-float) (coerce y 'double-float)) z)))))
+  (with-silent-transform-overwrite
+    (def-min-max-transform min %minf %mind)
+    (def-min-max-transform max %maxf %maxd)))
+
+;; Four arguments and more are compiled as a regular function call.
diff --git a/src/package.lisp b/src/package.lisp
@@ -1,2 +1,3 @@
 (defpackage sbcl-single-float-tran
-  (:use :cl))
+  (:use :cl)
+  (:local-nicknames (:alex :alexandria)))
diff --git a/src/transforms.lisp b/src/transforms.lisp
@@ -0,0 +1,6 @@
+(in-package :sbcl-single-float-tran)
+
+(defmacro with-silent-transform-overwrite (&body body)
+  `(handler-bind
+       ((sb-kernel:redefinition-with-deftransform #'muffle-warning))
+     ,@body))
diff --git a/src/vop.lisp b/src/vop.lisp
@@ -10,8 +10,49 @@
   (:note "inline float arithmetic")
   (:vop-var vop)
   (:save-p :compute-only)
-  (:generator 1
-     (unless (location= x y)
-       (inst xorps y y))
-     (note-float-location 'sqrt vop x)
-     (inst sqrtss y x)))
+  (:generator
+   1
+   (unless (location= x y)
+     (inst xorps y y))
+   (note-float-location 'sqrt vop x)
+   (inst sqrtss y x)))
+
+(macrolet ((frob (op-name vop-name op-inst double-p)
+             (let ((sc-name    (if double-p 'double-reg 'single-reg))
+                   (move-inst  (if double-p 'movsd 'movss))
+                   (immediate  (if double-p 'fp-double-immediate 'fp-single-immediate))
+                   (parent-vop (if double-p 'double-float-op 'single-float-op)))
+               `(define-vop (,vop-name ,parent-vop)
+                  (:translate ,op-name)
+                  (:temporary (:sc ,sc-name) tmp)
+                  (:vop-var vop)
+                  (:generator
+                   2
+                   (flet ((get-constant (tn)
+                            (let ((value (tn-value tn)))
+                              (register-inline-constant value))))
+                     (cond
+                       ((location= x r)
+                        (note-float-location ',op-name vop x y)
+                        (when (sc-is y ,immediate)
+                          (setf y (get-constant y)))
+                        (inst ,op-inst x y))
+                       ((not (location= r y))
+                        (if (sc-is x ,immediate)
+                            (inst ,move-inst r (get-constant x))
+                            (move r x))
+                        (note-float-location ',op-name vop r y)
+                        (when (sc-is y ,immediate)
+                          (setf y (get-constant y)))
+                        (inst ,op-inst r y))
+                       (t
+                        (if (sc-is x ,immediate)
+                            (inst ,move-inst tmp (get-constant x))
+                            (move tmp x))
+                        (note-float-location ',op-name vop tmp y)
+                        (inst ,op-inst tmp y)
+                        (move r tmp)))))))))
+  (frob sbcl-single-float-tran::%minf fminf minss nil)
+  (frob sbcl-single-float-tran::%mind fmind minsd t)
+  (frob sbcl-single-float-tran::%maxf fmaxf maxss nil)
+  (frob sbcl-single-float-tran::%maxd fmaxd maxsd t))