fix and optimize bswap

Also allow FLOATTYPE_16PPC, although this format appears to be double-double rather than __float128.
parrot · Sep 9, 2012 · 5851213 · 5851213
1 parent b0ab7d2
commit 5851213
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 52 deletions.
diff --git a/include/parrot/bswap.h b/include/parrot/bswap.h
@@ -5,48 +5,53 @@
  *
  */
 
-#ifdef HAS_HEADER_BYTESWAP
+#include "parrot/config.h"
+
+#ifdef PARROT_HAS_HEADER_BYTESWAP
 #  include <byteswap.h>                    /* GNU */
 #  define bswap_16(x) __bswap_16(x)
 #  define bswap_32(x) __bswap_32(x)
-#  if __WORDSIZE == 64
+/* Test HAS_LONGLONG rather than __WORDSIZE: we want the fast bswap_64 on 64-bit CPUs even with -m32 */
+#  ifdef HAS_LONGLONG
 #    define bswap_64(x) __bswap_64(x)
 #   endif
 #else
-#  ifdef HAS_HEADER_ENDIAN                 /* linux */
+#  ifdef PARROT_HAS_HEADER_ENDIAN                 /* linux */
 #    include <endian.h>
-#  elif defined(HAS_HEADER_SYS_ENDIAN)	   /* FreeBSD */
+#  elif defined(PARROT_HAS_HEADER_SYS_ENDIAN)	   /* FreeBSD */
 #    include <sys/endian.h>
 #  endif
-#  if defined(HAS_HEADER_ENDIAN) \
-      || defined(HAS_HEADER_SYS_ENDIAN)
+#  if defined(PARROT_HAS_HEADER_ENDIAN) \
+      || defined(PARROT_HAS_HEADER_SYS_ENDIAN)
 #    define bswap_16(x) __bswap_16(x)
 #    define bswap_32(x) __bswap_32(x)
-#    if __WORDSIZE == 64
+#    ifdef HAS_LONGLONG
 #      define bswap_64(x) __bswap_64(x)
 #    endif
 #  else
-#    ifdef HAS_HEADER_LIBKERN_OSBYTEORDER
+#    ifdef PARROT_HAS_HEADER_LIBKERN_OSBYTEORDER
 #      include <libkern/OSByteOrder.h>
 #      define bswap_16(x) OSSwapInt16(x)
 #      define bswap_32(x) OSSwapInt32(x)
-#      if __WORDSIZE == 64
+#      ifdef HAS_LONGLONG
 #        define bswap_64(x) OSSwapInt64(x)
 #      endif
 #    else
-#      ifdef HAS_HEADER_SYS_BYTEORDER
+#      ifdef PARROT_HAS_HEADER_SYS_BYTEORDER
 #        define bswap_16(x) BSWAP_16(x)
 #        define bswap_32(x) BSWAP_32(x)
-#        if __WORDSIZE == 64
+#        ifdef HAS_LONGLONG
 #          define bswap_64(x) BSWAP_64(x)
 #        endif
 #      else
 #        ifdef __MSC_VER
 #          define bswap_16(x) _byteswap_ushort(x)
 #          define bswap_32(x) _byteswap_ulong(x)
-#          define bswap_64(x) _byteswap_uint64(x)
+#          ifdef HAS_LONGLONG
+#            define bswap_64(x) _byteswap_uint64(x)
+#          endif
 #        else
-           /* lost */
+           /* no native bswap */
 #          define bswap_16(x)						\
   ({									\
     Parrot_UInt2 __x = (x);						\
@@ -63,7 +68,7 @@
 		(((Parrot_UInt4)(__x) & (Parrot_UInt4)0x00ff0000UL) >>  8) | \
 		(((Parrot_UInt4)(__x) & (Parrot_UInt4)0xff000000UL) >> 24) )); \
   })
-#          if __WORDSIZE == 64
+#          ifdef HAS_LONGLONG
 #            define bswap_64(x)						\
   ({									\
     Parrot_UInt8 __x = (x);						\
@@ -78,11 +83,11 @@
       (Parrot_UInt8)(((Parrot_UInt8)(__x) & (Parrot_UInt8)0xff00000000000000ULL) >> 56) )); \
   })
 #          else
-#            define bswap_64(x) ({   \
-               unsigned char rb[16]; \
+#            define bswap_64(x) ({          \
+               unsigned char rb[8];         \
                const unsigned char *c = &x; \
-               SWAB_16(rb,c); \
-	       memcpy(x,rb,16); })
+               SWAB_8(rb, c);               \
+               (Parrot_UInt8)rb; })
 #          endif
 #        endif
 #      endif
@@ -187,10 +192,10 @@ static inline Parrot_UInt8 bswap64(Parrot_UInt8 x)
 #if defined(bswap_64)
     return bswap_64(x);
 #else
-    unsigned char rb[16];
+    unsigned char rb[8];
     const unsigned char *c = &x;
-    SWAB_16(rb, c);
-    return rb;
+    SWAB_8(rb, c);
+    return (Parrot_UInt8)rb;
 #endif
 }
 

diff --git a/include/parrot/packfile.h b/include/parrot/packfile.h
@@ -32,20 +32,21 @@
 #define FLOATTYPE_4           3
 #define FLOATTYPE_4_NAME      "IEEE-754 4-byte single float"
 
+#define FLOATTYPE_16PPC       4
+#define FLOATTYPE_16PPC_NAME  "PPC64 16 byte double-double"
+
 static
-int PF_floattype_size[] = { 8,12,16,4,2,16,16,16 };
+int PF_floattype_size[] = { 8,12,16,4,16,2,16,16 };
 
 /* Supported until here. */
-#define FLOATTYPE_MAX         3
+#define FLOATTYPE_MAX         4
 
 /* Reserved for later */
-#define FLOATTYPE_2           4
+#define FLOATTYPE_2           5
 #define FLOATTYPE_2_NAME      "IEEE-754 2-byte half-precision float"
 
 /* Non IEEE-754 versions, yet unsupported. */
 /* NaN and other minor differences, but patches welcome */
-#define FLOATTYPE_16PPC       5
-#define FLOATTYPE_16PPC_NAME  "PPC64 16 byte double-double"
 
 #define FLOATTYPE_16MIPS      6
 #define FLOATTYPE_16MIPS_NAME "MIPS 16 byte long double"

diff --git a/src/packfile/pf_items.c b/src/packfile/pf_items.c
@@ -285,9 +285,9 @@ static opcode_t fetch_op_le_8(ARGIN(const unsigned char *b))
  * Floattype 2 = IEEE-754 128 bit quad precision stored in 16 byte,
  *               Sparc64 quad-float or __float128, gcc since 4.3 (binary128)
  * Floattype 3 = IEEE-754 4 byte float (binary32)
+ * Floattype 4 = PowerPC 16 byte double-double
  * not yet:
- * Floattype 4 = IEEE-754 2 byte half-precision float (binary16)
+ * Floattype 5 = IEEE-754 2 byte half-precision float (binary16)
- * Floattype 5 = PowerPC64 16 byte double-double
  * Floattype 6 = MIPS64 16 byte long double
  * Floattype 7 = AIX 16 byte long double
  *
@@ -1055,12 +1055,16 @@ PF_fetch_number(ARGIN_NULLOK(PackFile *pf), ARGIN(const opcode_t **stream))
             else if (floatsize == 4) {
                 *((const unsigned char **)stream) = bswap32((Parrot_UInt4)*stream);
             }
-            else if (floatsize == 2) {
-                *((const unsigned char **)stream) = bswap16((Parrot_UInt2)*stream);
-            }
             else if (floatsize == 16) {
+#if 0
+                /* TODO 64bit CPU: bswap64 with temp */
+                Parrot_UInt8 tmp = bswap64((Parrot_UInt8)*stream);
+                *((const unsigned char **)stream) =
+                    bswap64((Parrot_UInt8)*((const unsigned char *)stream+8));
+                *(((const unsigned char **)stream+8)) = tmp;
+#endif
                 unsigned char rb[16];
-                const unsigned char *c = (const unsigned char *) *stream;
+                const unsigned char *c = (const unsigned char *)*stream;
                 SWAB_16(rb, c);
                 memcpy(*((const unsigned char **)stream), rb, 16);
             }
@@ -1508,68 +1512,96 @@ PackFile_assign_transforms(ARGMOD(PackFile *pf))
     pf->fetch_iv = pf->fetch_op;
 
     switch (pf->header->floattype) {
-#if NUMVAL_SIZE == 8
+#if FLOATTYPE == FLOATTYPE_8
       case FLOATTYPE_8:
         pf->fetch_nv = pf->header->byteorder ? fetch_buf_be_8 : fetch_buf_le_8;
         break;
       case FLOATTYPE_10:
         pf->fetch_nv = cvt_num10_num8;
         break;
       case FLOATTYPE_16:
+      case FLOATTYPE_16PPC:
         pf->fetch_nv = cvt_num16_num8;
         break;
       case FLOATTYPE_4:
         pf->fetch_nv = cvt_num4_num8;
         break;
 #endif
-#if NUMVAL_SIZE == 12
+#if FLOATTYPE == FLOATTYPE_10
       case FLOATTYPE_8:
         pf->fetch_nv = cvt_num8_num10;
         break;
       case FLOATTYPE_10:
-        pf->fetch_nv = pf->header->byteorder ? fetch_buf_be_12 : fetch_buf_le_12;
+        pf->fetch_nv = pf->header->wordsize == 8
+            ? fetch_buf_le_16;
+            ? fetch_buf_le_12;
         break;
       case FLOATTYPE_16:
+      case FLOATTYPE_16PPC:
         pf->fetch_nv = cvt_num16_num10;
         break;
       case FLOATTYPE_4:
         pf->fetch_nv = cvt_num4_num10;
         break;
 #endif
-#if NUMVAL_SIZE == 16
+#if FLOATTYPE == FLOATTYPE_16
       case FLOATTYPE_8:
         pf->fetch_nv = cvt_num8_num16;
         break;
       case FLOATTYPE_10:
         pf->fetch_nv = cvt_num10_num16;
         break;
       case FLOATTYPE_16:
+      case FLOATTYPE_16PPC:
         pf->fetch_nv = pf->header->byteorder ? fetch_buf_be_16 : fetch_buf_le_16;
         break;
       case FLOATTYPE_4:
         pf->fetch_nv = cvt_num4_num16;
         break;
 #endif
-#if NUMVAL_SIZE == 4
+#if FLOATTYPE == FLOATTYPE_16PPC
+      case FLOATTYPE_8:
+        pf->fetch_nv = cvt_num8_num16;
+        break;
+      case FLOATTYPE_10:
+        pf->fetch_nv = cvt_num10_num16;
+        break;
+      case FLOATTYPE_16:
+      case FLOATTYPE_16PPC:
+        pf->fetch_nv = pf->header->byteorder ? fetch_buf_be_16 : fetch_buf_le_16;
+        break;
+      case FLOATTYPE_4:
+        pf->fetch_nv = cvt_num4_num16;
+        break;
+#endif
+#if FLOATTYPE == FLOATTYPE_4
       case FLOATTYPE_8:
         pf->fetch_nv = cvt_num8_num4;
         break;
       case FLOATTYPE_10:
         pf->fetch_nv = cvt_num10_num4;
         break;
       case FLOATTYPE_16:
+      case FLOATTYPE_16PPC:
         pf->fetch_nv = cvt_num16_num4;
         break;
       case FLOATTYPE_4:
         pf->fetch_nv = pf->header->byteorder ? fetch_buf_be_4 : fetch_buf_le_4;
         break;
 #endif
       default:
+      {
+        int floatsize = PF_floattype_size[ pf->header->floattype ];
+        /* Intel x86_64 has FLOATTYPE_10 aligned to size 16. */
+        if ( floatsize == 12 && pf->header->wordsize == 8 )
+            floatsize = 16;
         Parrot_x_force_error_exit(NULL, 1,
               "PackFile_unpack: unsupported float conversion %d to %d, "
-              "PARROT_BIGENDIAN=%d\n",
-              NUMVAL_SIZE, pf->header->floattype, PARROT_BIGENDIAN);
+              "size=%d, bigendian=%d\n",
+              pf->header->floattype, FLOATTYPE,
+              floatsize, pf->header->byteorder );
             break;
+      }
     }
     return;
 }
@@ -1729,8 +1761,7 @@ fetch_buf_be_4(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
 #if PARROT_BIGENDIAN
     memcpy(rb, b, 4);
 #else
-    memcpy(rb, bswap16((Parrot_UInt2)*b), 4);
-    //SWAB_4(rb,b);
+    *(Parrot_UInt4*)rb = bswap32(*(Parrot_UInt4*)b);
 #endif
 }
 
@@ -1752,8 +1783,7 @@ fetch_buf_le_4(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
 #if !PARROT_BIGENDIAN
     memcpy(rb, b, 4);
 #else
-    memcpy(rb, bswap16((Parrot_UInt2)*b), 4);
-    //SWAB_4(rb,b);
+    *(Parrot_UInt4*)rb = bswap32(*(Parrot_UInt4*)b);
 #endif
 }
 
@@ -1774,9 +1804,10 @@ fetch_buf_be_8(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
     ASSERT_ARGS(fetch_buf_be_8)
 #if PARROT_BIGENDIAN
     memcpy(rb, b, 8);
+#elif defined(HAS_LONGLONG)
+    *(Parrot_UInt8*)rb = bswap64(*(Parrot_UInt8*)b);
 #else
-    memcpy(rb, bswap32((Parrot_UInt4)*b), 8);
-    //SWAB_8(rb,b);
+    SWAB_8(rb, b);
 #endif
 }
 
@@ -1797,9 +1828,10 @@ fetch_buf_le_8(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
     ASSERT_ARGS(fetch_buf_le_8)
 #if !PARROT_BIGENDIAN
     memcpy(rb, b, 8);
+#elif defined(HAS_LONGLONG)
+    *(Parrot_UInt8*)rb = bswap64(*(Parrot_UInt8*)b);
 #else
-    memcpy(rb, bswap32((Parrot_UInt4)*b), 8);
-    //SWAB_8(rb,b);
+    SWAB_8(rb, b);
 #endif
 }
 
@@ -1880,8 +1912,7 @@ fetch_buf_le_16(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
 #if !PARROT_BIGENDIAN
     memcpy(rb, b, 16);
 #else
-    memcpy(rb, bswap64((Parrot_UInt8)*b), 16);
-    //SWAB_16(rb,b);
+    SWAB_16(rb, b);
 #endif
 }
 
@@ -1903,8 +1934,7 @@ fetch_buf_be_16(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
 #if PARROT_BIGENDIAN
     memcpy(rb, b, 16);
 #else
-    memcpy(rb, bswap64((Parrot_UInt8)*b), 16);
-    //SWAB_16(rb,b);
+    SWAB_16(rb, b);
 #endif
 }
 
@@ -1926,7 +1956,7 @@ fetch_buf_le_32(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
 #if !PARROT_BIGENDIAN
     memcpy(rb, b, 32);
 #else
-    SWAB_32(rb,b);
+    SWAB_32(rb, b);
 #endif
 }
 
@@ -1948,7 +1978,7 @@ fetch_buf_be_32(ARGOUT(unsigned char *rb), ARGIN(const unsigned char *b))
 #if PARROT_BIGENDIAN
     memcpy(rb, b, 32);
 #else
-    SWAB_32(rb,b);
+    SWAB_32(rb, b);
 #endif
 }