Implemented little-endian load/store with bswap intrinsics

This reduces compiled code size with GCC on ARM and adds compile-time checks that float and integer endianness match. See Issue #15.
ludocode · Nov 19, 2015 · 0d3e351 · 0d3e351
1 parent 6bcc947
commit 0d3e351
Show file tree

Hide file tree

Showing 4 changed files with 109 additions and 0 deletions.
diff --git a/src/mpack/mpack-common.h b/src/mpack/mpack-common.h
@@ -341,6 +341,11 @@ MPACK_INLINE bool mpack_tag_equal(mpack_tag_t left, mpack_tag_t right) {
  * use them for other purposes, but they are undocumented. (Note
  * also that they are static always-inline; they do not follow
  * the normal MPack inline linkage.)
+ *
+ * The bswap builtins are used when the host is little-endian and
+ * the compiler provides them. With GCC 5.2 they appear to give
+ * better performance and smaller code size on little-endian ARM
+ * while compiling to the same assembly as the bit-shifting code
+ * on x86_64.
  */
 
 MPACK_ALWAYS_INLINE uint8_t mpack_load_native_u8(const char* p) {
@@ -353,13 +358,24 @@ MPACK_ALWAYS_INLINE uint16_t mpack_load_native_u16(const char* p) {
 }
 
 MPACK_ALWAYS_INLINE uint32_t mpack_load_native_u32(const char* p) {
+    #ifdef MPACK_NHSWAP32
+    uint32_t val;
+    mpack_memcpy(&val, p, sizeof(val));
+    return MPACK_NHSWAP32(val);
+    #else
     return (((uint32_t)(uint8_t)p[0]) << 24) |
            (((uint32_t)(uint8_t)p[1]) << 16) |
            (((uint32_t)(uint8_t)p[2]) <<  8) |
             ((uint32_t)(uint8_t)p[3]);
+    #endif
 }
 
 MPACK_ALWAYS_INLINE uint64_t mpack_load_native_u64(const char* p) {
+    #ifdef MPACK_NHSWAP64
+    uint64_t val;
+    mpack_memcpy(&val, p, sizeof(val));
+    return MPACK_NHSWAP64(val);
+    #else
     return (((uint64_t)(uint8_t)p[0]) << 56) |
            (((uint64_t)(uint8_t)p[1]) << 48) |
            (((uint64_t)(uint8_t)p[2]) << 40) |
@@ -368,6 +384,7 @@ MPACK_ALWAYS_INLINE uint64_t mpack_load_native_u64(const char* p) {
            (((uint64_t)(uint8_t)p[5]) << 16) |
            (((uint64_t)(uint8_t)p[6]) <<  8) |
             ((uint64_t)(uint8_t)p[7]);
+    #endif
 }
 
 MPACK_ALWAYS_INLINE void mpack_store_native_u8(char* p, uint8_t val) {
@@ -382,14 +399,23 @@ MPACK_ALWAYS_INLINE void mpack_store_native_u16(char* p, uint16_t val) {
 }
 
 MPACK_ALWAYS_INLINE void mpack_store_native_u32(char* p, uint32_t val) {
+    #ifdef MPACK_NHSWAP32
+    val = MPACK_NHSWAP32(val);
+    mpack_memcpy(p, &val, sizeof(val));
+    #else
     uint8_t* u = (uint8_t*)p;
     u[0] = (uint8_t)((val >> 24) & 0xFF);
     u[1] = (uint8_t)((val >> 16) & 0xFF);
     u[2] = (uint8_t)((val >>  8) & 0xFF);
     u[3] = (uint8_t)( val        & 0xFF);
+    #endif
 }
 
 MPACK_ALWAYS_INLINE void mpack_store_native_u64(char* p, uint64_t val) {
+    #ifdef MPACK_NHSWAP64
+    val = MPACK_NHSWAP64(val);
+    mpack_memcpy(p, &val, sizeof(val));
+    #else
     uint8_t* u = (uint8_t*)p;
     u[0] = (uint8_t)((val >> 56) & 0xFF);
     u[1] = (uint8_t)((val >> 48) & 0xFF);
@@ -399,6 +425,7 @@ MPACK_ALWAYS_INLINE void mpack_store_native_u64(char* p, uint64_t val) {
     u[5] = (uint8_t)((val >> 16) & 0xFF);
     u[6] = (uint8_t)((val >>  8) & 0xFF);
     u[7] = (uint8_t)( val        & 0xFF);
+    #endif
 }
 
 /** @endcond */

diff --git a/src/mpack/mpack-platform.h b/src/mpack/mpack-platform.h
@@ -393,6 +393,84 @@ MPACK_HEADER_START
 
 
 
+/*
+ * Endianness checks
+ *
+ * These define MPACK_NHSWAP*(), which swap between network and
+ * host byte order when needed.
+ *
+ * We leave them undefined if we can't determine the endianness
+ * at compile-time, or if the host is little-endian and no suitable
+ * byte-swap builtin is available. In either case we fall back to
+ * bit-shifts.
+ *
+ * See the notes in mpack-common.h.
+ */
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && defined(__ORDER_BIG_ENDIAN__)
+    #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        #define MPACK_NHSWAP32(x) (x)
+        #define MPACK_NHSWAP64(x) (x)
+    #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+        #if defined(__clang__)
+            #ifdef __has_builtin
+                #if __has_builtin(__builtin_bswap32)
+                    #define MPACK_NHSWAP32(x) __builtin_bswap32(x)
+                #endif
+                #if __has_builtin(__builtin_bswap64)
+                    #define MPACK_NHSWAP64(x) __builtin_bswap64(x)
+                #endif
+            #endif
+
+        #elif defined(__GNUC__)
+
+            // The GCC bswap builtins are apparently poorly optimized on older
+            // versions of GCC, so we require at least GCC 4.5 here just in case:
+            //     http://hardwarebug.org/2010/01/14/beware-the-builtins/
+
+            #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+                #define MPACK_NHSWAP32(x) __builtin_bswap32(x)
+                #define MPACK_NHSWAP64(x) __builtin_bswap64(x)
+            #endif
+
+        #endif
+    #endif
+
+#elif defined(_MSC_VER) && defined(_WIN32)
+
+    // On Windows, we assume x86 and x86_64 are always little-endian.
+    // We make no assumptions about ARM: although all current Windows
+    // Phone devices are little-endian, a future device might not be.
+
+    #if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64)
+        #define MPACK_NHSWAP32(x) _byteswap_ulong(x)
+        #define MPACK_NHSWAP64(x) _byteswap_uint64(x)
+    #endif
+
+#endif
+
+#if defined(__FLOAT_WORD_ORDER__) && defined(__BYTE_ORDER__)
+
+    // We check where possible that the float byte order matches the
+    // integer byte order. This is extremely unlikely to fail, but
+    // we check anyway just in case.
+    //
+    // (The static assert is placed in the float/double read and write
+    // functions instead of here because our static assert fallback
+    // doesn't work at file scope.)
+
+    #define MPACK_CHECK_FLOAT_ORDER() \
+        MPACK_STATIC_ASSERT(__FLOAT_WORD_ORDER__ == __BYTE_ORDER__, \
+            "float byte order does not match int byte order! float/double " \
+            "encoding is not properly implemented on this platform.")
+
+#endif
+
+#ifndef MPACK_CHECK_FLOAT_ORDER
+    #define MPACK_CHECK_FLOAT_ORDER() /* nothing */
+#endif
+
+
 /*
  * Here we define mpack_assert() and mpack_break(). They both work like a normal
  * assertion function in debug mode, causing a trap or abort. However, on some platforms

diff --git a/src/mpack/mpack-reader.h b/src/mpack/mpack-reader.h
@@ -597,6 +597,7 @@ MPACK_ALWAYS_INLINE int32_t mpack_read_native_i32 (mpack_reader_t* reader) {retu
 MPACK_ALWAYS_INLINE int64_t mpack_read_native_i64 (mpack_reader_t* reader) {return (int64_t)mpack_read_native_u64 (reader);}
 
 MPACK_ALWAYS_INLINE float mpack_read_native_float(mpack_reader_t* reader) {
+    MPACK_CHECK_FLOAT_ORDER();
     union {
         float f;
         uint32_t i;
@@ -606,6 +607,7 @@ MPACK_ALWAYS_INLINE float mpack_read_native_float(mpack_reader_t* reader) {
 }
 
 MPACK_ALWAYS_INLINE double mpack_read_native_double(mpack_reader_t* reader) {
+    MPACK_CHECK_FLOAT_ORDER();
     union {
         double d;
         uint64_t i;

diff --git a/src/mpack/mpack-writer.c b/src/mpack/mpack-writer.c
@@ -360,6 +360,7 @@ MPACK_STATIC_INLINE void mpack_write_native_i64_unchecked(mpack_writer_t* writer
 }
 
 MPACK_STATIC_INLINE void mpack_write_native_float_unchecked(mpack_writer_t* writer, float value) {
+    MPACK_CHECK_FLOAT_ORDER();
     union {
         float f;
         uint32_t i;
@@ -369,6 +370,7 @@ MPACK_STATIC_INLINE void mpack_write_native_float_unchecked(mpack_writer_t* writ
 }
 
 MPACK_STATIC_INLINE void mpack_write_native_double_unchecked(mpack_writer_t* writer, double value) {
+    MPACK_CHECK_FLOAT_ORDER();
     union {
         double d;
         uint64_t i;