Improved file buffering in CArchive

Even though the compiler typically inlines memcpy into byte/word loads and stores (at least for release builds) when the size is known at compile time, 1-, 2- and 4-byte copies whose size is *not* determinable at compile time are still frequent enough that it's worth handling them specially. On the ARM1176JZF-S in the Raspberry Pi, this improves the total time to open a library (in the case where it's fetched from a CArchive) by around 4%. It should be noted that this code uses 16-bit and 32-bit word loads and stores that are not necessarily aligned to their respective widths. It is possible that some architectures do not support such unaligned accesses, although all ARMs since ARMv6 have supported them (and ARMs earlier than that are probably not powerful enough to be good targets for XBMC).
popcornmix · Apr 3, 2020 · 4cca43f · 4cca43f
1 parent 1230d51
commit 4cca43f
Showing 1 changed file with 16 additions and 0 deletions.
diff --git a/xbmc/utils/Archive.h b/xbmc/utils/Archive.h
@@ -144,9 +144,17 @@ class CArchive
      * than waiting until we attempt to put more data into an already full buffer */
     if (m_BufferRemain > size)
     {
+      switch (size)
+      {
+      case 1: *m_BufferPos++ = *ptr; m_BufferRemain--; break;
+      case 2: *(uint16_t *) m_BufferPos = *(const uint16_t *) ptr; m_BufferPos += 2; m_BufferRemain -= 2; break;
+      case 4: *(uint32_t *) m_BufferPos = *(const uint32_t *) ptr; m_BufferPos += 4; m_BufferRemain -= 4; break;
+      default:
       memcpy(m_BufferPos, ptr, size);
       m_BufferPos += size;
       m_BufferRemain -= size;
+      break;
+      }
       return *this;
     }
 
@@ -159,9 +167,17 @@ class CArchive
     /* Note, refilling the buffer is deferred until we know we need to read more from it */
     if (m_BufferRemain >= size)
     {
+      switch (size)
+      {
+      case 1: *ptr = *m_BufferPos++; m_BufferRemain--; break;
+      case 2: *(uint16_t *) ptr = *(const uint16_t *) m_BufferPos; m_BufferPos += 2; m_BufferRemain -= 2; break;
+      case 4: *(uint32_t *) ptr = *(const uint32_t *) m_BufferPos; m_BufferPos += 4; m_BufferRemain -= 4; break;
+      default:
       memcpy(ptr, m_BufferPos, size);
       m_BufferPos += size;
       m_BufferRemain -= size;
+      break;
+      }
       return *this;
     }