Skip to content

Commit

Permalink
Reduce memory reads when looking up MIME type from extension
Browse files Browse the repository at this point in the history
Since, by construction, the extensions are stored sorted and
sequentially in memory, if we're comparing only the strings found in
the blob generated by mimegen, we can compare only the pointers;
otherwise, fall back to two 64-bit reads, replacing the indirect call
to strncmp().

The extensions are now stored in big-endian rather than in host-endian
byte order, so that comparing two of them as 64-bit integers yields the
same ordering as a byte-wise string comparison.  As a result, Brotli
liked the input data a bit more and was able to save a bit over 50
bytes; it's not much, but, hey, savings are savings.
  • Loading branch information
lpereira committed May 10, 2024
1 parent f8b41ed commit 292d7ac
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 27 deletions.
30 changes: 12 additions & 18 deletions src/bin/tools/mimegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <endian.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
Expand Down Expand Up @@ -67,20 +68,9 @@ output_append_full(struct output *output, const char *str, size_t str_len)
return 0;
}

/* NOTE(review): this is a diff view whose +/- markers were lost, so two
 * versions of the helper are interleaved below.  output_append_padded()
 * appears to be the pre-commit version: it appends `str` (at most 8 bytes,
 * enforced by the assert) and then pads the record with NUL bytes so that
 * exactly 8 bytes are written. */
static int output_append_padded(struct output *output, const char *str)
/* output_append_u64() appears to be the post-commit version: it appends the
 * 8 bytes of `value` exactly as they are laid out in memory, so any byte-order
 * conversion must be done by the caller beforehand.  Returns whatever
 * output_append_full() returns; callers treat a negative value as failure. */
static int output_append_u64(struct output *output, uint64_t value)
{
size_t str_len = strlen(str);

assert(str_len <= 8);

int r = output_append_full(output, str, str_len);
if (r < 0)
return r;

/* Pad short extensions with NULs so every record is 8 bytes wide. */
if (str_len != 8)
return output_append_full(output, "\0\0\0\0\0\0\0\0", 8 - str_len);

return 0;
/* Write the integer's object representation verbatim (8 bytes). */
return output_append_full(output, (char *)&value, 8);
}

static int output_append(struct output *output, const char *str)
Expand Down Expand Up @@ -316,14 +306,18 @@ int main(int argc, char *argv[])
return 1;
}
for (i = 0; i < hash_get_count(ext_mime); i++) {
char ext_lower[9] = {0};
uint64_t ext_lower = 0;

strncpy(ext_lower, exts[i], 8);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-truncation"
/* See lwan_determine_mime_type_for_file_name() in lwan-tables.c */
strncpy((char *)&ext_lower, exts[i], 8);
#pragma GCC diagnostic pop

for (char *p = ext_lower; *p; p++)
*p &= ~0x20;
ext_lower &= ~0x2020202020202020ull;
ext_lower = htobe64(ext_lower);

if (output_append_padded(&output, ext_lower) < 0) {
if (output_append_u64(&output, ext_lower) < 0) {
fprintf(stderr, "Could not append to output\n");
fclose(fp);
return 1;
Expand Down
34 changes: 25 additions & 9 deletions src/lib/lwan-tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,29 @@ LWAN_SELF_TEST(status_codes)
#undef ASSERT_STATUS
}

static int
compare_mime_entry(const void *a, const void *b)
static int compare_mime_entry(const void *a, const void *b)
{
const char *exta = (const char *)a;
const char *extb = (const char *)b;
static const uintptr_t begin = (uintptr_t)uncompressed_mime_entries;
static const uintptr_t end = begin + 8 * MIME_ENTRIES;
const uintptr_t pa = (uintptr_t)a;
const uintptr_t pb = (uintptr_t)b;
uint64_t exta;
uint64_t extb;

if (end - pa >= begin && end - pb >= begin) {
/* If both keys are within the uncompressed mime entries range, then
* we don't need to load from memory, just compare the pointers: they're
* all stored sequentially in memory by construction. */
exta = pa;
extb = pb;
} else {
/* These are stored in big-endian so the comparison below works
* as expected. */
exta = string_as_uint64((const char *)a);
extb = string_as_uint64((const char *)b);
}

return strncmp(exta, extb, 8);
return (exta > extb) - (exta < extb);
}

const char *
Expand All @@ -147,19 +163,19 @@ lwan_determine_mime_type_for_file_name(const char *file_name)
}

if (LIKELY(*last_dot)) {
uint64_t key;
uint64_t key = 0;
const unsigned char *extension;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-truncation"
/* Data is stored with NULs on strings up to 7 chars, and no NULs
* for 8-char strings, because that's implicit. So truncation is
* intentional here: comparison in compare_mime_entry() uses
* strncmp(..., 8), so even if NUL isn't present, it'll stop at the
* right place. */
* intentional here: comparison in compare_mime_entry() always loads
* 8 bytes per extension. */
strncpy((char *)&key, last_dot + 1, 8);
#pragma GCC diagnostic pop
key &= ~0x2020202020202020ull;
key = htobe64(key);

extension = bsearch(&key, uncompressed_mime_entries, MIME_ENTRIES, 8,
compare_mime_entry);
Expand Down

0 comments on commit 292d7ac

Please sign in to comment.