Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
2253 lines (2093 sloc) 112 KB
#include <stdint.h>
#include <assert.h>
#if defined(_MSC_VER)
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
/* GCC-compatible compiler, targeting x86/x86-64 */
#include <x86intrin.h>
#elif defined(__GNUC__) && defined(__ARM_NEON__)
/* GCC-compatible compiler, targeting ARM with NEON */
#include <arm_neon.h>
#elif defined(__GNUC__) && defined(__IWMMXT__)
/* GCC-compatible compiler, targeting ARM with WMMX */
#include <mmintrin.h>
#elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__))
/* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
#include <altivec.h>
#elif defined(__GNUC__) && defined(__SPE__)
/* GCC-compatible compiler, targeting PowerPC with SPE */
#include <spe.h>
#endif
#include "platform.h"
/*
 * Decode a single variable-byte encoded 32-bit integer.
 *
 * Each input byte contributes its low 7 bits, least-significant group first.
 * A byte below 128 (high bit clear) ends the value. At most five bytes are
 * read; the high bit of the fifth byte is not inspected, and any payload bits
 * that would land above bit 31 are discarded by the unsigned shift.
 *
 * in  - encoded bytes; the caller must guarantee every byte that gets read
 *       (up to five) is accessible, no bounds checking is done here.
 * out - receives the decoded value.
 *
 * Returns the number of input bytes consumed (1..5).
 */
static int read_int(const uint8_t *in, uint32_t *out) {
  int used = 1;

  *out = in[0] & 0x7F;
  /* Keep pulling 7-bit groups while the previous byte had its high bit set. */
  while (used < 5 && in[used - 1] >= 128) {
    *out |= (in[used] & 0x7FU) << (7 * used);
    used++;
  }
  return used;
}
/*
 * Decode a single variable-byte encoded delta and turn it into an absolute
 * value by adding it to a running total.
 *
 * The byte format is: low 7 bits of each byte are payload, least-significant
 * group first; a byte below 128 terminates the value; at most five bytes are
 * read and the fifth byte's high bit is not inspected.
 *
 * in   - encoded bytes; the caller must guarantee every byte that gets read
 *        (up to five) is accessible, no bounds checking is done here.
 * out  - used as the accumulator for the raw delta while decoding, then
 *        overwritten with the updated running total.
 * prev - running total; incremented by the decoded delta (wrapping modulo
 *        2^32 as unsigned arithmetic does).
 *
 * Returns the number of input bytes consumed (1..5).
 */
static inline int read_int_delta(const uint8_t *in, uint32_t *out,
                                 uint32_t *prev) {
  int used = 1;

  *out = in[0] & 0x7F;
  /* Keep pulling 7-bit groups while the previous byte had its high bit set. */
  while (used < 5 && in[used - 1] >= 128) {
    *out |= (in[used] & 0x7FU) << (7 * used);
    used++;
  }

  /* Fold the delta into the running total and report the total. */
  *prev += *out;
  *out = *prev;
  return used;
}
/*
 * Two-byte record used as the element type of the combined_lookup table.
 * The table's initializers are positional, so field order matters:
 * the first value of each {a, b} pair is `index`, the second `bytes_consumed`.
 */
typedef struct index_bytes_consumed {
/* NOTE(review): presumably selects a decode pattern in another table — confirm against the code that reads combined_lookup. */
uint8_t index;
/* NOTE(review): presumably how many input bytes that pattern consumes — confirm against the code that reads combined_lookup. */
uint8_t bytes_consumed;
} index_bytes_consumed;
static SIMDCOMP_ALIGNED(0x1000) index_bytes_consumed combined_lookup[] = {
{0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6},
{160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7},
{148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8},
{56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8},
{119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9},
{28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4},
{93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7},
{145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0},
{0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9},
{82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6},
{110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8},
{14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8},
{139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5},
{73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5},
{96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8},
{64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4},
{163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0},
{0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7},
{49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7},
{70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9},
{121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7},
{79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7},
{35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5},
{7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5},
{166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9},
{89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2},
{154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6},
{67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6},
{129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6},
{74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6},
{99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4},
{160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5},
{148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2},
{151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0},
{0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7},
{118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8},
{24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7},
{36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6},
{66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {53, 10},
{83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7},
{112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8},
{69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3},
{158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8},
{17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8},
{43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6},
{3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, {62, 11}, {86, 8},
{161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5},
{102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10},
{72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3},
{155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3},
{146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6},
{32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5},
{4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5},
{165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7},
{85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7},
{67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7},
{132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7},
{75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2},
{150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0},
{0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6},
{160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5},
{148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6},
{151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5},
{155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3},
{146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4},
{0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0},
{0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0},
{0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8},
{82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6},
{109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7},
{12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7},
{136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8},
{18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8},
{44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7},
{64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8},
{162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7},
{0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8},
{50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8},
{70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9},
{122, 8}, {14, 9}, {61, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8},
{80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4},
{91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5},
{65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5},
{167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4},
{147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2},
{0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6},
{8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9},
{127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9},
{21, 9}, {121, 7}, {13, 9}, {59, 11}, {85, 7}, {161, 6}, {66, 6},
{97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {55, 11}, {83, 7},
{160, 5}, {7, 9}, {47, 11}, {31, 11}, {131, 9}, {71, 7}, {113, 9},
{148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7},
{107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7},
{145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5},
{118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6},
{76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4},
{92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6},
{66, 6}, {99, 9}, {81, 9}, {143, 11}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4},
{147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2},
{152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4},
{145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3},
{0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7},
{16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7},
{40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6},
{2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7},
{161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8},
{101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8},
{71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7},
{124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3},
{146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6},
{33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5},
{5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5},
{165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8},
{86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {141, 11}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8},
{67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8},
{135, 11}, {72, 8}, {117, 11}, {148, 5}, {167, 8}, {64, 4}, {150, 3},
{146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2},
{150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0},
{0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6},
{160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7},
{148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6},
{103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {157, 6},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5},
{119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7},
{77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5}, {166, 7}, {64, 4},
{93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4}, {147, 4}, {162, 7},
{145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0},
{0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6},
{82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6},
{152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4},
{68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3},
{157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {0, 4}, {0, 5},
{0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5},
{0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0},
{0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4},
{0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0},
{0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7},
{48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7},
{70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8},
{121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7},
{79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7},
{34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5},
{6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5},
{166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8},
{88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2},
{154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6},
{9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9},
{128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9},
{22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6},
{98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8},
{160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9},
{148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2},
{151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8},
{0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7},
{118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9},
{25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7},
{37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6},
{66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10},
{83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7},
{113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9},
{69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3},
{158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5},
{73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5},
{94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6},
{64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9},
{161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5},
{151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5},
{145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3},
{155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3},
{0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6},
{32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5},
{4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5},
{165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10},
{85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7},
{10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10},
{130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7},
{75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2},
{150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0},
{0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6},
{160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8},
{148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10},
{63, 12}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5},
{120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8},
{78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4},
{150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8},
{145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0},
{0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6},
{82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6},
{109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7},
{68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7},
{138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5},
{73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5},
{95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7},
{64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10},
{162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7},
{0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5},
{100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5},
{70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6},
{155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3},
{146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4},
{91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5},
{65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5},
{169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4},
{0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2},
{0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6},
{8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8},
{127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8},
{20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6},
{97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7},
{160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8},
{148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7},
{106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7},
{145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8},
{118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9},
{26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8},
{38, 9}, {22, 9}, {122, 8}, {14, 9}, {104, 8}, {86, 8}, {161, 6},
{66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8},
{84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8},
{115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4},
{145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3},
{159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7},
{16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7},
{41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6},
{2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {103, 7}, {85, 7},
{161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9},
{101, 7}, {83, 7}, {160, 5}, {7, 9}, {95, 7}, {77, 7}, {131, 9},
{71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7},
{125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3},
{146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6},
{91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5},
{65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5},
{165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9},
{87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {144, 12}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4},
{67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3},
{156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3},
{146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2},
{0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0},
{0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6},
{160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7},
{148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8},
{103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8},
{119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7},
{77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4},
{93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7},
{145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0},
{0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6},
{82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6},
{110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8},
{68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8},
{157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5},
{73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5},
{96, 8}, {78, 8}, {156, 5}, {72, 8}, {152, 5}, {148, 5}, {167, 8},
{64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4},
{163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0},
{0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7},
{100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7},
{70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6},
{121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7},
{79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7},
{91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5},
{65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5},
{166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4},
{147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2},
{154, 7}, {0, 0}, {0, 0}, {0, 6}, {0, 5}, {0, 5}, {0, 6},
{0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 6},
{0, 5}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 4}, {0, 6},
{0, 6}, {0, 4}, {0, 6}, {0, 4}, {0, 4}, {0, 6}, {0, 6},
{0, 3}, {0, 3}, {0, 6}, {0, 2}, {0, 6}, {0, 6}, {0, 0},
{0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4},
{0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5},
{0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2},
{0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0},
{0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7},
{118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8},
{24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7},
{36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6},
{66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9},
{83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7},
{112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8},
{69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3},
{158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8},
{17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8},
{42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6},
{3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8},
{161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5},
{102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9},
{72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3},
{155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3},
{146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6},
{32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5},
{4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5},
{165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10},
{85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7},
{11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10},
{131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7},
{75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2},
{150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0},
{0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6},
{160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9},
{148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6},
{105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5},
{155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3},
{146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4},
{150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9},
{0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0},
{0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8},
{82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6},
{109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7},
{12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7},
{136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8},
{18, 8}, {119, 7}, {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8},
{45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7},
{64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8},
{162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7},
{0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8},
{51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8},
{70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10},
{122, 8}, {15, 10}, {62, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8},
{80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4},
{91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5},
{65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5},
{167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4},
{147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2},
{0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6},
{8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6},
{127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6},
{74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6},
{97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7},
{160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10},
{148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7},
{108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7},
{145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5},
{118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6},
{76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4},
{92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6},
{66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4},
{147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2},
{152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4},
{0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3},
{0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7},
{16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7},
{40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6},
{2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7},
{161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8},
{52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8},
{71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7},
{124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3},
{146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6},
{33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5},
{5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5},
{165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {61, 11},
{86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8},
{67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8},
{133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3},
{146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2},
{150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0},
{0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6},
{160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7},
{148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9},
{59, 11}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9},
{119, 7}, {11, 9}, {55, 11}, {83, 7}, {160, 5}, {7, 9}, {47, 11},
{31, 11}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4},
{93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7},
{145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0},
{0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6},
{82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6},
{111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9},
{68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9},
{143, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5},
{73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5},
{150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9},
{64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4},
{164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0},
{0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7},
{48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7},
{70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8},
{121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7},
{79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7},
{34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5},
{6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5},
{166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8},
{88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2},
{154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6},
{9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6},
{128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6},
{74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6},
{98, 8}, {80, 8}, {141, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8},
{160, 5}, {65, 5}, {96, 8}, {78, 8}, {135, 11}, {72, 8}, {117, 11},
{148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2},
{151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8},
{0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7},
{118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6},
{76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7},
{92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6},
{66, 6}, {97, 7}, {79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7},
{83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7},
{152, 5}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4},
{69, 7}, {151, 4}, {147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3},
{158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5},
{73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5},
{94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6},
{64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4},
{161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5},
{0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5},
{0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3},
{0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3},
{0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6},
{32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5},
{4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5},
{165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9},
{85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7},
{10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9},
{130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7},
{75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2},
{150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0},
{0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6},
{160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8},
{148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9},
{60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5},
{120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8},
{78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4},
{150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8},
{145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0},
{0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9},
{82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6},
{109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7},
{13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7},
{137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9},
{19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9},
{46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7},
{64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9},
{162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7},
{0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5},
{100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9},
{70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6},
{123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9},
{81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4},
{91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5},
{65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5},
{168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4},
{147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2},
{0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6},
{8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8},
{127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8},
{20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6},
{97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {53, 10}, {83, 7},
{160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8},
{148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7},
{106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7},
{145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8},
{118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10},
{27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8},
{39, 10}, {23, 10}, {122, 8}, {15, 10}, {104, 8}, {86, 8}, {161, 6},
{66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8},
{84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8},
{116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4},
{145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3},
{159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7},
{16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7},
{94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6},
{2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7},
{161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5},
{101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10},
{71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7},
{126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3},
{146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6},
{91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5},
{65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5},
{165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4},
{147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2},
{153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4},
{67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3},
{156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3},
{0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2},
{0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0},
{0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6},
{160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7},
{148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8},
{56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8},
{145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8},
{119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9},
{28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4},
{93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7},
{145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0},
{0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9},
{82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6},
{110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8},
{14, 9}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8},
{139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5},
{73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5},
{96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8},
{64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4},
{163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0},
{0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7},
{49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7},
{70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9},
{121, 7}, {13, 9}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7},
{79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7},
{35, 9}, {19, 9}, {119, 7}, {11, 9}, {101, 7}, {83, 7}, {160, 5},
{7, 9}, {95, 7}, {77, 7}, {131, 9}, {71, 7}, {113, 9}, {148, 5},
{166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9},
{89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2},
{154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6},
{67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6},
{129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6},
{74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6},
{99, 9}, {81, 9}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0},
{64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4},
{160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5},
{148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2},
{151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0},
{0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7},
{118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8},
{24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7},
{36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6},
{66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6},
{0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7},
{83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7},
{112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8},
{69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3},
{158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8},
{17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8},
{94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6},
{3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8},
{161, 6}, {66, 6}, {98, 8}, {80, 8}, {157, 6}, {145, 2}, {153, 6},
{149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5},
{102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {156, 5},
{72, 8}, {152, 5}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3},
{155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3},
{146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6},
{0, 7}, {0, 7}, {0, 6}, {0, 7}, {0, 6}, {0, 6}, {0, 5},
{0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 6}, {0, 7}, {0, 5},
{0, 6}, {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 6}, {0, 7},
{0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 7}, {0, 6}, {0, 2},
{0, 6}, {0, 6}, {0, 0}, {0, 7}, {0, 5}, {0, 5}, {0, 7},
{0, 5}, {0, 7}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, {0, 7},
{0, 5}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, {0, 4}, {0, 7},
{0, 7}, {0, 4}, {0, 7}, {0, 4}, {0, 4}, {0, 7}, {0, 2},
{0, 3}, {0, 3}, {0, 7}, {0, 2}, {0, 7}, {0, 0}, {0, 0},
{0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 5}, {0, 6}, {0, 6},
{0, 5}, {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 6}, {0, 5},
{0, 5}, {0, 6}, {0, 4}, {0, 6}, {0, 6}, {0, 4}, {0, 6},
{0, 4}, {0, 4}, {0, 6}, {0, 6}, {0, 3}, {0, 3}, {0, 6},
{0, 2}, {0, 6}, {0, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5},
{0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3},
{0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4},
{0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0},
{0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0},
{0, 0}};
/*
 * Byte-shuffle control masks, 16 bytes per row. The row number (shown in the
 * trailing comment of each row) is the `index` value the decoders read from
 * combined_lookup, and each row is passed to _mm_shuffle_epi8 as the control
 * operand. A non-negative entry is the position of the source byte to copy;
 * an entry of -1 has its high bit set, for which the shuffle produces a zero
 * byte.
 *
 * The decoders in this file split the rows into three families:
 *   rows   0..63  : six values of 1-2 bytes each, one per 16-bit slot
 *   rows  64..144 : four values of 1-3 bytes each, one per 32-bit slot
 *   rows 145..169 : two values of up to 5 bytes each, one per 64-bit half
 *
 * The table is accessed as an array of __m128i through `vectors`.
 */
static SIMDCOMP_ALIGNED(0x1000) const int8_t vectorsrawbytes[] = {
0, -1, 4, -1, 1, -1, 5, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 0
0, -1, 4, -1, 1, -1, 5, 6, 2, -1, -1, -1, 3, -1, -1, -1, // 1
0, -1, 4, 5, 1, -1, 6, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 2
0, -1, 4, 5, 1, -1, 6, 7, 2, -1, -1, -1, 3, -1, -1, -1, // 3
0, -1, 5, -1, 1, -1, 6, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 4
0, -1, 5, -1, 1, -1, 6, 7, 2, -1, -1, -1, 3, 4, -1, -1, // 5
0, -1, 5, 6, 1, -1, 7, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 6
0, -1, 5, 6, 1, -1, 7, 8, 2, -1, -1, -1, 3, 4, -1, -1, // 7
0, -1, 5, -1, 1, -1, 6, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 8
0, -1, 5, -1, 1, -1, 6, 7, 2, 3, -1, -1, 4, -1, -1, -1, // 9
0, -1, 5, 6, 1, -1, 7, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 10
0, -1, 5, 6, 1, -1, 7, 8, 2, 3, -1, -1, 4, -1, -1, -1, // 11
0, -1, 6, -1, 1, -1, 7, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 12
0, -1, 6, -1, 1, -1, 7, 8, 2, 3, -1, -1, 4, 5, -1, -1, // 13
0, -1, 6, 7, 1, -1, 8, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 14
0, -1, 6, 7, 1, -1, 8, 9, 2, 3, -1, -1, 4, 5, -1, -1, // 15
0, -1, 5, -1, 1, 2, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 16
0, -1, 5, -1, 1, 2, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1, // 17
0, -1, 5, 6, 1, 2, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 18
0, -1, 5, 6, 1, 2, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1, // 19
0, -1, 6, -1, 1, 2, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 20
0, -1, 6, -1, 1, 2, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1, // 21
0, -1, 6, 7, 1, 2, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 22
0, -1, 6, 7, 1, 2, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1, // 23
0, -1, 6, -1, 1, 2, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 24
0, -1, 6, -1, 1, 2, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1, // 25
0, -1, 6, 7, 1, 2, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 26
0, -1, 6, 7, 1, 2, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1, // 27
0, -1, 7, -1, 1, 2, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 28
0, -1, 7, -1, 1, 2, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1, // 29
0, -1, 7, 8, 1, 2, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 30
0, -1, 7, 8, 1, 2, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1, // 31
0, 1, 5, -1, 2, -1, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 32
0, 1, 5, -1, 2, -1, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1, // 33
0, 1, 5, 6, 2, -1, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 34
0, 1, 5, 6, 2, -1, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1, // 35
0, 1, 6, -1, 2, -1, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 36
0, 1, 6, -1, 2, -1, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1, // 37
0, 1, 6, 7, 2, -1, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 38
0, 1, 6, 7, 2, -1, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1, // 39
0, 1, 6, -1, 2, -1, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 40
0, 1, 6, -1, 2, -1, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1, // 41
0, 1, 6, 7, 2, -1, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 42
0, 1, 6, 7, 2, -1, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1, // 43
0, 1, 7, -1, 2, -1, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 44
0, 1, 7, -1, 2, -1, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1, // 45
0, 1, 7, 8, 2, -1, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 46
0, 1, 7, 8, 2, -1, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1, // 47
0, 1, 6, -1, 2, 3, 7, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 48
0, 1, 6, -1, 2, 3, 7, 8, 4, -1, -1, -1, 5, -1, -1, -1, // 49
0, 1, 6, 7, 2, 3, 8, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 50
0, 1, 6, 7, 2, 3, 8, 9, 4, -1, -1, -1, 5, -1, -1, -1, // 51
0, 1, 7, -1, 2, 3, 8, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 52
0, 1, 7, -1, 2, 3, 8, 9, 4, -1, -1, -1, 5, 6, -1, -1, // 53
0, 1, 7, 8, 2, 3, 9, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 54
0, 1, 7, 8, 2, 3, 9, 10, 4, -1, -1, -1, 5, 6, -1, -1, // 55
0, 1, 7, -1, 2, 3, 8, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 56
0, 1, 7, -1, 2, 3, 8, 9, 4, 5, -1, -1, 6, -1, -1, -1, // 57
0, 1, 7, 8, 2, 3, 9, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 58
0, 1, 7, 8, 2, 3, 9, 10, 4, 5, -1, -1, 6, -1, -1, -1, // 59
0, 1, 8, -1, 2, 3, 9, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 60
0, 1, 8, -1, 2, 3, 9, 10, 4, 5, -1, -1, 6, 7, -1, -1, // 61
0, 1, 8, 9, 2, 3, 10, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 62
0, 1, 8, 9, 2, 3, 10, 11, 4, 5, -1, -1, 6, 7, -1, -1, // 63
0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 64
0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 65
0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, // 66
0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 67
0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 68
0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, // 69
0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, // 70
0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, // 71
0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, // 72
0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 73
0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 74
0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1, // 75
0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 76
0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 77
0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1, // 78
0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1, // 79
0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1, // 80
0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1, // 81
0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 82
0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 83
0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 84
0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 85
0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 86
0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 87
0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 88
0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 89
0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 90
0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 91
0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 92
0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1, // 93
0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 94
0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 95
0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1, // 96
0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1, // 97
0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1, // 98
0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1, // 99
0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 100
0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 101
0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 102
0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 103
0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 104
0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 105
0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 106
0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 107
0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 108
0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1, // 109
0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1, // 110
0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1, // 111
0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1, // 112
0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1, // 113
0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1, // 114
0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1, // 115
0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1, // 116
0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1, // 117
0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 118
0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 119
0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 120
0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 121
0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 122
0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 123
0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 124
0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 125
0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 126
0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, // 127
0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1, // 128
0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1, // 129
0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1, // 130
0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1, // 131
0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1, // 132
0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1, // 133
0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1, // 134
0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1, // 135
0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1, // 136
0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1, // 137
0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1, // 138
0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1, // 139
0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1, // 140
0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1, // 141
0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1, // 142
0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1, // 143
0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1, // 144
-1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 1, // 145
-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, 1, // 146
-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, -1, -1, -1, 1, // 147
-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, -1, 1, // 148
-1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, 5, 1, // 149
1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 2, // 150
1, -1, -1, -1, -1, -1, -1, 0, 3, -1, -1, -1, -1, -1, -1, 2, // 151
1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, -1, -1, -1, 2, // 152
1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, -1, 2, // 153
1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, 6, 2, // 154
1, -1, 2, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 3, // 155
1, -1, 2, -1, -1, -1, -1, 0, 4, -1, -1, -1, -1, -1, -1, 3, // 156
1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, -1, -1, -1, 3, // 157
1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6, -1, -1, 3, // 158
1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6, -1, 7, 3, // 159
1, -1, 2, -1, 3, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 4, // 160
1, -1, 2, -1, 3, -1, -1, 0, 5, -1, -1, -1, -1, -1, -1, 4, // 161
1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, -1, -1, -1, 4, // 162
1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7, -1, -1, 4, // 163
1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7, -1, 8, 4, // 164
1, -1, 2, -1, 3, -1, 4, 0, -1, -1, -1, -1, -1, -1, -1, 5, // 165
1, -1, 2, -1, 3, -1, 4, 0, 6, -1, -1, -1, -1, -1, -1, 5, // 166
1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, -1, -1, -1, 5, // 167
1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1, -1, 5, // 168
1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1, 9, 5, // 169
};
// The raw shuffle table viewed as 16-byte vectors: vectors[i] is row i of
// vectorsrawbytes, ready to be used as an _mm_shuffle_epi8 control operand.
static const __m128i *vectors = (const __m128i *)vectorsrawbytes;
/*
 * Decodes one group of variable-byte integers from the 16 bytes at `in`.
 *
 *   in        : input position; 16 bytes are always loaded from here.
 *   out       : destination for the decoded 32-bit values. Depending on the
 *               group shape 64, 24, 16 or 8 bytes are stored.
 *   mask      : per-byte high-bit signature; the callers in this file pass
 *               a value whose bit i is the high bit of in[i]. Only the low
 *               16 bits are examined.
 *   ints_read : receives how many values were decoded (16, 6, 4 or 2).
 *
 * Returns the number of input bytes taken up by the decoded values.
 */
static uint8_t masked_vbyte_read_group(const uint8_t *in, uint32_t *out,
                                       uint64_t mask, uint64_t *ints_read) {
  const __m128i raw = _mm_lddqu_si128((const __m128i *)in);
  __m128i *const dst = (__m128i *)out;

  /* No high bit set in any of the 16 bytes: sixteen one-byte values. */
  if ((mask & 0xFFFF) == 0) {
    _mm_storeu_si128(dst, _mm_cvtepi8_epi32(raw));
    _mm_storeu_si128(dst + 1, _mm_cvtepi8_epi32(_mm_srli_si128(raw, 4)));
    _mm_storeu_si128(dst + 2, _mm_cvtepi8_epi32(_mm_srli_si128(raw, 8)));
    _mm_storeu_si128(dst + 3, _mm_cvtepi8_epi32(_mm_srli_si128(raw, 12)));
    *ints_read = 16;
    return 16;
  }

  /* One table lookup yields both the shuffle row and the byte count. */
  const index_bytes_consumed entry = combined_lookup[mask & 0xFFF];
  const uint8_t nbytes = entry.bytes_consumed;
  const uint8_t row = entry.index;
  const __m128i control = vectors[row];

  if (row < 64) {
    /* Six values of one or two bytes, each gathered into a 16-bit slot. */
    *ints_read = 6;
    const __m128i gathered = _mm_shuffle_epi8(raw, control);
    const __m128i lo7 = _mm_and_si128(gathered, _mm_set1_epi16(0x007F));
    const __m128i hi7 =
        _mm_srli_epi16(_mm_and_si128(gathered, _mm_set1_epi16(0x7F00)), 1);
    const __m128i packed16 = _mm_or_si128(lo7, hi7);
    /* Low halves of the 32-bit lanes hold values 0..3, high halves 4..5. */
    _mm_storeu_si128(dst,
                     _mm_and_si128(packed16, _mm_set1_epi32(0x0000FFFF)));
    _mm_storel_epi64(dst + 1, _mm_srli_epi32(packed16, 16));
    return nbytes;
  }

  if (row < 145) {
    /* Four values of one to three bytes, each gathered into a 32-bit lane. */
    *ints_read = 4;
    const __m128i gathered = _mm_shuffle_epi8(raw, control);
    const __m128i byte0 = _mm_and_si128(gathered, _mm_set1_epi32(0x0000007F));
    const __m128i byte1 = _mm_srli_epi32(
        _mm_and_si128(gathered, _mm_set1_epi32(0x00007F00)), 1);
    const __m128i byte2 = _mm_srli_epi32(
        _mm_and_si128(gathered, _mm_set1_epi32(0x007F0000)), 2);
    _mm_storeu_si128(dst, _mm_or_si128(_mm_or_si128(byte0, byte1), byte2));
    return nbytes;
  }

  /* Remaining rows: two values of up to five bytes each. */
  *ints_read = 2;
  const __m128i payload = _mm_and_si128(raw, _mm_set1_epi8(0x7F));
  const __m128i gathered = _mm_shuffle_epi8(payload, control);
  /* The per-lane multiplications act as left shifts by 7, 6, 5 and 4 bits. */
  const __m128i scaled = _mm_mullo_epi16(
      gathered, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
  const __m128i merged = _mm_or_si128(scaled, _mm_slli_epi64(scaled, 8));
  /* The first byte of each value sits in the top byte of its 64-bit half. */
  const __m128i with_first = _mm_or_si128(merged, _mm_srli_epi64(gathered, 56));
  const __m128i compacted = _mm_shuffle_epi8(
      with_first,
      _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
  _mm_storel_epi64(dst, compacted);
  return nbytes;
}
/*
 * Inclusive prefix sum over the four 32-bit lanes of `curr`, with the last
 * lane of `prev` added to every result lane.
 * For curr = [A B C D] and prev = [. . . P] the result is
 * [P+A, P+A+B, P+A+B+C, P+A+B+C+D].
 */
static inline __m128i PrefixSum(__m128i curr, __m128i prev) {
  const __m128i carry = _mm_shuffle_epi32(prev, 0xff); /* [P P P P] */
  const __m128i pairs =
      _mm_add_epi32(curr, _mm_slli_si128(curr, 4)); /* [A AB BC CD] */
  const __m128i pairs_up2 = _mm_slli_si128(pairs, 8); /* [- - A AB] */
  const __m128i carried = _mm_add_epi32(pairs, carry); /* [PA PAB PBC PCD] */
  return _mm_add_epi32(carried, pairs_up2); /* [PA PAB PABC PABCD] */
}
/*
 * Prefix sum for a vector in which only the first two 32-bit lanes of `curr`
 * are meaningful; the upper two lanes hold garbage and are ignored.
 * For curr = [A B G G] and prev = [. . . P] the result is
 * [P+A, P+A+B, P+A+B, P+A+B], so the last lane again carries the running
 * total for the next call.
 */
static inline __m128i PrefixSum2ints(__m128i curr, __m128i prev) {
  const __m128i carry = _mm_shuffle_epi32(prev, 0xff); /* [P P P P] */
  const __m128i pairs =
      _mm_add_epi32(curr, _mm_slli_si128(curr, 4)); /* [A AB BG GG] */
  const __m128i spread = _mm_shuffle_epi32(pairs, 0x54); /* [A AB AB AB] */
  return _mm_add_epi32(spread, carry); /* [PA PAB PAB PAB] */
}
/*
 * Delta-decoding variant of the group decoder: decodes one group of
 * variable-byte integers from the 16 bytes at `in`, turns the decoded
 * differences into running totals, and stores those totals to `out`.
 *
 *   in        : input position; 16 bytes are always loaded from here.
 *   out       : destination for the 32-bit running totals.
 *   mask      : per-byte high-bit signature of the input; only the low 16
 *               bits are examined.
 *   ints_read : receives how many values were decoded (16, 6, 4 or 2).
 *   prev      : running state; its last 32-bit lane is the total so far. It
 *               is overwritten with every vector that is stored, so on
 *               return its last lane is the last value written.
 *
 * Returns the number of input bytes taken up by the decoded values.
 */
static uint8_t masked_vbyte_read_group_delta(const uint8_t *in, uint32_t *out,
                                             uint64_t mask, uint64_t *ints_read,
                                             __m128i *prev) {
  const __m128i raw = _mm_lddqu_si128((const __m128i *)in);
  __m128i *const dst = (__m128i *)out;

  /* No high bit set in any of the 16 bytes: sixteen one-byte deltas. */
  if ((mask & 0xFFFF) == 0) {
    *prev = PrefixSum(_mm_cvtepi8_epi32(raw), *prev);
    _mm_storeu_si128(dst, *prev);
    *prev = PrefixSum(_mm_cvtepi8_epi32(_mm_srli_si128(raw, 4)), *prev);
    _mm_storeu_si128(dst + 1, *prev);
    *prev = PrefixSum(_mm_cvtepi8_epi32(_mm_srli_si128(raw, 8)), *prev);
    _mm_storeu_si128(dst + 2, *prev);
    *prev = PrefixSum(_mm_cvtepi8_epi32(_mm_srli_si128(raw, 12)), *prev);
    _mm_storeu_si128(dst + 3, *prev);
    *ints_read = 16;
    return 16;
  }

  /* One table lookup yields both the shuffle row and the byte count. */
  const index_bytes_consumed entry = combined_lookup[mask & 0xFFF];
  const uint8_t nbytes = entry.bytes_consumed;
  const uint8_t row = entry.index;
  const __m128i control = vectors[row];

  if (row < 64) {
    /* Six deltas of one or two bytes, each gathered into a 16-bit slot. */
    *ints_read = 6;
    const __m128i gathered = _mm_shuffle_epi8(raw, control);
    const __m128i lo7 = _mm_and_si128(gathered, _mm_set1_epi16(0x007F));
    const __m128i hi7 =
        _mm_srli_epi16(_mm_and_si128(gathered, _mm_set1_epi16(0x7F00)), 1);
    const __m128i packed16 = _mm_or_si128(lo7, hi7);
    /* Deltas 0..3 come from the low halves of the 32-bit lanes ... */
    *prev = PrefixSum(_mm_and_si128(packed16, _mm_set1_epi32(0x0000FFFF)),
                      *prev);
    _mm_storeu_si128(dst, *prev);
    /* ... and deltas 4..5 from the high halves of the first two lanes. */
    *prev = PrefixSum2ints(_mm_srli_epi32(packed16, 16), *prev);
    _mm_storel_epi64(dst + 1, *prev);
    return nbytes;
  }

  if (row < 145) {
    /* Four deltas of one to three bytes, each gathered into a 32-bit lane. */
    *ints_read = 4;
    const __m128i gathered = _mm_shuffle_epi8(raw, control);
    const __m128i byte0 = _mm_and_si128(gathered, _mm_set1_epi32(0x0000007F));
    const __m128i byte1 = _mm_srli_epi32(
        _mm_and_si128(gathered, _mm_set1_epi32(0x00007F00)), 1);
    const __m128i byte2 = _mm_srli_epi32(
        _mm_and_si128(gathered, _mm_set1_epi32(0x007F0000)), 2);
    *prev = PrefixSum(_mm_or_si128(_mm_or_si128(byte0, byte1), byte2), *prev);
    _mm_storeu_si128(dst, *prev);
    return nbytes;
  }

  /* Remaining rows: two deltas of up to five bytes each. */
  *ints_read = 2;
  const __m128i payload = _mm_and_si128(raw, _mm_set1_epi8(0x7F));
  const __m128i gathered = _mm_shuffle_epi8(payload, control);
  /* The per-lane multiplications act as left shifts by 7, 6, 5 and 4 bits. */
  const __m128i scaled = _mm_mullo_epi16(
      gathered, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
  const __m128i merged = _mm_or_si128(scaled, _mm_slli_epi64(scaled, 8));
  /* The first byte of each value sits in the top byte of its 64-bit half. */
  const __m128i with_first = _mm_or_si128(merged, _mm_srli_epi64(gathered, 56));
  const __m128i compacted = _mm_shuffle_epi8(
      with_first,
      _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
  *prev = PrefixSum2ints(compacted, *prev);
  _mm_storel_epi64(dst, *prev);
  return nbytes;
}
/*
 * Stand-alone group decoder: loads 16 bytes from `in`, derives the high-bit
 * signature of those bytes itself, and decodes one group of variable-byte
 * integers into `out`.
 *
 *   in        : input position; 16 bytes are always loaded from here.
 *   out       : destination for the decoded 32-bit values. Depending on the
 *               group shape 64, 24, 16 or 8 bytes are stored.
 *   ints_read : receives how many values were decoded (16, 6, 4 or 2).
 *
 * Returns the number of input bytes taken up by the decoded values.
 */
static int read_int_group(const uint8_t *in, uint32_t *out, int *ints_read) {
  const __m128i raw = _mm_lddqu_si128((const __m128i *)in);
  __m128i *const dst = (__m128i *)out;
  const int signature = _mm_movemask_epi8(raw);

  /* No high bit set in any of the 16 bytes: sixteen one-byte values. */
  if (signature == 0) {
    _mm_storeu_si128(dst, _mm_cvtepi8_epi32(raw));
    _mm_storeu_si128(dst + 1, _mm_cvtepi8_epi32(_mm_srli_si128(raw, 4)));
    _mm_storeu_si128(dst + 2, _mm_cvtepi8_epi32(_mm_srli_si128(raw, 8)));
    _mm_storeu_si128(dst + 3, _mm_cvtepi8_epi32(_mm_srli_si128(raw, 12)));
    *ints_read = 16;
    return 16;
  }

  /* The low 12 signature bits select the shuffle row and the byte count. */
  const index_bytes_consumed entry = combined_lookup[signature & 0xFFF];
  const int row = entry.index;
  const __m128i control = vectors[row];
  const int nbytes = entry.bytes_consumed;

  if (row < 64) {
    /* Six values of one or two bytes, each gathered into a 16-bit slot. */
    *ints_read = 6;
    const __m128i gathered = _mm_shuffle_epi8(raw, control);
    const __m128i lo7 = _mm_and_si128(gathered, _mm_set1_epi16(0x007F));
    const __m128i hi7 =
        _mm_srli_epi16(_mm_and_si128(gathered, _mm_set1_epi16(0x7F00)), 1);
    const __m128i packed16 = _mm_or_si128(lo7, hi7);
    /* Low halves of the 32-bit lanes hold values 0..3, high halves 4..5. */
    _mm_storeu_si128(dst,
                     _mm_and_si128(packed16, _mm_set1_epi32(0x0000FFFF)));
    _mm_storel_epi64(dst + 1, _mm_srli_epi32(packed16, 16));
    return nbytes;
  }

  if (row < 145) {
    /* Four values of one to three bytes, each gathered into a 32-bit lane. */
    *ints_read = 4;
    const __m128i gathered = _mm_shuffle_epi8(raw, control);
    const __m128i byte0 = _mm_and_si128(gathered, _mm_set1_epi32(0x0000007F));
    const __m128i byte1 = _mm_srli_epi32(
        _mm_and_si128(gathered, _mm_set1_epi32(0x00007F00)), 1);
    const __m128i byte2 = _mm_srli_epi32(
        _mm_and_si128(gathered, _mm_set1_epi32(0x007F0000)), 2);
    _mm_storeu_si128(dst, _mm_or_si128(_mm_or_si128(byte0, byte1), byte2));
    return nbytes;
  }

  /* Remaining rows: two values of up to five bytes each. */
  *ints_read = 2;
  const __m128i payload = _mm_and_si128(raw, _mm_set1_epi8(0x7F));
  const __m128i gathered = _mm_shuffle_epi8(payload, control);
  /* The per-lane multiplications act as left shifts by 7, 6, 5 and 4 bits. */
  const __m128i scaled = _mm_mullo_epi16(
      gathered, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
  const __m128i merged = _mm_or_si128(scaled, _mm_slli_epi64(scaled, 8));
  /* The first byte of each value sits in the top byte of its 64-bit half. */
  const __m128i with_first = _mm_or_si128(merged, _mm_srli_epi64(gathered, 56));
  const __m128i compacted = _mm_shuffle_epi8(
      with_first,
      _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
  _mm_storel_epi64(dst, compacted);
  return nbytes;
}
// Decodes `length` variable-byte encoded integers from `in` into `out`.
//
// in     : encoded input bytes
// out    : receives the decoded 32-bit integers
// length : number of integers to decode
//
// Returns the number of input bytes consumed.
//
// The work is done in three stages:
//  1. For more than 96 integers, a pipelined loop gathers the high bits of the
//     input 48 bytes at a time (one step ahead of decoding) and decodes groups
//     with masked_vbyte_read_group.
//  2. A second loop keeps decoding groups, topping up the signature 32 or 16
//     bytes at a time while enough integers remain to justify the load.
//  3. Whatever is left is decoded one integer at a time with read_int.
//
// `sig` holds one bit per scanned-but-not-yet-consumed input byte: bit i is
// the high bit of in[consumed + i].
size_t masked_vbyte_read_loop(const uint8_t *in, uint32_t *out,
uint64_t length) {
size_t consumed = 0; // number of bytes read
uint64_t count = 0; // how many integers we have read so far
uint64_t sig = 0;
// number of input bytes, counted from `consumed`, whose high bits are
// currently recorded in sig (only maintained from the second stage on)
uint64_t availablebytes = 0;
if (96 < length) {
// offset of the first input byte whose high bit has not been loaded yet
size_t scanned = 0;
// Prime the pipeline: collect the high bits of the first 48 input bytes.
#ifdef __AVX2__
__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
uint32_t lowSig = _mm256_movemask_epi8(low);
#else
__m128i low1 = _mm_loadu_si128((__m128i *)(in + scanned));
uint32_t lowSig1 = _mm_movemask_epi8(low1);
__m128i low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
uint32_t lowSig2 = _mm_movemask_epi8(low2);
uint32_t lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
// excess verbosity to avoid problems with sign extension on conversions
// better to think about what's happening and make it clearer
__m128i high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
uint32_t highSig = _mm_movemask_epi8(high);
uint64_t nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
scanned += 48;
do {
// thisSig: bits of the 48 bytes scanned on the previous step;
// nextSig is refilled below with the bits of the following 48 bytes.
uint64_t thisSig = nextSig;
#ifdef __AVX2__
low = _mm256_loadu_si256((__m256i *)(in + scanned));
lowSig = _mm256_movemask_epi8(low);
#else
low1 = _mm_loadu_si128((__m128i *)(in + scanned));
lowSig1 = _mm_movemask_epi8(low1);
low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
lowSig2 = _mm_movemask_epi8(low2);
lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
highSig = _mm_movemask_epi8(high);
nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
// number of undecoded bytes still described by sig; the bits of thisSig are
// appended directly above them
uint64_t remaining = scanned - (consumed + 48);
sig = (thisSig << remaining) | sig;
uint64_t reload = scanned - 16;
scanned += 48;
// need to reload when less than 16 scanned bytes remain in sig
while (consumed < reload) {
uint64_t ints_read;
uint8_t bytes = masked_vbyte_read_group(in + consumed, out + count, sig,
&ints_read);
sig >>= bytes;
// seems like this might force the compiler to prioritize shifting sig
// >>= bytes
// NOTE(review): intended as a scheduling aid only; if sig ever did equal
// all ones here the function would return 0 - confirm that cannot happen.
if (sig == 0xFFFFFFFFFFFFFFFF)
return 0; // fake check to force earliest evaluation
consumed += bytes;
count += ints_read;
}
} while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to
// 16 remaining in sig
// Fold the look-ahead bits into sig so the second stage starts with every
// scanned byte accounted for.
sig = (nextSig << (scanned - consumed - 48)) | sig;
availablebytes = scanned - consumed;
}
// Second stage: decode group by group while more integers remain than there
// are bytes already described by sig.
while (availablebytes + count < length) {
if (availablebytes < 16) {
// A group decode looks at 16 signature bits, so top sig up first. The loads
// are guarded by the number of integers still to decode: 32 bytes are read
// only if more than 31 integers remain beyond the available bytes, 16 bytes
// only if more than 15 remain.
if (availablebytes + count + 31 < length) {
#ifdef __AVX2__
uint64_t newsigavx = (uint32_t)_mm256_movemask_epi8(
_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
sig |= (newsigavx << availablebytes);
#else
uint64_t newsig = _mm_movemask_epi8(
_mm_lddqu_si128((const __m128i *)(in + availablebytes + consumed)));
uint64_t newsig2 = _mm_movemask_epi8(_mm_lddqu_si128(
(const __m128i *)(in + availablebytes + 16 + consumed)));
sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16));
#endif
availablebytes += 32;
} else if (availablebytes + count + 15 < length) {
// newsig is a 16-bit mask held in an int and availablebytes is below 16
// here, so the shift is performed in int and stays below 2^31.
int newsig = _mm_movemask_epi8(
_mm_lddqu_si128((const __m128i *)(in + availablebytes + consumed)));
sig |= newsig << availablebytes;
availablebytes += 16;
} else {
// too few integers left for another vector load; finish with read_int
break;
}
}
uint64_t ints_read;
uint8_t eaten =
masked_vbyte_read_group(in + consumed, out + count, sig, &ints_read);
consumed += eaten;
availablebytes -= eaten;
sig >>= eaten;
count += ints_read;
}
// Final stage: decode the remaining integers one at a time.
for (; count < length; count++) {
consumed += read_int(in + consumed, out + count);
}
return consumed;
}
/**
 * Decode a vbyte-compressed buffer whose size in bytes is known.
 *
 * in        : compressed input bytes
 * out       : destination for the decoded 32-bit integers
 * inputsize : number of input bytes we want to decode
 *
 * Returns the number of integers written to out.
 *
 * The work is done in three phases:
 *  1. when more than 96 input bytes exist, a pipelined loop collects the
 *     high bit of every byte 48 bytes at a time, one batch ahead of the
 *     group decoder;
 *  2. a loop that tops the bit mask up by 32 or 16 bytes for as long as such
 *     loads stay inside the input;
 *  3. a scalar loop that decodes whatever bytes are left.
 *
 * The pipelined loop returns 0 if the bit mask becomes all ones after a shift
 * (see the "fake check" below).
 *
 * NOTE(review): masked_vbyte_read_group is defined elsewhere in this file; it
 * presumably loads 16 input bytes and stores whole vectors to out, so out
 * probably needs slack past the last decoded value -- confirm with callers.
 */
size_t masked_vbyte_read_loop_fromcompressedsize(const uint8_t *in,
                                                 uint32_t *out,
                                                 size_t inputsize) {
  size_t consumed = 0; // number of bytes read
  uint32_t *initout = out;
  // sig holds the high bit of each not-yet-decoded byte; bit 0 belongs to
  // in[consumed]. availablebytes is how many bytes sig currently describes.
  uint64_t sig = 0;
  uint64_t availablebytes = 0;
  if (96 < inputsize) {
    size_t scanned = 0;
#ifdef __AVX2__
    __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
    uint32_t lowSig = _mm256_movemask_epi8(low);
#else
    __m128i low1 = _mm_loadu_si128((__m128i *)(in + scanned));
    uint32_t lowSig1 = _mm_movemask_epi8(low1);
    __m128i low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
    uint32_t lowSig2 = _mm_movemask_epi8(low2);
    uint32_t lowSig = lowSig2 << 16;
    lowSig |= lowSig1;
#endif
    // excess verbosity to avoid problems with sign extension on conversions
    // better to think about what's happening and make it clearer
    __m128i high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
    uint32_t highSig = _mm_movemask_epi8(high);
    uint64_t nextSig = highSig;
    nextSig <<= 32;
    nextSig |= lowSig;
    scanned += 48;
    do {
      // The 48 bits gathered on the previous pass become usable now, while
      // the next 48 are gathered for the following pass.
      uint64_t thisSig = nextSig;
#ifdef __AVX2__
      low = _mm256_loadu_si256((__m256i *)(in + scanned));
      lowSig = _mm256_movemask_epi8(low);
#else
      low1 = _mm_loadu_si128((__m128i *)(in + scanned));
      lowSig1 = _mm_movemask_epi8(low1);
      low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
      lowSig2 = _mm_movemask_epi8(low2);
      lowSig = lowSig2 << 16;
      lowSig |= lowSig1;
#endif
      high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
      highSig = _mm_movemask_epi8(high);
      nextSig = highSig;
      nextSig <<= 32;
      nextSig |= lowSig;
      // Bytes already described by sig but not yet decoded.
      uint64_t remaining = scanned - (consumed + 48);
      sig = (thisSig << remaining) | sig;
      uint64_t reload = scanned - 16;
      scanned += 48;
      // need to reload when less than 16 scanned bytes remain in sig
      while (consumed < reload) {
        uint64_t ints_read;
        uint8_t bytes =
            masked_vbyte_read_group(in + consumed, out, sig, &ints_read);
        sig >>= bytes;
        // seems like this might force the compiler to prioritize shifting sig
        // >>= bytes
        if (sig == 0xFFFFFFFFFFFFFFFF)
          return 0; // fake check to force earliest evaluation
        consumed += bytes;
        out += ints_read;
      }
    } while (scanned + 112 < inputsize); // 112 == 48 + 48 ahead for scanning +
                                         // up to 16 remaining in sig
    sig = (nextSig << (scanned - consumed - 48)) | sig;
    availablebytes = scanned - consumed;
  }
  while (1) {
    if (availablebytes < 16) {
      // Top sig up; each load is only issued when its last byte is still
      // inside the input.
      if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
        uint64_t newsigavx = (uint32_t)_mm256_movemask_epi8(
            _mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
        sig |= (newsigavx << availablebytes);
#else
        uint64_t newsig = _mm_movemask_epi8(
            _mm_lddqu_si128((const __m128i *)(in + availablebytes + consumed)));
        uint64_t newsig2 = _mm_movemask_epi8(_mm_lddqu_si128(
            (const __m128i *)(in + availablebytes + 16 + consumed)));
        sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16));
#endif
        availablebytes += 32;
      } else if (availablebytes + consumed + 15 < inputsize) {
        int newsig = _mm_movemask_epi8(
            _mm_lddqu_si128((const __m128i *)(in + availablebytes + consumed)));
        sig |= newsig << availablebytes;
        availablebytes += 16;
      } else {
        break;
      }
    }
    uint64_t ints_read;
    uint8_t bytes =
        masked_vbyte_read_group(in + consumed, out, sig, &ints_read);
    consumed += bytes;
    availablebytes -= bytes;
    sig >>= bytes;
    out += ints_read;
  }
  // Scalar tail. A value whose final byte is missing from the input is
  // dropped without being written.
  while (consumed < inputsize) {
    unsigned int shift = 0;
    for (uint32_t v = 0; consumed < inputsize; shift += 7) {
      uint8_t c = in[consumed++];
      // Shift as uint32_t: c would otherwise be promoted to int, and the
      // fifth byte (shift == 28) can overflow a signed int, which is
      // undefined behaviour.
      if ((c & 128) == 0) {
        out[0] = v + ((uint32_t)c << shift);
        ++out;
        break;
      } else {
        v += (uint32_t)(c & 127) << shift;
      }
    }
  }
  return out - initout;
}
/**
 * Decode `length` vbyte-encoded integers from `in` into `out`.
 *
 * Returns the number of input bytes consumed.
 *
 * read_int_group is used while at least 16 integers are still wanted; it
 * reports how many integers it produced through its last argument. The
 * remaining integers are decoded one at a time with read_int.
 */
size_t read_ints(const uint8_t *in, uint32_t *out, int length) {
  size_t offset = 0; // bytes of `in` used so far
  int decoded = 0;   // integers written to `out` so far

  // Grouped decoding.
  while (decoded + 15 < length) {
    int produced;
    offset += read_int_group(in + offset, out + decoded, &produced);
    decoded += produced;
  }

  // One integer per call for the rest.
  while (decoded < length) {
    offset += read_int(in + offset, out + decoded);
    decoded++;
  }

  return offset;
}
/*
 * Decode one group of delta-coded vbyte integers from the 16 bytes at |in|.
 *
 * in        : input; 16 bytes are loaded from it
 * out       : destination; between one and four 16-byte vectors are stored
 * ints_read : receives the number of integers decoded (16, 6, 4 or 2)
 * prev      : running prefix-sum state, read and updated
 *
 * Returns the number of input bytes consumed.
 *
 * NOTE(review): PrefixSum, PrefixSum2ints, combined_lookup and vectors are
 * defined elsewhere in the file; their exact semantics are assumed from their
 * use here.
 */
static int read_int_group_delta(const uint8_t *in, uint32_t *out,
int *ints_read, __m128i *prev) {
__m128i initial = _mm_lddqu_si128((const __m128i *)in);
__m128i *const mout = (__m128i *)out;
// One bit per input byte: the byte's high bit.
int mask = _mm_movemask_epi8(initial);
if (mask == 0) {
// No byte has its high bit set, so all 16 bytes are one-byte integers.
// They are widened four at a time (sign extension is harmless because every
// high bit is clear) and stored as four vectors.
__m128i result;
result = _mm_cvtepi8_epi32(initial);
initial = _mm_srli_si128(initial, 4);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout, *prev);
result = _mm_cvtepi8_epi32(initial);
initial = _mm_srli_si128(initial, 4);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout + 1, *prev);
result = _mm_cvtepi8_epi32(initial);
initial = _mm_srli_si128(initial, 4);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout + 2, *prev);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout + 3, *prev);
*ints_read = 16;
return 16;
}
// The low 12 mask bits select a shuffle vector and the byte count to consume.
int mask2 = mask & 0xFFF;
index_bytes_consumed combined = combined_lookup[mask2];
int index = combined.index;
__m128i shuffle_vector = vectors[index];
int consumed = combined.bytes_consumed;
if (index < 64) {
// Six integers, handled as 16-bit lanes: 7 payload bits from each of two
// bytes are merged per lane.
*ints_read = 6;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F));
__m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00));
__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
// Low 16 bits of each 32-bit lane are stored first, then the high 16 bits.
__m128i unpacked_result_a =
_mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF));
*prev = PrefixSum(unpacked_result_a, *prev);
_mm_storeu_si128(mout, *prev);
__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
*prev = PrefixSum2ints(unpacked_result_b, *prev);
// A whole vector is stored although only six integers are reported.
_mm_storeu_si128(mout + 1, *prev);
return consumed;
}
if (index < 145) {
// Four integers, handled as 32-bit lanes: 7 payload bits from each of three
// bytes are merged per lane.
*ints_read = 4;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F));
__m128i middle_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00));
__m128i high_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000));
__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
*prev = PrefixSum(result, *prev);
_mm_storeu_si128(mout, *prev);
return consumed;
}
// Remaining case: two integers are reported. The high bits are cleared first,
// then the payload bits are repositioned with 16-bit multiplies and 64-bit
// shifts before being gathered into the low half of the vector.
*ints_read = 2;
__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
__m128i split_bytes = _mm_mullo_epi16(
bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
__m128i result_evens = _mm_or_si128(recombined, low_byte);
__m128i result = _mm_shuffle_epi8(
result_evens,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
*prev = PrefixSum2ints(result, *prev);
// A whole vector is stored although only two integers are reported.
_mm_storeu_si128(mout, *prev);
return consumed;
}
// Decode |length| delta-coded vbyte integers from |in| into |out|.
//
// in     : compressed input
// out    : destination for the decoded (prefix-summed) integers
// length : number of ints we want to decode
// prev   : starting value of the running sum
//
// Returns the number of input bytes consumed. The pipelined loop returns 0 if
// the bit mask becomes all ones after a shift (see the "fake check" below).
//
// NOTE(review): masked_vbyte_read_group_delta is defined elsewhere in the
// file; it presumably loads 16 input bytes and stores whole vectors to out --
// confirm buffer sizing with callers.
size_t masked_vbyte_read_loop_delta(const uint8_t *in, uint32_t *out,
uint64_t length, uint32_t prev) {
size_t consumed = 0; // number of bytes read
__m128i mprev = _mm_set1_epi32(prev);
uint64_t count = 0; // how many integers we have read so far
// sig holds the high bit of each not-yet-decoded byte; bit 0 belongs to
// in[consumed]. availablebytes is how many bytes sig currently describes.
uint64_t sig = 0;
int availablebytes = 0;
if (96 < length) {
size_t scanned = 0;
#ifdef __AVX2__
__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
uint32_t lowSig = _mm256_movemask_epi8(low);
#else
__m128i low1 = _mm_loadu_si128((__m128i *)(in + scanned));
uint32_t lowSig1 = _mm_movemask_epi8(low1);
__m128i low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
uint32_t lowSig2 = _mm_movemask_epi8(low2);
uint32_t lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
// excess verbosity to avoid problems with sign extension on conversions
// better to think about what's happening and make it clearer
__m128i high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
uint32_t highSig = _mm_movemask_epi8(high);
uint64_t nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
scanned += 48;
do {
// The 48 bits gathered on the previous pass become usable now, while the
// next 48 are gathered for the following pass.
uint64_t thisSig = nextSig;
#ifdef __AVX2__
low = _mm256_loadu_si256((__m256i *)(in + scanned));
lowSig = _mm256_movemask_epi8(low);
#else
low1 = _mm_loadu_si128((__m128i *)(in + scanned));
lowSig1 = _mm_movemask_epi8(low1);
low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
lowSig2 = _mm_movemask_epi8(low2);
lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
highSig = _mm_movemask_epi8(high);
nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
// Bytes already described by sig but not yet decoded.
uint64_t remaining = scanned - (consumed + 48);
sig = (thisSig << remaining) | sig;
uint64_t reload = scanned - 16;
scanned += 48;
// need to reload when less than 16 scanned bytes remain in sig
while (consumed < reload) {
uint64_t ints_read;
uint8_t bytes = masked_vbyte_read_group_delta(
in + consumed, out + count, sig, &ints_read, &mprev);
sig >>= bytes;
// seems like this might force the compiler to prioritize shifting sig
// >>= bytes
if (sig == 0xFFFFFFFFFFFFFFFF)
return 0; // fake check to force earliest evaluation
consumed += bytes;
count += ints_read;
}
} while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to
// 16 remaining in sig
sig = (nextSig << (scanned - consumed - 48)) | sig;
availablebytes = (int)(scanned - consumed);
}
// Use up the bytes sig already describes. sig is not refilled here: once
// fewer than 16 bytes are described, the scalar loop below takes over.
while (availablebytes + count < length) {
if (availablebytes < 16)
break;
uint64_t ints_read;
uint8_t eaten = masked_vbyte_read_group_delta(in + consumed, out + count,
sig, &ints_read, &mprev);
consumed += eaten;
availablebytes -= eaten;
sig >>= eaten;
count += ints_read;
}
// Carry the running sum (lane 3 of mprev) over to the scalar decoder.
prev = _mm_extract_epi32(mprev, 3);
for (; count < length; count++) {
consumed += read_int_delta(in + consumed, out + count, &prev);
}
return consumed;
}
/**
 * Decode `length` delta-coded vbyte integers from `in` into `out`, starting
 * the running sum at `prev`.
 *
 * Returns the number of input bytes consumed.
 *
 * read_int_group_delta is used while at least 16 integers are still wanted;
 * lane 3 of its vector state is then handed to the scalar read_int_delta for
 * the remaining integers.
 */
size_t read_ints_delta(const uint8_t *in, uint32_t *out, int length,
                       uint32_t prev) {
  size_t offset = 0; // bytes of `in` used so far
  int decoded = 0;   // integers written to `out` so far
  __m128i running = _mm_set1_epi32(prev);

  // Grouped decoding.
  while (decoded + 15 < length) {
    int produced;
    offset +=
        read_int_group_delta(in + offset, out + decoded, &produced, &running);
    decoded += produced;
  }

  // Continue the running sum in scalar form.
  prev = _mm_extract_epi32(running, 3);
  while (decoded < length) {
    offset += read_int_delta(in + offset, out + decoded, &prev);
    decoded++;
  }

  return offset;
}
/**
 * Decode a delta-coded, vbyte-compressed buffer whose size in bytes is known.
 *
 * in        : compressed input bytes
 * out       : destination for the decoded (prefix-summed) integers
 * inputsize : number of input bytes we want to decode
 * prev      : starting value of the running sum
 *
 * Returns the number of integers written to out.
 *
 * The work is done in three phases:
 *  1. when more than 96 input bytes exist, a pipelined loop collects the
 *     high bit of every byte 48 bytes at a time, one batch ahead of the
 *     group decoder;
 *  2. a loop that tops the bit mask up by 32 or 16 bytes for as long as such
 *     loads stay inside the input;
 *  3. a scalar loop that decodes whatever bytes are left.
 *
 * The pipelined loop returns 0 if the bit mask becomes all ones after a shift
 * (see the "fake check" below).
 *
 * NOTE(review): masked_vbyte_read_group_delta is defined elsewhere in this
 * file; it presumably loads 16 input bytes and stores whole vectors to out,
 * so out probably needs slack past the last decoded value -- confirm with
 * callers.
 */
size_t masked_vbyte_read_loop_fromcompressedsize_delta(const uint8_t *in,
                                                       uint32_t *out,
                                                       size_t inputsize,
                                                       uint32_t prev) {
  size_t consumed = 0; // number of bytes read
  uint32_t *initout = out;
  __m128i mprev = _mm_set1_epi32(prev);
  // sig holds the high bit of each not-yet-decoded byte; bit 0 belongs to
  // in[consumed]. availablebytes is how many bytes sig currently describes.
  uint64_t sig = 0;
  uint64_t availablebytes = 0;
  if (96 < inputsize) {
    size_t scanned = 0;
#ifdef __AVX2__
    __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
    uint32_t lowSig = _mm256_movemask_epi8(low);
#else
    __m128i low1 = _mm_loadu_si128((__m128i *)(in + scanned));
    uint32_t lowSig1 = _mm_movemask_epi8(low1);
    __m128i low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
    uint32_t lowSig2 = _mm_movemask_epi8(low2);
    uint32_t lowSig = lowSig2 << 16;
    lowSig |= lowSig1;
#endif
    // excess verbosity to avoid problems with sign extension on conversions
    // better to think about what's happening and make it clearer
    __m128i high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
    uint32_t highSig = _mm_movemask_epi8(high);
    uint64_t nextSig = highSig;
    nextSig <<= 32;
    nextSig |= lowSig;
    scanned += 48;
    do {
      // The 48 bits gathered on the previous pass become usable now, while
      // the next 48 are gathered for the following pass.
      uint64_t thisSig = nextSig;
#ifdef __AVX2__
      low = _mm256_loadu_si256((__m256i *)(in + scanned));
      lowSig = _mm256_movemask_epi8(low);
#else
      low1 = _mm_loadu_si128((__m128i *)(in + scanned));
      lowSig1 = _mm_movemask_epi8(low1);
      low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
      lowSig2 = _mm_movemask_epi8(low2);
      lowSig = lowSig2 << 16;
      lowSig |= lowSig1;
#endif
      high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
      highSig = _mm_movemask_epi8(high);
      nextSig = highSig;
      nextSig <<= 32;
      nextSig |= lowSig;
      // Bytes already described by sig but not yet decoded.
      uint64_t remaining = scanned - (consumed + 48);
      sig = (thisSig << remaining) | sig;
      uint64_t reload = scanned - 16;
      scanned += 48;
      // need to reload when less than 16 scanned bytes remain in sig
      while (consumed < reload) {
        uint64_t ints_read;
        uint8_t bytes = masked_vbyte_read_group_delta(in + consumed, out, sig,
                                                      &ints_read, &mprev);
        sig >>= bytes;
        // seems like this might force the compiler to prioritize shifting sig
        // >>= bytes
        if (sig == 0xFFFFFFFFFFFFFFFF)
          return 0; // fake check to force earliest evaluation
        consumed += bytes;
        out += ints_read;
      }
    } while (scanned + 112 < inputsize); // 112 == 48 + 48 ahead for scanning +
                                         // up to 16 remaining in sig
    sig = (nextSig << (scanned - consumed - 48)) | sig;
    availablebytes = scanned - consumed;
  }
  while (1) {
    if (availablebytes < 16) {
      // Top sig up; each load is only issued when its last byte is still
      // inside the input.
      if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
        uint64_t newsigavx = (uint32_t)_mm256_movemask_epi8(
            _mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
        sig |= (newsigavx << availablebytes);
#else
        uint64_t newsig = _mm_movemask_epi8(
            _mm_lddqu_si128((const __m128i *)(in + availablebytes + consumed)));
        uint64_t newsig2 = _mm_movemask_epi8(_mm_lddqu_si128(
            (const __m128i *)(in + availablebytes + 16 + consumed)));
        sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16));
#endif
        availablebytes += 32;
      } else if (availablebytes + consumed + 15 < inputsize) {
        int newsig = _mm_movemask_epi8(
            _mm_lddqu_si128((const __m128i *)(in + availablebytes + consumed)));
        sig |= newsig << availablebytes;
        availablebytes += 16;
      } else {
        break;
      }
    }
    uint64_t ints_read;
    uint8_t bytes = masked_vbyte_read_group_delta(in + consumed, out, sig,
                                                  &ints_read, &mprev);
    consumed += bytes;
    availablebytes -= bytes;
    sig >>= bytes;
    out += ints_read;
  }
  // Carry the running sum (lane 3 of mprev) over to the scalar tail.
  prev = _mm_extract_epi32(mprev, 3);
  // Scalar tail. A value whose final byte is missing from the input is
  // dropped without being written.
  while (consumed < inputsize) {
    unsigned int shift = 0;
    for (uint32_t v = 0; consumed < inputsize; shift += 7) {
      uint8_t c = in[consumed++];
      // Shift as uint32_t: c would otherwise be promoted to int, and the
      // fifth byte (shift == 28) can overflow a signed int, which is
      // undefined behaviour.
      if ((c & 128) == 0) {
        uint32_t delta = v + ((uint32_t)c << shift);
        prev += delta;
        *out++ = prev;
        break;
      } else {
        v += (uint32_t)(c & 127) << shift;
      }
    }
  }
  return out - initout;
}
/* Sixteen 16-byte shuffle controls for _mm_shuffle_epi8, indexed by a 4-bit
 * lane mask m. For m != 0 the first four bytes of entry m select the 32-bit
 * lane at the position of the lowest set bit of m, so that lane ends up in
 * lane 0 of the shuffled vector; entry 0 is the identity. The search macros
 * below only read lane 0 of the result. */
static SIMDCOMP_ALIGNED(16) int8_t shuffle_mask_bytes1[16 * 16] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 4, 5,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14,
15, 4, 5, 6, 7, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 12, 13, 14, 15,
8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10,
11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13,
14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 0,
1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 13, 14, 15,
};
/* The same table viewed as an array of 16 vectors. */
static const __m128i *shuffle_mask = (__m128i *)shuffle_mask_bytes1;
/* Lower-bound test on the four 32-bit lanes of |out|.
 *
 * Each lane is compared, as an unsigned value, against the search key: both
 * sides are biased by 2^31 so that the signed _mm_cmplt_epi32 orders them as
 * unsigned. If some lane is not smaller than the key, the first such lane is
 * stored in |*presult| and the ENCLOSING FUNCTION returns |i| plus that
 * lane's position. Otherwise |i| is advanced by 4.
 *
 * The expansion site must provide __m128i variables named |conversion| (the
 * bias) and |key4| (the biased key); masked_vbyte_search_group_delta sets
 * them up. The |key| argument itself is not used by the expansion.
 * __builtin_ctz is a GCC/Clang builtin. */
#define CHECK_AND_INCREMENT(i, out, key, presult) \
do { \
__m128i tmpout = _mm_sub_epi32(out, conversion); \
uint32_t mask = \
_mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \
if (mask != 15) { \
__m128i p = _mm_shuffle_epi8(out, shuffle_mask[mask ^ 15]); \
int offset; \
offset = __builtin_ctz(mask ^ 15); \
*presult = _mm_cvtsi128_si32(p); \
return (i + offset); \
} \
i += 4; \
} while (0)
/* Lower-bound test on the two lowest 32-bit lanes of |out|.
 *
 * Works like CHECK_AND_INCREMENT but only lanes 0 and 1 take part (the
 * comparison mask is reduced to its two low bits) and |i| is advanced by 2
 * when neither lane reaches the key. On a hit the first lane that is not
 * smaller than the key is stored in |*presult| and the ENCLOSING FUNCTION
 * returns |i| plus that lane's position.
 *
 * The expansion site must provide __m128i variables named |conversion| and
 * |key4|; the |key| argument itself is not used by the expansion. */
#define CHECK_AND_INCREMENT_2(i, out, key, presult) \
do { \
__m128i tmpout = _mm_sub_epi32(out, conversion); \
uint32_t mask = \
3 & _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \
if (mask != 3) { \
__m128i p = _mm_shuffle_epi8(out, shuffle_mask[mask ^ 3]); \
int offset; \
offset = __builtin_ctz(mask ^ 3); \
*presult = _mm_cvtsi128_si32(p); \
return (i + offset); \
} \
i += 2; \
} while (0)
/*
 * Decode one group of delta-coded vbyte integers from the 16 bytes at |in|
 * and look for the first decoded value that is not smaller than |key|.
 *
 * in        : input; 16 bytes are loaded from it
 * p         : receives the number of input bytes consumed (no-match case only)
 * mask      : high bits of the input bytes, bit 0 belonging to in[0]
 * ints_read : receives the number of integers in this group (16, 6, 4 or 2)
 * prev      : running prefix-sum state, read and updated
 * i         : position of this group's first integer in the whole sequence
 * key       : value searched for
 * presult   : receives the matching value on a hit
 *
 * Returns the position of the match (through the CHECK_AND_INCREMENT macros,
 * which return from this function), or -1 when nothing in the group reaches
 * the key.
 *
 * NOTE(review): PrefixSum, PrefixSum2ints, combined_lookup and vectors are
 * defined elsewhere in the file; their exact semantics are assumed from their
 * use here.
 */
static int masked_vbyte_search_group_delta(const uint8_t *in, uint8_t *p,
uint64_t mask, uint64_t *ints_read,
__m128i *prev, int i, uint32_t key,
uint32_t *presult) {
__m128i initial = _mm_lddqu_si128((const __m128i *)(in));
// Bias used by the macros to get an unsigned comparison out of a signed one.
__m128i conversion = _mm_set1_epi32(2147483648U);
__m128i key4 = _mm_set1_epi32(key - 2147483648U);
if (!(mask & 0xFFFF)) {
// No high bit among the 16 bytes: sixteen one-byte integers, checked four at
// a time.
__m128i result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_AND_INCREMENT(i, *prev, key, presult);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_AND_INCREMENT(i, *prev, key, presult);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_AND_INCREMENT(i, *prev, key, presult);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_AND_INCREMENT(i, *prev, key, presult);
*ints_read = 16;
*p = 16;
return (-1);
}
uint32_t low_12_bits = mask & 0xFFF;
// combine index and bytes consumed into a single lookup
index_bytes_consumed combined = combined_lookup[low_12_bits];
uint8_t consumed = combined.bytes_consumed;
uint8_t index = combined.index;
__m128i shuffle_vector = vectors[index];
// __m128i shuffle_vector = {0, 0}; // speed check: 20% faster at large,
//less at small
if (index < 64) {
// Six integers: four are checked first, then the remaining two.
*ints_read = 6;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F));
__m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00));
__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
__m128i unpacked_result_a =
_mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF));
*prev = PrefixSum(unpacked_result_a, *prev);
CHECK_AND_INCREMENT(i, *prev, key, presult);
__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
*prev = PrefixSum2ints(unpacked_result_b, *prev);
//_mm_storel_epi64(&out, *prev);
CHECK_AND_INCREMENT_2(i, *prev, key, presult);
*p = consumed;
return (-1);
}
if (index < 145) {
// Four integers in 32-bit lanes, 7 payload bits taken from each of three
// bytes.
*ints_read = 4;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F));
__m128i middle_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00));
__m128i high_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000));
__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
*prev = PrefixSum(result, *prev);
CHECK_AND_INCREMENT(i, *prev, key, presult);
*p = consumed;
return (-1);
}
// Remaining case: two integers are reported and checked.
*ints_read = 2;
__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
__m128i split_bytes = _mm_mullo_epi16(
bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
__m128i result_evens = _mm_or_si128(recombined, low_byte);
__m128i result = _mm_shuffle_epi8(
result_evens,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
*prev = PrefixSum2ints(result, *prev);
//_mm_storel_epi64(&out, *prev);
CHECK_AND_INCREMENT_2(i, *prev, key, presult);
*p = consumed;
return (-1);
}
// Lower-bound search in a delta-coded vbyte sequence.
//
// in      : compressed input
// length  : number of integers in the sequence
// prev    : starting value of the running sum
// key     : value searched for
// presult : receives the value found
//
// Returns the index of the first decoded value that is not smaller than |key|
// and stores that value in |*presult|. When no value reaches the key,
// |*presult| is set to key + 1 and |length| is returned. The pipelined loop
// returns 0 if the bit mask becomes all ones after a shift (see the "fake
// check" below).
int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev,
uint32_t key, uint32_t *presult) {
size_t consumed = 0; // number of bytes read
__m128i mprev = _mm_set1_epi32(prev);
uint64_t count = 0; // how many integers we have read so far
// sig holds the high bit of each not-yet-decoded byte; bit 0 belongs to
// in[consumed]. availablebytes is how many bytes sig currently describes.
uint64_t sig = 0;
uint64_t availablebytes = 0;
if (96 < length) {
size_t scanned = 0;
#ifdef __AVX2__
__m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
uint32_t lowSig = _mm256_movemask_epi8(low);
#else
__m128i low1 = _mm_loadu_si128((__m128i *)(in + scanned));
uint32_t lowSig1 = _mm_movemask_epi8(low1);
__m128i low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
uint32_t lowSig2 = _mm_movemask_epi8(low2);
uint32_t lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
// excess verbosity to avoid problems with sign extension on conversions
// better to think about what's happening and make it clearer
__m128i high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
uint32_t highSig = _mm_movemask_epi8(high);
uint64_t nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
scanned += 48;
do {
// The 48 bits gathered on the previous pass become usable now, while the
// next 48 are gathered for the following pass.
uint64_t thisSig = nextSig;
#ifdef __AVX2__
low = _mm256_loadu_si256((__m256i *)(in + scanned));
lowSig = _mm256_movemask_epi8(low);
#else
low1 = _mm_loadu_si128((__m128i *)(in + scanned));
lowSig1 = _mm_movemask_epi8(low1);
low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
lowSig2 = _mm_movemask_epi8(low2);
lowSig = lowSig2 << 16;
lowSig |= lowSig1;
#endif
high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
highSig = _mm_movemask_epi8(high);
nextSig = highSig;
nextSig <<= 32;
nextSig |= lowSig;
// Bytes already described by sig but not yet decoded.
uint64_t remaining = scanned - (consumed + 48);
sig = (thisSig << remaining) | sig;
uint64_t reload = scanned - 16;
scanned += 48;
// need to reload when less than 16 scanned bytes remain in sig
while (consumed < reload) {
uint64_t ints_read = 0;
uint8_t bytes = 0;
int ret = masked_vbyte_search_group_delta(in + consumed, &bytes, sig,
&ints_read, &mprev,
(int)count, key, presult);
// A non-negative result is the index of the match.
if (ret >= 0)
return (ret);
sig >>= bytes;
// seems like this might force the compiler to prioritize shifting sig
// >>= bytes
if (sig == 0xFFFFFFFFFFFFFFFF)
return 0; // fake check to force earliest evaluation
consumed += bytes;
count += ints_read;
}
} while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to
// 16 remaining in sig
sig = (nextSig << (scanned - consumed - 48)) | sig;
availablebytes = scanned - consumed;
}
// Use up the bytes sig already describes. sig is not refilled here: once
// fewer than 16 bytes are described, the scalar loop below takes over.
while (availablebytes + count < length) {
if (availablebytes < 16)
break;
uint64_t ints_read = 0;
uint8_t bytes = 0;
int ret =
masked_vbyte_search_group_delta(in + consumed, &bytes, sig, &ints_read,
&mprev, (int)count, key, presult);
if (ret >= 0)
return (ret);
consumed += bytes;
availablebytes -= bytes;
sig >>= bytes;
count += ints_read;
}
// Carry the running sum (lane 3 of mprev) over to the scalar decoder.
prev = _mm_extract_epi32(mprev, 3);
for (; count < length; count++) {
uint32_t out;
consumed += read_int_delta(in + consumed, &out, &prev);
if (key <= prev) {
*presult = prev;
return (int)count;
}
}
// Nothing reached the key.
*presult = key + 1;
return (int)length;
}
/* Shuffle controls for _mm_shuffle_epi8 used to pull out a single 32-bit
 * lane: for k in 0..3 the first four bytes of the k-th 16-byte entry select
 * lane k, so the shuffled vector carries that lane in lane 0. Only these four
 * entries are spelled out; the rest of the array is zero-initialized. */
static SIMDCOMP_ALIGNED(16) int8_t shuffle_mask_bytes2[16 * 16] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* The same table viewed as an array of vectors. */
static const __m128i *shuffle_mask_extract = (__m128i *)shuffle_mask_bytes2;
/* Return the 32-bit lane |i| of |out| without branching: the lane is shuffled
 * into lane 0 using entry |i| of shuffle_mask_extract and read from there. */
static uint32_t branchlessextract(__m128i out, int i) {
  const __m128i lane_selector = shuffle_mask_extract[i];
  const __m128i lane_in_front = _mm_shuffle_epi8(out, lane_selector);
  return _mm_cvtsi128_si32(lane_in_front);
}
/* Account for four freshly decoded values held in |out| and, if the wanted
 * position |slot| is among them, store that value in |*presult| and make the
 * ENCLOSING FUNCTION return 1.
 *
 * |i| is advanced by 4 first; the wanted value is in this vector when the new
 * |i| exceeds |slot|, and its lane is slot - (i - 4).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement (safe in
 * an unbraced if/else); arguments are parenthesized. */
#define CHECK_SELECT(i, out, slot, presult) \
  do { \
    (i) += 4; \
    if ((i) > (slot)) { \
      *(presult) = branchlessextract((out), (slot) - ((i) - 4)); \
      return (1); \
    } \
  } while (0)
/* Account for two freshly decoded values held in the low lanes of |out| and,
 * if the wanted position |slot| is one of them, store that value in
 * |*presult| and make the ENCLOSING FUNCTION return 1.
 *
 * |i| is advanced by 2 first; the wanted value is in this vector when the new
 * |i| exceeds |slot|, and its lane is slot - (i - 2).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement (safe in
 * an unbraced if/else); arguments are parenthesized. */
#define CHECK_SELECT_2(i, out, slot, presult) \
  do { \
    (i) += 2; \
    if ((i) > (slot)) { \
      *(presult) = branchlessextract((out), (slot) - ((i) - 2)); \
      return (1); \
    } \
  } while (0)
/*
 * Decode one group of delta-coded vbyte integers from the 16 bytes at |in|
 * and pick out the value at position |slot| if it lies in this group.
 *
 * in        : input; 16 bytes are loaded from it
 * p         : receives the number of input bytes consumed (not-found case)
 * mask      : high bits of the input bytes, bit 0 belonging to in[0]
 * ints_read : receives the number of integers in this group (16, 6, 4 or 2)
 * prev      : running prefix-sum state, read and updated
 * slot      : wanted position, counted from this group's first integer
 * presult   : receives the selected value
 *
 * Returns 1 (through the CHECK_SELECT macros, which return from this
 * function) when the value was found in this group, 0 otherwise.
 *
 * NOTE(review): PrefixSum, PrefixSum2ints, combined_lookup and vectors are
 * defined elsewhere in the file; their exact semantics are assumed from their
 * use here.
 */
static int masked_vbyte_select_group_delta(const uint8_t *in, uint8_t *p,
uint64_t mask, uint64_t *ints_read,
__m128i *prev, int slot,
uint32_t *presult) {
__m128i initial = _mm_lddqu_si128((const __m128i *)(in));
// Number of this group's integers accounted for so far.
int i = 0;
if (!(mask & 0xFFFF)) {
// No high bit among the 16 bytes: sixteen one-byte integers, handled four at
// a time.
__m128i result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_SELECT(i, *prev, slot, presult);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_SELECT(i, *prev, slot, presult);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_SELECT(i, *prev, slot, presult);
initial = _mm_srli_si128(initial, 4);
result = _mm_cvtepi8_epi32(initial);
*prev = PrefixSum(result, *prev);
CHECK_SELECT(i, *prev, slot, presult);
*ints_read = 16;
*p = 16;
return (0);
}
uint32_t low_12_bits = mask & 0xFFF;
// combine index and bytes consumed into a single lookup
index_bytes_consumed combined = combined_lookup[low_12_bits];
uint8_t consumed = combined.bytes_consumed;
uint8_t index = combined.index;
__m128i shuffle_vector = vectors[index];
// __m128i shuffle_vector = {0, 0}; // speed check: 20% faster at large,
//less at small
if (index < 64) {
// Six integers: four are handled first, then the remaining two.
*ints_read = 6;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F));
__m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00));
__m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
__m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
__m128i unpacked_result_a =
_mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF));
*prev = PrefixSum(unpacked_result_a, *prev);
CHECK_SELECT(i, *prev, slot, presult);
__m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
*prev = PrefixSum2ints(unpacked_result_b, *prev);
//_mm_storel_epi64(&out, *prev);
CHECK_SELECT_2(i, *prev, slot, presult);
*p = consumed;
return (0);
}
if (index < 145) {
// Four integers in 32-bit lanes, 7 payload bits taken from each of three
// bytes.
*ints_read = 4;
__m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
__m128i low_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F));
__m128i middle_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00));
__m128i high_bytes =
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000));
__m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
__m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
__m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
__m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
*prev = PrefixSum(result, *prev);
CHECK_SELECT(i, *prev, slot, presult);
*p = consumed;
return (0);
}
// Remaining case: two integers are reported.
*ints_read = 2;
__m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
__m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
__m128i split_bytes = _mm_mullo_epi16(
bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
__m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
__m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
__m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
__m128i result_evens = _mm_or_si128(recombined, low_byte);
__m128i result = _mm_shuffle_epi8(
result_evens,
_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
*prev = PrefixSum2ints(result, *prev);
//_mm_storel_epi64(&out, *prev);
CHECK_SELECT_2(i, *prev, slot, presult);
*p = consumed;
return (0);
}
/**
 * Return the value at position `slot` of a delta-coded vbyte sequence.
 *
 * in     : compressed input
 * length : number of integers in the sequence
 * prev   : starting value of the running sum
 * slot   : zero-based position of the wanted value
 *
 * The sequence is decoded group by group until the group holding `slot` is
 * reached; if the vectorized phases end before that, the remaining values up
 * to and including `slot` are decoded one at a time. The pipelined loop
 * returns 0 if the bit mask becomes all ones after a shift (see the "fake
 * check" below).
 *
 * The second phase used to contain a block that refilled the bit mask, but it
 * sat directly behind an identical `availablebytes < 16` test that leaves the
 * loop, so it could never execute. It has been removed; behaviour is
 * unchanged.
 */
uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length,
                                   uint32_t prev, size_t slot) {
  size_t consumed = 0; // number of bytes read
  __m128i mprev = _mm_set1_epi32(prev);
  uint64_t count = 0; // how many integers we have read so far
  // sig holds the high bit of each not-yet-decoded byte; bit 0 belongs to
  // in[consumed]. availablebytes is how many bytes sig currently describes.
  uint64_t sig = 0;
  uint64_t availablebytes = 0;
  if (96 < length) {
    size_t scanned = 0;
#ifdef __AVX2__
    __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
    uint32_t lowSig = _mm256_movemask_epi8(low);
#else
    __m128i low1 = _mm_loadu_si128((__m128i *)(in + scanned));
    uint32_t lowSig1 = _mm_movemask_epi8(low1);
    __m128i low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
    uint32_t lowSig2 = _mm_movemask_epi8(low2);
    uint32_t lowSig = lowSig2 << 16;
    lowSig |= lowSig1;
#endif
    // excess verbosity to avoid problems with sign extension on conversions
    // better to think about what's happening and make it clearer
    __m128i high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
    uint32_t highSig = _mm_movemask_epi8(high);
    uint64_t nextSig = highSig;
    nextSig <<= 32;
    nextSig |= lowSig;
    scanned += 48;
    do {
      // The 48 bits gathered on the previous pass become usable now, while
      // the next 48 are gathered for the following pass.
      uint64_t thisSig = nextSig;
#ifdef __AVX2__
      low = _mm256_loadu_si256((__m256i *)(in + scanned));
      lowSig = _mm256_movemask_epi8(low);
#else
      low1 = _mm_loadu_si128((__m128i *)(in + scanned));
      lowSig1 = _mm_movemask_epi8(low1);
      low2 = _mm_loadu_si128((__m128i *)(in + scanned + 16));
      lowSig2 = _mm_movemask_epi8(low2);
      lowSig = lowSig2 << 16;
      lowSig |= lowSig1;
#endif
      high = _mm_loadu_si128((__m128i *)(in + scanned + 32));
      highSig = _mm_movemask_epi8(high);
      nextSig = highSig;
      nextSig <<= 32;
      nextSig |= lowSig;
      // Bytes already described by sig but not yet decoded.
      uint64_t remaining = scanned - (consumed + 48);
      sig = (thisSig << remaining) | sig;
      uint64_t reload = scanned - 16;
      scanned += 48;
      // need to reload when less than 16 scanned bytes remain in sig
      while (consumed < reload) {
        uint32_t result;
        uint64_t ints_read;
        uint8_t bytes;
        // The group helper works with a slot relative to its first value.
        if (masked_vbyte_select_group_delta(in + consumed, &bytes, sig,
                                            &ints_read, &mprev,
                                            (int)(slot - count), &result)) {
          return (result);
        }
        sig >>= bytes;
        // seems like this might force the compiler to prioritize shifting sig
        // >>= bytes
        if (sig == 0xFFFFFFFFFFFFFFFF)
          return 0; // fake check to force earliest evaluation
        consumed += bytes;
        count += ints_read;
      }
    } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to
                                    // 16 remaining in sig
    sig = (nextSig << (scanned - consumed - 48)) | sig;
    availablebytes = scanned - consumed;
  }
  // Use up the bytes sig already describes. sig is not refilled here: once
  // fewer than 16 bytes are described, the scalar loop below takes over.
  while (availablebytes + count < length) {
    if (availablebytes < 16)
      break;
    uint32_t result;
    uint64_t ints_read;
    uint8_t bytes;
    if (masked_vbyte_select_group_delta(in + consumed, &bytes, sig, &ints_read,
                                        &mprev, (int)(slot - count), &result)) {
      return (result);
    }
    consumed += bytes;
    availablebytes -= bytes;
    sig >>= bytes;
    count += ints_read;
  }
  // Carry the running sum (lane 3 of mprev) over to the scalar decoder and
  // decode up to and including position `slot`.
  prev = _mm_extract_epi32(mprev, 3);
  for (; count < slot + 1; count++) {
    uint32_t out;
    consumed += read_int_delta(in + consumed, &out, &prev);
  }
  return prev;
}