Skip to content

Commit

Permalink
random: Use branchless implementation for mask generation in Randomiz…
Browse files Browse the repository at this point in the history
…er::getBytesFromString() (#10522)

* random: Add `max_offset` local to Randomizer::getBytesFromString()

* random: Use branchless implementation for mask generation in Randomizer::getBytesFromString()

This was benchmarked against clzl with a standalone script with random inputs
and is slightly faster. clzl requires an additional branch to handle the
source_length = 1 / max_offset = 0 case.

* Improve comment for masking in Randomizer::getBytesFromString()
  • Loading branch information
TimWolla committed Feb 8, 2023
1 parent eabb9b7 commit 0cfc45b
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 23 deletions.
35 changes: 12 additions & 23 deletions ext/random/randomizer.c
Expand Up @@ -388,6 +388,7 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
ZEND_PARSE_PARAMETERS_END();

const size_t source_length = ZSTR_LEN(source);
const size_t max_offset = source_length - 1;

if (source_length < 1) {
zend_argument_value_error(1, "cannot be empty");
Expand All @@ -401,9 +402,9 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)

retval = zend_string_alloc(length, 0);

if (source_length > 0x100) {
if (max_offset > 0xff) {
while (total_size < length) {
uint64_t offset = randomizer->algo->range(randomizer->status, 0, source_length - 1);
uint64_t offset = randomizer->algo->range(randomizer->status, 0, max_offset);

if (EG(exception)) {
zend_string_free(retval);
Expand All @@ -413,26 +414,14 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
ZSTR_VAL(retval)[total_size++] = ZSTR_VAL(source)[offset];
}
} else {
uint64_t mask;
if (source_length <= 0x1) {
mask = 0x0;
} else if (source_length <= 0x2) {
mask = 0x1;
} else if (source_length <= 0x4) {
mask = 0x3;
} else if (source_length <= 0x8) {
mask = 0x7;
} else if (source_length <= 0x10) {
mask = 0xF;
} else if (source_length <= 0x20) {
mask = 0x1F;
} else if (source_length <= 0x40) {
mask = 0x3F;
} else if (source_length <= 0x80) {
mask = 0x7F;
} else {
mask = 0xFF;
}
uint64_t mask = max_offset;
// Copy the top-most bit into all lower bits.
// Shifting by 4 is sufficient, because max_offset
// is guaranteed to fit in an 8-bit integer at this
// point.
mask |= mask >> 1;
mask |= mask >> 2;
mask |= mask >> 4;

int failures = 0;
while (total_size < length) {
Expand All @@ -445,7 +434,7 @@ PHP_METHOD(Random_Randomizer, getBytesFromString)
for (size_t i = 0; i < randomizer->status->last_generated_size; i++) {
uint64_t offset = (result >> (i * 8)) & mask;

if (offset >= source_length) {
if (offset > max_offset) {
if (++failures > PHP_RANDOM_RANGE_ATTEMPTS) {
zend_string_free(retval);
zend_throw_error(random_ce_Random_BrokenRandomEngineError, "Failed to generate an acceptable random number in %d attempts", PHP_RANDOM_RANGE_ATTEMPTS);
Expand Down
Expand Up @@ -47,6 +47,32 @@ for ($i = 1; $i <= strlen($allBytes); $i *= 2) {
echo PHP_EOL;
}

// Test lengths that are one more than the powers of two. For these
// the maximum offset will be a power of two and thus a minimal number
// of bits will be set in the offset.
for ($i = 1; ($i + 1) <= strlen($allBytes); $i *= 2) {
$oneMore = $i + 1;

echo "{$oneMore}:", PHP_EOL;

$wrapper = new TestWrapperEngine($xoshiro);
$r = new Randomizer($wrapper);
$result = $r->getBytesFromString(substr($allBytes, 0, $oneMore), 20000);

$count = [];
for ($j = 0; $j < strlen($result); $j++) {
$b = $result[$j];
$count[ord($b)] ??= 0;
$count[ord($b)]++;
}

// We expect that each possible value appears at least once, if
// not is is very likely that some bits were erroneously masked away.
var_dump(count($count));

echo PHP_EOL;
}

echo "Slow Path:", PHP_EOL;

$wrapper = new TestWrapperEngine($xoshiro);
Expand Down Expand Up @@ -107,6 +133,30 @@ int(128)
int(2500)
int(256)

2:
int(2)

3:
int(3)

5:
int(5)

9:
int(9)

17:
int(17)

33:
int(33)

65:
int(65)

129:
int(129)

Slow Path:
int(20000)
int(256)

0 comments on commit 0cfc45b

Please sign in to comment.