Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #63174 - mt_rand() fails when given range exceeding PHP_INT_MAX #1416

Closed
wants to merge 9 commits into from
89 changes: 89 additions & 0 deletions ext/standard/int_overflow.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/**
* "How to detect integer overflow in C and C++ addition and subtraction"
* http://ptspts.blogspot.com/2014/01/how-to-detect-integer-overflow-in-c-and.html
*/
#ifndef _INT_OVERFLOW_H
#define _INT_OVERFLOW_H 1

#undef MAX_INT_SIZE
#ifdef __cplusplus /* g++-4.6 doesn't have __builtin_choose_expr. */
static inline unsigned TO_UNSIGNED_LOW(int x) { return x; }
static inline unsigned TO_UNSIGNED_LOW(unsigned x) { return x; }
static inline unsigned long long TO_UNSIGNED_LOW(long long x) { return x; }
static inline unsigned long long TO_UNSIGNED_LOW(unsigned long long x) { return x; }
#endif
#if __SIZEOF_INT128__ >= 16 || ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && __SIZEOF_SIZE_T__ >= 8)
#ifdef __cplusplus /* g++-4.6 doesn't have __builtin_choose_expr. */
static inline __uint128_t TO_UNSIGNED_LOW(__int128_t x) { return x; }
static inline __uint128_t TO_UNSIGNED_LOW(__uint128_t x) { return x; }
#else
#define TO_UNSIGNED_LOW(x) ( \
__builtin_choose_expr(sizeof(x) <= sizeof(int), (unsigned)(x), \
__builtin_choose_expr(sizeof(x) <= sizeof(long long), (unsigned long long)(x), \
__builtin_choose_expr(sizeof(x) <= sizeof(__int128_t), (__uint128_t)(x), \
(void)0)))) /* Compile-time error when assigned to something. */
#endif
#define MAX_INT_SIZE (sizeof(long long) > sizeof(__int128_t) ? sizeof(long long) : sizeof(__int128_t))
#else
#ifndef __cplusplus
#define TO_UNSIGNED_LOW(x) ( \
__builtin_choose_expr(sizeof(x) <= sizeof(int), (unsigned)(x), \
__builtin_choose_expr(sizeof(x) <= sizeof(long long), (unsigned long long)(x), \
(void)0))) /* Compile-time error when assigned to something. */
#endif
#define MAX_INT_SIZE (sizeof(long long))
#endif
/* Converts to the the corresponding (or a bit larger) unsigned type.
*/
#define to_unsigned(x) TO_UNSIGNED_LOW(x)

/* Doesn't evaluate x. Returns (int)0 or (int)1 indicating whether the value or
* type x is unsigned.
*/
#define is_unsigned(a) (((__typeof__(a))-1) > 0)

/* Detect signed addition overflow, without executing a single overflowing
* operation.
*/
#define _IS_ADD_OVERFLOW_S(x, y, c) ({ \
__typeof__(x) _x = (x), _y = (y), _z = \
(_x ^ ~_y) & (1 << (sizeof(_x) * 8 - 1)); \
(int)(((_z & (((_x ^ _z) + _y + (c)) ^ ~_y)) >> \
(sizeof(_x) * 8 - 1)) & 1); })

/* Detect signed subtraction overflow, without executing a single overflowing
* operation.
*/
#define _IS_SUBTRACT_OVERFLOW_S(x, y, c) ({ \
__typeof__(x) _x = (x), _y = (y), _z = \
(_x ^ ~_y) & (1 << (sizeof(_x) * 8 - 1)); \
(int)(((_z & (((_x ^ _z) - _y - (c)) ^ _y)) >> \
(sizeof(_x) * 8 - 1)) & 1); })

/* Returns (int)0 or (int)1 indicating whether the addition x + y + c would
* overflow or underflow. x and y must be of the same (signed or unsigned)
* integer type, and c must be 0 or 1, of any integer (or bool) type.
*/
#define is_add_overflow(x, y, c) ( \
sizeof(x) > MAX_INT_SIZE && !is_unsigned(x) ? \
_IS_ADD_OVERFLOW_S(x, y, c) : ({ \
__typeof__(x) _x = (x), _y = (y), _s = \
(__typeof__(x))(to_unsigned(_x) + _y + (c)); \
is_unsigned(_x) ? \
(int)((((_x & _y) | ((_x | _y) & ~_s)) >> (sizeof(_x) * 8 - 1))) : \
(int)((((_s ^ _x) & (_s ^ _y)) >> (sizeof(_x) * 8 - 1)) & 1); }))

/* Returns (int)0 or (int)1 indicating whether the subtraction x - y - c would
* overflow or underflow. x and y must be of the same (signed or unsigned)
* integer type, and c must be 0 or 1, of any integer (or bool) type.
*/
#define is_subtract_overflow(x, y, c) ( \
sizeof(x) > MAX_INT_SIZE && !is_unsigned(x) ? \
_IS_SUBTRACT_OVERFLOW_S(x, y, c) : ({ \
__typeof__(x) _x = (x), _y = (y), _s = \
(__typeof__(x))(to_unsigned(_x) - _y - (c)); \
is_unsigned(_x) ? \
(int)((((~_x & _y) | ((~_x | _y) & _s)) >> (sizeof(_x) * 8 - 1))) : \
(int)((((_x ^ _y) & (_s ^ _x)) >> (sizeof(_x) * 8 - 1)) & 1); }))

#endif /* INT_OVERFLOW_H */
42 changes: 42 additions & 0 deletions ext/standard/php_rand.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,49 @@
#define PHP_RAND_MAX RAND_MAX
#endif

/**
* Generate a random number in the range [__min, __max], ensuring that we never
* teleport into UndefinedBehavior land because __max - __min > ZEND_LONG_MAX.
*/
#define RAND_RANGE(__n, __min, __max, __tmax) \
do { \
if (is_subtract_overflow(__max, __min, 0)) { \
zend_long number_a = (__n); \
zend_long number_b = (__n); \
RAND_SCALE(number_a, __min, -1, __tmax); \
RAND_SCALE(number_b, 0, __max, __tmax); \
(__n) = number_a + number_b; \
} else { \
RAND_SCALE(__n, __min, __max, __tmax); \
} \
} while (0)

/*
* A bit of tricky math here. We want to avoid using a modulus because
* that simply tosses the high-order bits and might skew the distribution
* of random values over the range. Instead we map the range directly.
*
* We need to map the range from 0...M evenly to the range a...b
* Let n = the random number and n' = the mapped random number
*
* Then we have: n' = a + n(b-a)/M
*
* We have a problem here in that only n==M will get mapped to b which
# means the chances of getting b is much much less than getting any of
# the other values in the range. We can fix this by increasing our range
# artificially and using:
#
# n' = a + n(b-a+1)/M
*
# Now we only have a problem if n==M which would cause us to produce a
# number of b+1 which would be bad. So we bump M up by one to make sure
# this will never happen, and the final algorithm looks like this:
#
# n' = a + n(b-a+1)/(M+1)
*
* -RL
*/
#define RAND_SCALE(__n, __min, __max, __tmax) \
(__n) = (__min) + (zend_long) ((double) ( (double) (__max) - (__min) + 1.0) * ((__n) / ((__tmax) + 1.0)))

/* MT Rand */
Expand Down
28 changes: 1 addition & 27 deletions ext/standard/rand.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "php.h"
#include "php_math.h"
#include "php_rand.h"
#include "int_overflow.h"

#include "basic_functions.h"

Expand Down Expand Up @@ -257,33 +258,6 @@ PHP_FUNCTION(mt_srand)
}
/* }}} */


/*
* A bit of tricky math here. We want to avoid using a modulus because
* that simply tosses the high-order bits and might skew the distribution
* of random values over the range. Instead we map the range directly.
*
* We need to map the range from 0...M evenly to the range a...b
* Let n = the random number and n' = the mapped random number
*
* Then we have: n' = a + n(b-a)/M
*
* We have a problem here in that only n==M will get mapped to b which
# means the chances of getting b is much much less than getting any of
# the other values in the range. We can fix this by increasing our range
# artificially and using:
#
# n' = a + n(b-a+1)/M
*
# Now we only have a problem if n==M which would cause us to produce a
# number of b+1 which would be bad. So we bump M up by one to make sure
# this will never happen, and the final algorithm looks like this:
#
# n' = a + n(b-a+1)/(M+1)
*
* -RL
*/

/* {{{ proto int rand([int min, int max])
Returns a random number */
PHP_FUNCTION(rand)
Expand Down
158 changes: 158 additions & 0 deletions ext/standard/tests/general_functions/bug63174.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
--TEST--
Random number generators should not overflow
--FILE--
<?php

// the biggest range we can accommodate, based on RAND_SCALE scaling algo
// 32bit: $upper == getrandmax() // since we use 32-bit MT algo
// 64bit: getrandmax() < PHP_INT_MAX // since we allow scaling into 64-bit range
if (! defined('PHP_INT_MIN')) {
define('PHP_INT_MIN', (-PHP_INT_MAX-1));
}
$lower = PHP_INT_MIN;
$upper = PHP_INT_MAX;

// define our test ranges
$ranges = array (
// nowhere close to overflowing
array (0, 242),
array (1, 242),
array (65, 90),
array (-242, -1),
array (-242, 0),
array (-242, 1),
array (-10000, 10000),
array (19781017, 20130513),
array (-19561018, -19551124),

// teetering on the edge of overflow
array (1, $upper), // barely inside range
array (1, $upper-1), // also barely inside range
array (0, $upper), // on the borders of our int size
array ($lower, -1), // also on the borders of our int size

// range (max - min) exceeds PHP_INT_MAX
// notice these all cross 0
array (-1, $upper, true), // one too big
array ($lower, 1, true), // also one too big
array ($lower, $upper, true), // $upper - $lower > PHP_INT_MAX
array (-$upper,$upper, true), // double your pleasure
array ($lower, -($lower+1), true), // double your pleasure, again
array (PHP_INT_MIN, PHP_INT_MAX, true), // whoa, as big as it gets
);

foreach (array ('mt_rand', 'rand') as $prng) {
var_dump($prng);

// check that we produce random numbers in the given ranges
var_dump(count($ranges));
foreach ($ranges as $k => $range) {
$number = mt_rand($range[0], $range[1]);
var_dump($number); // expect: integer
var_dump($range[0] <= $number && $number <= $range[1]); // expect: true
}

// quick check the uniformity of the distribution
// "each nonrandom sequence is nonrandom in its own way."
// Observed that zero popped up a lot when there was overflow
$n = 10000;
$t = 4000; // 40% is well more than would be expected in a uniform distribution
foreach ($ranges as $range) {
if (empty($range[2])) {
// not an overflow test, skip it
continue;
}
$zeroCnt = 0;
for ($i = 0; $i < $n; $i++) {
$point = mt_rand($range[0], $range[1]);
if (0 === $point) {
$zeroCnt++;
}
}
if ($t < $zeroCnt) {
var_dump("Zero popped up more than 40% of the time in range [${range[0]}, ${range[1]})");
}
}
}

--EXPECTF--
string(7) "mt_rand"
int(19)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
string(4) "rand"
int(19)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)
int(%i)
bool(true)