php · bishopb · Jul 9, 2015 · Jul 9, 2015 · Jul 9, 2015 · Jul 9, 2015
@@ -0,0 +1,89 @@
+/**
+ * "How to detect integer overflow in C and C++ addition and subtraction"
+ * http://ptspts.blogspot.com/2014/01/how-to-detect-integer-overflow-in-c-and.html
+ */
+#ifndef _INT_OVERFLOW_H
+#define _INT_OVERFLOW_H 1
+
+#undef MAX_INT_SIZE
+#ifdef __cplusplus  /* g++-4.6 doesn't have __builtin_choose_expr. */
+static inline unsigned TO_UNSIGNED_LOW(int x) { return x; } 
+static inline unsigned TO_UNSIGNED_LOW(unsigned x) { return x; } 
+static inline unsigned long long TO_UNSIGNED_LOW(long long x) { return x; } 
+static inline unsigned long long TO_UNSIGNED_LOW(unsigned long long x) { return x; } 
+#endif
+#if __SIZEOF_INT128__ >= 16 || ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && __SIZEOF_SIZE_T__ >= 8)
+#ifdef __cplusplus  /* g++-4.6 doesn't have __builtin_choose_expr. */
+static inline __uint128_t TO_UNSIGNED_LOW(__int128_t x) { return x; } 
+static inline __uint128_t TO_UNSIGNED_LOW(__uint128_t x) { return x; } 
+#else
+#define TO_UNSIGNED_LOW(x) ( \
+    __builtin_choose_expr(sizeof(x) <= sizeof(int), (unsigned)(x), \
+    __builtin_choose_expr(sizeof(x) <= sizeof(long long), (unsigned long long)(x), \
+    __builtin_choose_expr(sizeof(x) <= sizeof(__int128_t), (__uint128_t)(x), \
+    (void)0))))  /* Compile-time error when assigned to something. */
+#endif
+#define MAX_INT_SIZE (sizeof(long long) > sizeof(__int128_t) ? sizeof(long long) : sizeof(__int128_t))
+#else
+#ifndef __cplusplus
+#define TO_UNSIGNED_LOW(x) ( \
+    __builtin_choose_expr(sizeof(x) <= sizeof(int), (unsigned)(x), \
+    __builtin_choose_expr(sizeof(x) <= sizeof(long long), (unsigned long long)(x), \
+    (void)0)))  /* Compile-time error when assigned to something. */
+#endif
+#define MAX_INT_SIZE (sizeof(long long))
+#endif
+/* Converts to the the corresponding (or a bit larger) unsigned type.
+ */
+#define to_unsigned(x) TO_UNSIGNED_LOW(x)
+
+/* Doesn't evaluate x. Returns (int)0 or (int)1 indicating whether the value or
+ * type x is unsigned.
+ */
+#define is_unsigned(a) (((__typeof__(a))-1) > 0)
+
+/* Detect signed addition overflow, without executing a single overflowing
+ * operation.
+ */
+#define _IS_ADD_OVERFLOW_S(x, y, c) ({ \
+    __typeof__(x) _x = (x), _y  = (y), _z = \
+        (_x ^ ~_y) & (1 << (sizeof(_x) * 8 - 1)); \
+    (int)(((_z & (((_x ^ _z) + _y + (c)) ^ ~_y)) >> \
+    (sizeof(_x) * 8 - 1)) & 1); })
+
+/* Detect signed subtraction overflow, without executing a single overflowing
+ * operation.
+ */
+#define _IS_SUBTRACT_OVERFLOW_S(x, y, c) ({ \
+    __typeof__(x) _x = (x), _y  = (y), _z = \
+        (_x ^ ~_y) & (1 << (sizeof(_x) * 8 - 1)); \
+    (int)(((_z & (((_x ^ _z) - _y - (c)) ^ _y)) >> \
+    (sizeof(_x) * 8 - 1)) & 1); })
+
+/* Returns (int)0 or (int)1 indicating whether the addition x + y + c would
+ * overflow or underflow. x and y must be of the same (signed or unsigned)
+ * integer type, and c must be 0 or 1, of any integer (or bool) type.
+ */
+#define is_add_overflow(x, y, c) ( \
+    sizeof(x) > MAX_INT_SIZE && !is_unsigned(x) ? \
+    _IS_ADD_OVERFLOW_S(x, y, c) : ({ \
+    __typeof__(x) _x = (x), _y = (y), _s = \
+        (__typeof__(x))(to_unsigned(_x) + _y + (c)); \
+    is_unsigned(_x) ? \
+    (int)((((_x & _y) | ((_x | _y) & ~_s)) >> (sizeof(_x) * 8 - 1))) : \
+    (int)((((_s ^ _x) & (_s ^ _y)) >> (sizeof(_x) * 8 - 1)) & 1); }))
+
+/* Returns (int)0 or (int)1 indicating whether the subtraction x - y - c would
+ * overflow or underflow. x and y must be of the same (signed or unsigned)
+ * integer type, and c must be 0 or 1, of any integer (or bool) type.
+ */
+#define is_subtract_overflow(x, y, c) ( \
+    sizeof(x) > MAX_INT_SIZE && !is_unsigned(x) ? \
+    _IS_SUBTRACT_OVERFLOW_S(x, y, c) : ({ \
+    __typeof__(x) _x = (x), _y = (y), _s = \
+        (__typeof__(x))(to_unsigned(_x) - _y - (c)); \
+    is_unsigned(_x) ? \
+    (int)((((~_x & _y) | ((~_x | _y) & _s)) >> (sizeof(_x) * 8 - 1))) : \
+    (int)((((_x ^ _y) & (_s ^ _x)) >> (sizeof(_x) * 8 - 1)) & 1); }))
+
+#endif  /* INT_OVERFLOW_H */
@@ -41,7 +41,49 @@
 #define PHP_RAND_MAX RAND_MAX
 #endif
 
+/**
+ * Generate a random number in the range [__min, __max], ensuring that we never
+ * teleport into UndefinedBehavior land because __max - __min > ZEND_LONG_MAX.
+ */
 #define RAND_RANGE(__n, __min, __max, __tmax) \
+    do { \
+        if (is_subtract_overflow(__max, __min, 0)) { \
+            zend_long number_a = (__n); \
+            zend_long number_b = (__n); \
+            RAND_SCALE(number_a, __min, -1, __tmax); \
+            RAND_SCALE(number_b, 0,  __max, __tmax); \
+            (__n) = number_a + number_b; \
+        } else { \
+            RAND_SCALE(__n, __min, __max, __tmax); \
+        } \
+    } while (0)
+
+/*
+ * A bit of tricky math here.  We want to avoid using a modulus because
+ * that simply tosses the high-order bits and might skew the distribution
+ * of random values over the range.  Instead we map the range directly.
+ *
+ * We need to map the range from 0...M evenly to the range a...b
+ * Let n = the random number and n' = the mapped random number
+ *
+ * Then we have: n' = a + n(b-a)/M
+ *
+ * We have a problem here in that only n==M will get mapped to b which
+ # means the chances of getting b is much much less than getting any of
+ # the other values in the range.  We can fix this by increasing our range
+ # artificially and using:
+ #
+ #               n' = a + n(b-a+1)/M
+ *
+ # Now we only have a problem if n==M which would cause us to produce a
+ # number of b+1 which would be bad.  So we bump M up by one to make sure
+ # this will never happen, and the final algorithm looks like this:
+ #
+ #               n' = a + n(b-a+1)/(M+1)
+ *
+ * -RL
+ */
+#define RAND_SCALE(__n, __min, __max, __tmax) \
     (__n) = (__min) + (zend_long) ((double) ( (double) (__max) - (__min) + 1.0) * ((__n) / ((__tmax) + 1.0)))
 
 /* MT Rand */

@@ -30,6 +30,7 @@
 #include "php.h"
 #include "php_math.h"
 #include "php_rand.h"
+#include "int_overflow.h"
 
 #include "basic_functions.h"
 
@@ -257,33 +258,6 @@ PHP_FUNCTION(mt_srand)
 }
 /* }}} */
 
-
-/*
- * A bit of tricky math here.  We want to avoid using a modulus because
- * that simply tosses the high-order bits and might skew the distribution
- * of random values over the range.  Instead we map the range directly.
- *
- * We need to map the range from 0...M evenly to the range a...b
- * Let n = the random number and n' = the mapped random number
- *
- * Then we have: n' = a + n(b-a)/M
- *
- * We have a problem here in that only n==M will get mapped to b which
- # means the chances of getting b is much much less than getting any of
- # the other values in the range.  We can fix this by increasing our range
- # artificially and using:
- #
- #               n' = a + n(b-a+1)/M
- *
- # Now we only have a problem if n==M which would cause us to produce a
- # number of b+1 which would be bad.  So we bump M up by one to make sure
- # this will never happen, and the final algorithm looks like this:
- #
- #               n' = a + n(b-a+1)/(M+1)
- *
- * -RL
- */
-
 /* {{{ proto int rand([int min, int max])
    Returns a random number */
 PHP_FUNCTION(rand)

@@ -0,0 +1,158 @@
+--TEST--
+Random number generators should not overflow
+--FILE--
+<?php
+
+// the biggest range we can accommodate, based on RAND_SCALE scaling algo
+// 32bit: $upper == getrandmax()       // since we use 32-bit MT algo
+// 64bit: getrandmax() < PHP_INT_MAX   // since we allow scaling into 64-bit range
+if (! defined('PHP_INT_MIN')) {
+    define('PHP_INT_MIN', (-PHP_INT_MAX-1));
+}
+$lower = PHP_INT_MIN;
+$upper = PHP_INT_MAX;
+
+// define our test ranges
+$ranges = array (
+    // nowhere close to overflowing
+    array (0, 242),
+    array (1, 242),
+    array (65, 90),
+    array (-242, -1),
+    array (-242, 0),
+    array (-242, 1),
+    array (-10000, 10000),
+    array (19781017, 20130513),
+    array (-19561018, -19551124),
+
+    // teetering on the edge of overflow
+    array (1, $upper),    // barely inside range
+    array (1, $upper-1),  // also barely inside range
+    array (0, $upper),    // on the borders of our int size
+    array ($lower, -1),   // also on the borders of our int size
+
+    // range (max - min) exceeds PHP_INT_MAX
+    // notice these all cross 0
+    array (-1, $upper, true),               // one too big
+    array ($lower, 1, true),                // also one too big
+    array ($lower, $upper, true),           // $upper - $lower > PHP_INT_MAX
+    array (-$upper,$upper, true),           // double your pleasure
+    array ($lower, -($lower+1), true),      // double your pleasure, again
+    array (PHP_INT_MIN, PHP_INT_MAX, true), // whoa, as big as it gets
+);
+
+foreach (array ('mt_rand', 'rand') as $prng) {
+    var_dump($prng);
+
+    // check that we produce random numbers in the given ranges
+    var_dump(count($ranges));
+    foreach ($ranges as $k => $range) {
+        $number = mt_rand($range[0], $range[1]);
+        var_dump($number);                                      // expect: integer
+        var_dump($range[0] <= $number && $number <= $range[1]); // expect: true
+    }
+
+    // quick check the uniformity of the distribution
+    // "each nonrandom sequence is nonrandom in its own way."
+    // Observed that zero popped up a lot when there was overflow
+    $n = 10000;
+    $t =  4000; // 40% is well more than would be expected in a uniform distribution
+    foreach ($ranges as $range) {
+        if (empty($range[2])) {
+            // not an overflow test, skip it
+            continue;
+        }
+        $zeroCnt = 0;
+        for ($i = 0; $i < $n; $i++) {
+            $point = mt_rand($range[0], $range[1]);
+            if (0 === $point) {
+                $zeroCnt++;
+            }
+        }
+        if ($t < $zeroCnt) {
+            var_dump("Zero popped up more than 40% of the time in range [${range[0]}, ${range[1]})");
+        }
+    }
+}
+
+--EXPECTF--
+string(7) "mt_rand"
+int(19)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+string(4) "rand"
+int(19)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)
+int(%i)
+bool(true)