Skip to content

Commit 80dfde7

Browse files
committed
Refactor zend comparison helpers and SORT_REGULAR transitivity detection
- Introduce transitive-aware comparison helpers in Zend: zend_compare_long_to_string_ex(), zend_compare_double_to_string_ex() and zendi_smart_strcmp_ex().
- Switch array sorts and array_unique to use them and adjust php_hash_values_need_transitivity().
1 parent 7bc7a5a commit 80dfde7

File tree

3 files changed

+190
-251
lines changed

3 files changed

+190
-251
lines changed

Zend/zend_operators.c

Lines changed: 3 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -2259,50 +2259,13 @@ ZEND_API zend_result ZEND_FASTCALL compare_function(zval *result, zval *op1, zva
22592259

22602260
static int compare_long_to_string(zend_long lval, zend_string *str) /* {{{ */
22612261
{
2262-
zend_long str_lval;
2263-
double str_dval;
2264-
uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0);
2265-
2266-
if (type == IS_LONG) {
2267-
return ZEND_THREEWAY_COMPARE(lval, str_lval);
2268-
}
2269-
2270-
if (type == IS_DOUBLE) {
2271-
return ZEND_THREEWAY_COMPARE((double) lval, str_dval);
2272-
}
2273-
2274-
char buf[MAX_LENGTH_OF_LONG + 1];
2275-
char *tmp = zend_print_long_to_buf(buf + sizeof(buf) - 1, lval);
2276-
size_t tmp_len = buf + sizeof(buf) - 1 - tmp;
2277-
int cmp_result = zend_binary_strcmp(
2278-
tmp, tmp_len, ZSTR_VAL(str), ZSTR_LEN(str));
2279-
return ZEND_NORMALIZE_BOOL(cmp_result);
2262+
return zend_compare_long_to_string_ex(lval, str, false);
22802263
}
22812264
/* }}} */
22822265

22832266
static int compare_double_to_string(double dval, zend_string *str) /* {{{ */
22842267
{
2285-
zend_long str_lval;
2286-
double str_dval;
2287-
uint8_t type = is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &str_lval, &str_dval, 0);
2288-
2289-
ZEND_ASSERT(!zend_isnan(dval));
2290-
2291-
if (type == IS_LONG) {
2292-
return ZEND_THREEWAY_COMPARE(dval, (double) str_lval);
2293-
}
2294-
2295-
if (type == IS_DOUBLE) {
2296-
return ZEND_THREEWAY_COMPARE(dval, str_dval);
2297-
}
2298-
2299-
char buf[ZEND_DOUBLE_MAX_LENGTH];
2300-
int precision = (int) EG(precision);
2301-
zend_gcvt(dval, precision ? precision : 1, '.', 'E', buf);
2302-
size_t tmp_len = strlen(buf);
2303-
int cmp_result = zend_binary_strcmp(
2304-
buf, tmp_len, ZSTR_VAL(str), ZSTR_LEN(str));
2305-
return ZEND_NORMALIZE_BOOL(cmp_result);
2268+
return zend_compare_double_to_string_ex(dval, str, false, (int) EG(precision));
23062269
}
23072270
/* }}} */
23082271

@@ -3423,73 +3386,7 @@ ZEND_API bool ZEND_FASTCALL zendi_smart_streq(zend_string *s1, zend_string *s2)
34233386

34243387
ZEND_API int ZEND_FASTCALL zendi_smart_strcmp(zend_string *s1, zend_string *s2) /* {{{ */
34253388
{
3426-
uint8_t ret1 = 0, ret2 = 0;
3427-
int oflow1 = 0, oflow2 = 0;
3428-
zend_long lval1 = 0, lval2 = 0;
3429-
double dval1 = 0.0, dval2 = 0.0;
3430-
3431-
if ((unsigned char)s1->val[0] > '9' && (unsigned char)s2->val[0] > '9') {
3432-
goto string_cmp;
3433-
}
3434-
3435-
if (UNEXPECTED(s1->len == 0 || s2->len == 0)) {
3436-
goto string_cmp;
3437-
}
3438-
3439-
if ((unsigned char)s1->val[0] <= '9') {
3440-
ret1 = is_numeric_string_ex(s1->val, s1->len, &lval1, &dval1, false, &oflow1, NULL);
3441-
}
3442-
3443-
if (!ret1) {
3444-
goto string_cmp;
3445-
}
3446-
3447-
ret2 = ((unsigned char)s2->val[0] <= '9')
3448-
? is_numeric_string_ex(s2->val, s2->len, &lval2, &dval2, false, &oflow2, NULL)
3449-
: 0;
3450-
3451-
if (!ret2) {
3452-
goto string_cmp;
3453-
}
3454-
3455-
#if ZEND_ULONG_MAX == 0xFFFFFFFF
3456-
if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. &&
3457-
((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/)
3458-
|| (oflow1 == -1 && dval1 < -9007199254740991.))) {
3459-
#else
3460-
if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) {
3461-
#endif
3462-
/* both values are integers overflowed to the same side, and the
3463-
* double comparison may have resulted in crucial accuracy lost */
3464-
goto string_cmp;
3465-
}
3466-
3467-
if ((ret1 == IS_DOUBLE) || (ret2 == IS_DOUBLE)) {
3468-
if (ret1 != IS_DOUBLE) {
3469-
if (oflow2) {
3470-
/* 2nd operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1) */
3471-
return -1 * oflow2;
3472-
}
3473-
dval1 = (double) lval1;
3474-
} else if (ret2 != IS_DOUBLE) {
3475-
if (oflow1) {
3476-
return oflow1;
3477-
}
3478-
dval2 = (double) lval2;
3479-
} else if (dval1 == dval2 && !zend_finite(dval1)) {
3480-
/* Both values overflowed and have the same sign,
3481-
* so a numeric comparison would be inaccurate */
3482-
goto string_cmp;
3483-
}
3484-
return ZEND_THREEWAY_COMPARE(dval1, dval2);
3485-
}
3486-
3487-
return ZEND_THREEWAY_COMPARE(lval1, lval2);
3488-
3489-
int strcmp_ret;
3490-
string_cmp:
3491-
strcmp_ret = zend_binary_strcmp(s1->val, s1->len, s2->val, s2->len);
3492-
return ZEND_NORMALIZE_BOOL(strcmp_ret);
3389+
return zendi_smart_strcmp_ex(s1, s2, false);
34933390
}
34943391
/* }}} */
34953392

Zend/zend_operators.h

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,169 @@ zend_memnistr(const char *haystack, const char *needle, size_t needle_len, const
10681068
return NULL;
10691069
}
10701070

1071+
/*
 * Bytewise three-way comparison of two strings.
 *
 * The shared prefix (the length of the shorter string) is compared with
 * memcmp(); if the prefixes are equal the shorter string orders first.
 * Returns -1, 0 or 1.
 */
static zend_always_inline int zend_compare_non_numeric_strings(zend_string *s1, zend_string *s2)
{
	const size_t len1 = ZSTR_LEN(s1);
	const size_t len2 = ZSTR_LEN(s2);
	const int prefix_order = memcmp(ZSTR_VAL(s1), ZSTR_VAL(s2), len1 < len2 ? len1 : len2);

	/* memcmp() only promises the sign of its result, so clamp it. */
	if (prefix_order < 0) {
		return -1;
	}
	if (prefix_order > 0) {
		return 1;
	}

	/* Identical prefixes: the lengths decide. */
	return ZEND_THREEWAY_COMPARE(len1, len2);
}
1080+
1081+
/*
 * Three-way comparison of an integer against a string.
 *
 * lval       - integer operand (left-hand side).
 * str        - string operand (right-hand side).
 * transitive - selects how a string that is_numeric_string() does not
 *              classify as IS_LONG or IS_DOUBLE is handled (see below).
 *
 * If the string parses as an integer or a double, the comparison is done
 * numerically. Otherwise:
 *   - transitive:     the integer orders after an empty string (returns 1)
 *                     and before any other such string (returns -1);
 *   - non-transitive: the decimal rendering of lval is compared bytewise
 *                     with the string.
 *
 * Returns -1, 0 or 1.
 */
static zend_always_inline int zend_compare_long_to_string_ex(zend_long lval, zend_string *str, bool transitive)
{
	zend_long parsed_long;
	double parsed_double;

	switch (is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &parsed_long, &parsed_double, 0)) {
		case IS_LONG:
			return ZEND_THREEWAY_COMPARE(lval, parsed_long);
		case IS_DOUBLE:
			return ZEND_THREEWAY_COMPARE((double) lval, parsed_double);
		default:
			break;
	}

	if (transitive) {
		return ZSTR_LEN(str) == 0 ? 1 : -1;
	}

	/* Render lval backwards from the end of the buffer, then compare as bytes. */
	char digits[MAX_LENGTH_OF_LONG + 1];
	char *const digits_end = digits + sizeof(digits) - 1;
	char *digits_start = zend_print_long_to_buf(digits_end, lval);
	size_t digits_len = digits_end - digits_start;
	int order = zend_binary_strcmp(digits_start, digits_len, ZSTR_VAL(str), ZSTR_LEN(str));

	return ZEND_NORMALIZE_BOOL(order);
}
1109+
1110+
/*
 * Three-way comparison of a double against a string.
 *
 * dval       - double operand (left-hand side); asserted not to be NaN.
 * str        - string operand (right-hand side).
 * transitive - selects how a string that is_numeric_string() does not
 *              classify as IS_LONG or IS_DOUBLE is handled (see below).
 * precision  - digit count handed to zend_gcvt() for the non-transitive
 *              string rendering; 0 is replaced by 1.
 *
 * If the string parses as an integer or a double, the comparison is done
 * numerically. Otherwise:
 *   - transitive:     the double orders after an empty string (returns 1)
 *                     and before any other such string (returns -1);
 *   - non-transitive: dval is rendered with zend_gcvt() and compared
 *                     bytewise with the string.
 *
 * Returns -1, 0 or 1.
 */
static zend_always_inline int zend_compare_double_to_string_ex(double dval, zend_string *str, bool transitive, int precision)
{
	zend_long parsed_long;
	double parsed_double;

	ZEND_ASSERT(!zend_isnan(dval));

	switch (is_numeric_string(ZSTR_VAL(str), ZSTR_LEN(str), &parsed_long, &parsed_double, 0)) {
		case IS_LONG:
			return ZEND_THREEWAY_COMPARE(dval, (double) parsed_long);
		case IS_DOUBLE:
			return ZEND_THREEWAY_COMPARE(dval, parsed_double);
		default:
			break;
	}

	if (transitive) {
		return ZSTR_LEN(str) == 0 ? 1 : -1;
	}

	char rendered[ZEND_DOUBLE_MAX_LENGTH];
	int digits = precision ? precision : 1;

	zend_gcvt(dval, digits, '.', 'E', rendered);

	int order = zend_binary_strcmp(rendered, strlen(rendered), ZSTR_VAL(str), ZSTR_LEN(str));

	return ZEND_NORMALIZE_BOOL(order);
}
1140+
1141+
/*
 * "Smart" three-way comparison of two strings: strings that parse as numbers
 * are compared by value, everything else is compared bytewise.
 *
 * s1, s2     - operands.
 * transitive - when false, any pair in which at least one side is not numeric
 *              (or is empty) falls back to a bytewise comparison of both
 *              strings. When true, the cross-category results are fixed
 *              instead:
 *                - an empty string orders before any non-empty string;
 *                - a numeric string orders before a non-numeric string;
 *                - two non-numeric strings are compared bytewise via
 *                  zend_compare_non_numeric_strings().
 *
 * In both modes two numeric strings are compared as integers or doubles,
 * except when both overflowed in a way that makes the double comparison
 * unreliable; those pairs are compared bytewise as well.
 *
 * Returns -1, 0 or 1.
 */
static zend_always_inline int zendi_smart_strcmp_ex(zend_string *s1, zend_string *s2, bool transitive)
{
	uint8_t ret1 = 0, ret2 = 0;
	int oflow1 = 0, oflow2 = 0;
	zend_long lval1 = 0, lval2 = 0;
	double dval1 = 0.0, dval2 = 0.0;
	/* Declared here, without an initializer, so that none of the gotos below
	 * jumps over an initialization. */
	int strcmp_ret;

	/* Fast path: neither string starts with a byte <= '9', so neither is
	 * handed to the numeric parser at all. */
	if ((unsigned char)ZSTR_VAL(s1)[0] > '9'
	 && (unsigned char)ZSTR_VAL(s2)[0] > '9') {
		goto string_cmp;
	}

	if (UNEXPECTED(ZSTR_LEN(s1) == 0 || ZSTR_LEN(s2) == 0)) {
		if (transitive) {
			/* The empty string orders before everything else. */
			if (ZSTR_LEN(s1) == 0 && ZSTR_LEN(s2) == 0) {
				return 0;
			}
			return ZSTR_LEN(s1) == 0 ? -1 : 1;
		}
		goto string_cmp;
	}

	if ((unsigned char)ZSTR_VAL(s1)[0] <= '9') {
		ret1 = is_numeric_string_ex(ZSTR_VAL(s1), ZSTR_LEN(s1), &lval1, &dval1, false, &oflow1, NULL);
	}

	if (!ret1) {
		/* s1 is not numeric. In transitive mode it orders after a numeric s2. */
		if (transitive && (unsigned char)ZSTR_VAL(s2)[0] <= '9') {
			ret2 = is_numeric_string_ex(ZSTR_VAL(s2), ZSTR_LEN(s2), &lval2, &dval2, false, &oflow2, NULL);
			if (ret2) {
				return 1;
			}
		}
		goto string_cmp;
	}

	/* s1 is numeric from here on. */
	if (transitive) {
		ret2 = is_numeric_string_ex(ZSTR_VAL(s2), ZSTR_LEN(s2), &lval2, &dval2, false, &oflow2, NULL);
		if (!ret2) {
			/* Numeric s1 orders before non-numeric s2. */
			return -1;
		}
	} else {
		ret2 = ((unsigned char)ZSTR_VAL(s2)[0] <= '9')
			? is_numeric_string_ex(ZSTR_VAL(s2), ZSTR_LEN(s2), &lval2, &dval2, false, &oflow2, NULL)
			: 0;
		if (!ret2) {
			goto string_cmp;
		}
	}

#if ZEND_ULONG_MAX == 0xFFFFFFFF
	if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. &&
		((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/)
		|| (oflow1 == -1 && dval1 < -9007199254740991.))) {
#else
	if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) {
#endif
		/* Both values are integers overflowed to the same side, and the
		 * double comparison may have resulted in crucial accuracy loss. */
		goto string_cmp;
	}

	if ((ret1 == IS_DOUBLE) || (ret2 == IS_DOUBLE)) {
		if (ret1 != IS_DOUBLE) {
			if (oflow2) {
				/* Second operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1). */
				return -1 * oflow2;
			}
			dval1 = (double) lval1;
		} else if (ret2 != IS_DOUBLE) {
			if (oflow1) {
				/* First operand overflowed the long range. */
				return oflow1;
			}
			dval2 = (double) lval2;
		} else if (dval1 == dval2 && !zend_finite(dval1)) {
			/* Both values overflowed and have the same sign,
			 * so a numeric comparison would be inaccurate. */
			goto string_cmp;
		}
		return ZEND_THREEWAY_COMPARE(dval1, dval2);
	}

	return ZEND_THREEWAY_COMPARE(lval1, lval2);

string_cmp:
	if (transitive) {
		return zend_compare_non_numeric_strings(s1, s2);
	}

	/* Store the result before normalizing: ZEND_NORMALIZE_BOOL is a macro
	 * (defined elsewhere) that may expand its argument more than once, which
	 * would repeat the comparison. */
	strcmp_ret = zend_binary_strcmp(ZSTR_VAL(s1), ZSTR_LEN(s1), ZSTR_VAL(s2), ZSTR_LEN(s2));
	return ZEND_NORMALIZE_BOOL(strcmp_ret);
}
10711234

10721235
END_EXTERN_C()
10731236

0 commit comments

Comments
 (0)