Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
5bb9644
add benchmark
wenshao Jan 4, 2025
d24c635
optimize parseLong
wenshao Jan 4, 2025
288ac04
optimize parseInt
wenshao Jan 4, 2025
fa31ec4
reuse isDigit
wenshao Jan 4, 2025
aed2e3c
optimize parseInt & parseLong
wenshao Jan 4, 2025
f555dae
optimize parseInt & parseLong
wenshao Jan 4, 2025
5801b17
bug fix
wenshao Jan 4, 2025
9ea0845
use String::value
wenshao Jan 4, 2025
844780d
bug fix
wenshao Jan 4, 2025
46bb40d
bug fix
wenshao Jan 4, 2025
3b8d458
bug fix
wenshao Jan 4, 2025
2c4eeb4
code format
wenshao Jan 4, 2025
426d607
remove unused code
wenshao Jan 4, 2025
e99f5bb
reduce codeSize
wenshao Jan 4, 2025
c7db300
bug fix for bound check
wenshao Jan 4, 2025
736bd9f
reduce codeSize
wenshao Jan 5, 2025
e010e97
add comments
wenshao Jan 5, 2025
52defbd
use CharacterDataLatin1.instance::isDigit
wenshao Jan 5, 2025
fd51c1c
emptyInput -> nullInput
wenshao Jan 5, 2025
f97093d
vector digit2
wenshao Jan 5, 2025
b52130b
Update src/java.base/share/classes/jdk/internal/util/DecimalDigits.java
wenshao Jan 14, 2025
eb86797
remove unused
wenshao Jan 14, 2025
e2b228c
from @rgiulietti
wenshao Jan 24, 2025
dc2a675
use &
wenshao Jan 25, 2025
0bacfa0
Merge remote-tracking branch 'upstream/master' into optim_parse_int_l…
wenshao Jan 25, 2025
284ad6a
error message
wenshao Jan 28, 2025
0b04a70
copyright
wenshao Jan 29, 2025
a6d9846
multiply 10
wenshao Jan 30, 2025
1fb40bb
Merge remote-tracking branch 'upstream/master' into optim_parse_int_l…
wenshao Feb 1, 2025
2f67c24
Update src/java.base/share/classes/java/lang/Integer.java
wenshao Feb 4, 2025
40b9faf
Update src/java.base/share/classes/java/lang/Long.java
wenshao Feb 4, 2025
2702440
fix JdbExprTest
wenshao Feb 5, 2025
0c43e9a
copyright
wenshao Feb 5, 2025
c4863da
Update src/java.base/share/classes/jdk/internal/util/DecimalDigits.java
wenshao Feb 5, 2025
c8f514f
fix comments
wenshao Feb 5, 2025
a4181b0
fix comments
wenshao Feb 5, 2025
3a555c5
remove ForceInline
wenshao Feb 5, 2025
08535b4
Merge remote-tracking branch 'upstream/master' into optim_parse_int_l…
wenshao Mar 31, 2025
047e170
Merge remote-tracking branch 'upstream/master' into optim_parse_int_l…
wenshao May 2, 2025
25848bb
Merge remote-tracking branch 'upstream/master' into optim_parse_int_l…
wenshao Jun 14, 2025
f2920e0
Merge remote-tracking branch 'origin/optim_parse_int_long_202501' int…
wenshao Jun 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions src/java.base/share/classes/java/lang/Integer.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 1994, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -517,52 +518,53 @@ public static String toUnsignedString(int i) {
*/
public static int parseInt(String s, int radix)
throws NumberFormatException {
/*
* WARNING: This method may be invoked early during VM initialization
* before IntegerCache is initialized. Care must be taken to not use
* the valueOf method.
*/

if (s == null) {
throw new NumberFormatException("Cannot parse null string");
int len;
byte[] value;
if (s == null || radix != 10 || (len = (value = s.value()).length) == 0 || !s.isLatin1()) {
return parseInt0(s, radix);
}

if (radix < Character.MIN_RADIX) {
throw new NumberFormatException(String.format(
"radix %s less than Character.MIN_RADIX", radix));
/* Accumulating negatively avoids surprises near MAX_VALUE */
int fc = value[0];
int result = Integer.isDigitLatin1(fc)
? '0' - fc
: len != 1 && (fc == '-' || fc == '+')
? 0
: 1; // or any value > 0
int i = 1;
int d;
while (i + 1 < len
&& (d = DecimalDigits.digit2(value, i)) != -1
&& MIN_VALUE / 100 <= result & result <= 0) {
result = result * 100 - d; // overflow from d => result > 0
i += 2;
}

if (radix > Character.MAX_RADIX) {
throw new NumberFormatException(String.format(
"radix %s greater than Character.MAX_RADIX", radix));
if (i < len
&& Integer.isDigitLatin1(d = value[i])
&& MIN_VALUE / 10 <= result & result <= 0) {
result = result * 10 + '0' - d; // overflow from '0' - d => result > 0
i += 1;
}

int len = s.length();
if (len == 0) {
throw NumberFormatException.forInputString("", radix);
if (i == len
& result <= 0
& (MIN_VALUE < result || fc == '-')) {
return fc == '-' ? result : -result;
}
int digit = ~0xFF;
int i = 0;
char firstChar = s.charAt(i++);
if (firstChar != '-' && firstChar != '+') {
digit = digit(firstChar, radix);
throw NumberFormatException.forInputString(s);
}

private static int parseInt0(String s, int radix) {
if (s == null) {
throw NumberFormatException.nullInput();
}
if (digit >= 0 || digit == ~0xFF && len > 1) {
int limit = firstChar != '-' ? MIN_VALUE + 1 : MIN_VALUE;
int multmin = limit / radix;
int result = -(digit & 0xFF);
boolean inRange = true;
/* Accumulating negatively avoids surprises near MAX_VALUE */
while (i < len && (digit = digit(s.charAt(i++), radix)) >= 0
&& (inRange = result > multmin
|| result == multmin && digit <= radix * multmin - limit)) {
result = radix * result - digit;
}
if (inRange && i == len && digit >= 0) {
return firstChar != '-' ? -result : result;
}
int len;
if ((len = s.length()) == 0) {
throw NumberFormatException.forInputString(s);
}
throw NumberFormatException.forInputString(s, radix);
return parseInt(s, 0, len, radix);
Copy link
Contributor

@j3graham j3graham Jan 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this call will result in different exception messages in some cases. Consolidating the exception messages between the String and the CharSequence parsing methods would likely allow for even more code simplification, but currently it appears there was effort to preserve the exact message text in exceptions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to keep the original error message completely, we need to keep the original implementation, which will lead to code duplication.

}

/**
 * Reports whether {@code ch} is classified as a digit by the Latin-1
 * character data instance.
 *
 * @param ch the character value to test
 * @return the result of {@code CharacterDataLatin1.instance.isDigit(ch)}
 */
static boolean isDigitLatin1(int ch) {
    final boolean digit = CharacterDataLatin1.instance.isDigit(ch);
    return digit;
}

/**
Expand Down
72 changes: 38 additions & 34 deletions src/java.base/share/classes/java/lang/Long.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 1994, 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -552,46 +553,49 @@ public static String toUnsignedString(long i) {
*/
public static long parseLong(String s, int radix)
throws NumberFormatException {
if (s == null) {
throw new NumberFormatException("Cannot parse null string");
int len;
byte[] value;
if (s == null || radix != 10 || (len = (value = s.value()).length) == 0 || !s.isLatin1()) {
return parseLong0(s, radix);
}

if (radix < Character.MIN_RADIX) {
throw new NumberFormatException(String.format(
"radix %s less than Character.MIN_RADIX", radix));
/* Accumulating negatively avoids surprises near MAX_VALUE */
int fc = value[0];
long result = Integer.isDigitLatin1(fc)
? '0' - fc
: len != 1 && (fc == '-' || fc == '+')
? 0
: 1; // or any value > 0
int i = 1;
int d;
while (i + 1 < len
&& (d = DecimalDigits.digit2(value, i)) != -1
&& MIN_VALUE / 100 <= result & result <= 0) {
result = result * 100 - d; // overflow from d => result > 0
i += 2;
}

if (radix > Character.MAX_RADIX) {
throw new NumberFormatException(String.format(
"radix %s greater than Character.MAX_RADIX", radix));
if (i < len
&& Integer.isDigitLatin1(d = value[i])
&& MIN_VALUE / 10 <= result & result <= 0) {
result = result * 10 + '0' - d; // overflow from '0' - d => result > 0
i += 1;
}

int len = s.length();
if (len == 0) {
throw NumberFormatException.forInputString("", radix);
if (i == len
& result <= 0
& (MIN_VALUE < result || fc == '-')) {
return fc == '-' ? result : -result;
}
int digit = ~0xFF;
int i = 0;
char firstChar = s.charAt(i++);
if (firstChar != '-' && firstChar != '+') {
digit = digit(firstChar, radix);
throw NumberFormatException.forInputString(s);
}

private static long parseLong0(String s, int radix) {
if (s == null) {
throw NumberFormatException.nullInput();
}
if (digit >= 0 || digit == ~0xFF && len > 1) {
long limit = firstChar != '-' ? MIN_VALUE + 1 : MIN_VALUE;
long multmin = limit / radix;
long result = -(digit & 0xFF);
boolean inRange = true;
/* Accumulating negatively avoids surprises near MAX_VALUE */
while (i < len && (digit = digit(s.charAt(i++), radix)) >= 0
&& (inRange = result > multmin
|| result == multmin && digit <= (int) (radix * multmin - limit))) {
result = radix * result - digit;
}
if (inRange && i == len && digit >= 0) {
return firstChar != '-' ? -result : result;
}
int len;
if ((len = s.length()) == 0) {
throw NumberFormatException.forInputString(s);
}
throw NumberFormatException.forInputString(s, radix);
return parseLong(s, 0, len, radix);
}

/**
Expand Down
16 changes: 15 additions & 1 deletion src/java.base/share/classes/java/lang/NumberFormatException.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1994, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1994, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -54,6 +54,16 @@ public NumberFormatException (String s) {
super (s);
}

/**
 * Creates a {@code NumberFormatException} for the given offending input
 * by delegating to {@link #forInputString(String, int)} with a radix of 10.
 *
 * @param s the input causing the error
 * @return the exception built by the two-argument factory for radix 10
 */
static NumberFormatException forInputString(String s) {
    final int decimalRadix = 10;
    return forInputString(s, decimalRadix);
}

/**
* Factory method for making a {@code NumberFormatException}
* given the specified input which caused the error.
Expand Down Expand Up @@ -82,4 +92,8 @@ static NumberFormatException forCharSequence(CharSequence s,
+ (errorIndex - beginIndex) + " in: \""
+ s.subSequence(beginIndex, endIndex) + "\"");
}

/**
 * Factory method for the {@code NumberFormatException} raised when the
 * string to parse is {@code null}.
 *
 * @return a new exception whose message is "Cannot parse null string"
 */
static NumberFormatException nullInput() {
    final String message = "Cannot parse null string";
    return new NumberFormatException(message);
}
}
48 changes: 48 additions & 0 deletions src/java.base/share/classes/jdk/internal/util/DecimalDigits.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2025, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -132,6 +133,53 @@ public static int stringSize(long x) {
return 19 + d;
}

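/**
* Determines whether the two characters at {@code str[offset]} and {@code str[offset + 1]} are both decimal digits.
* If they are, returns {@code (str[offset] - '0') * 10 + (str[offset + 1] - '0')}; otherwise returns -1.
* <p>
* No bounds checks are performed here: this method is intended for trusted callers, which must
* ensure that both {@code offset} and {@code offset + 1} are valid indices into {@code str}.
* @param str the input LATIN1 encoded String value
* @param offset the index of the first of the two characters
* @return the two-digit value if both characters are digits, otherwise -1
*/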
public static int digit2(byte[] str, int offset) {
// Used by trusted callers. Assumes all necessary bounds checks have been done by the caller.
/*
Here we are doing a 2-byte vector operation on the short type: both bytes are
tested at once. The tables below show the two tests for a single byte x.
A byte is a digit ('0'..'9', i.e. 0x30..0x39) only if both tests yield 0;
any non-zero result makes the method return -1.

Test 1: (x & 0xF0) - 0x30 == 0 (the high nibble is 0x3)
---------------
0 0b0011_0000 & 0b1111_0000 = 0b0011_0000
1 0b0011_0001 & 0b1111_0000 = 0b0011_0000
2 0b0011_0010 & 0b1111_0000 = 0b0011_0000
3 0b0011_0011 & 0b1111_0000 = 0b0011_0000
4 0b0011_0100 & 0b1111_0000 = 0b0011_0000
5 0b0011_0101 & 0b1111_0000 = 0b0011_0000
6 0b0011_0110 & 0b1111_0000 = 0b0011_0000
7 0b0011_0111 & 0b1111_0000 = 0b0011_0000
8 0b0011_1000 & 0b1111_0000 = 0b0011_0000
9 0b0011_1001 & 0b1111_0000 = 0b0011_0000

Test 2: (((d = x & 0x0F) + 0x06) & 0xF0) == 0 (the low nibble is at most 9)
---------------
0 ((0b0011_0000 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
1 ((0b0011_0001 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
2 ((0b0011_0010 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
3 ((0b0011_0011 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
4 ((0b0011_0100 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
5 ((0b0011_0101 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
6 ((0b0011_0110 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
7 ((0b0011_0111 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
8 ((0b0011_1000 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
9 ((0b0011_1001 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0000_0000
Counter-example ':' (0x3A):
: ((0b0011_1010 & 0b0000_1111) + 0b0000_0110) & 0b1111_0000 = 0b0001_0000
*/
int d;
short x = UNSAFE.getShortUnaligned(str, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wenshao I'm a bit worried about the use of Unsafe here.

This method is public (although in an internal package), and while it is used correctly in this PR, there's no warning in the doc that str and offset must come from a trusted caller that ensures that they are safe to use with, well..., Unsafe.

Did you consider safer alternatives, like usage of VarHandle, even if that might mean a slight performance degradation?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, VarHandle cannot be used in this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using VarHandle in this scenario may affect JVM startup performance

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I concur that unsafe usage here is undesirable. Performance is not the only metric of interest when working on the JDK core libraries.

if ((((x & 0xF0F0) - 0x3030)
| (((d = x & 0x0F0F) + 0x0606) & 0xF0F0)) != 0
) {
return -1;
}
return (d & 0xF) * 10 + (d >> 8);
}

/**
* Places characters representing the integer i into the
* character array buf. The characters are placed into
Expand Down
4 changes: 2 additions & 2 deletions test/jdk/com/sun/jdi/JdbExprTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -112,7 +112,7 @@ protected void runCases() {
execCommand(JdbCommand.set("JdbExprTestTarg.anInt", "0x80000000"))
.shouldMatch("InvalidTypeException: .* convert 2147483648 to int");
execCommand(JdbCommand.set("JdbExprTestTarg.anInt", "0x8000000000000000L"))
.shouldContain("java.lang.NumberFormatException: For input string: \"8000000000000000\"");
.shouldContain("java.lang.NumberFormatException: Error at index 15 in: \"8000000000000000\"");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copyright year should be adjusted.


execCommand(JdbCommand.set("JdbExprTestTarg.anInt", "0x7fffffff"))
.shouldContain("0x7fffffff = 2147483647");
Expand Down
9 changes: 8 additions & 1 deletion test/micro/org/openjdk/bench/java/lang/Longs.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -78,6 +78,13 @@ public void toStringSmall(Blackhole bh) {
}
}

/**
 * Benchmarks {@link Long#parseLong(String)} over every entry of
 * {@code strings}, handing each parsed value to the blackhole.
 */
@Benchmark
public void parseLong(Blackhole bh) {
    for (String text : strings) {
        long parsed = Long.parseLong(text);
        bh.consume(parsed);
    }
}

@Benchmark
public void decode(Blackhole bh) {
for (String s : strings) {
Expand Down