Skip to content

Commit 8299588

Browse files
committed
[Truffle] String#each_char is now multibyte-encoding aware.
1 parent dcee5f0 commit 8299588

File tree

3 files changed

+61
-18
lines changed

3 files changed

+61
-18
lines changed

spec/truffle/tags/core/string/chars_tags.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

spec/truffle/tags/core/string/each_char_tags.txt

Lines changed: 0 additions & 6 deletions
This file was deleted.

truffle/src/main/java/org/jruby/truffle/nodes/core/StringNodes.java

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import org.jruby.truffle.runtime.rubinius.RubiniusByteArray;
4545
import org.jruby.truffle.runtime.util.ArrayUtils;
4646
import org.jruby.util.ByteList;
47+
import org.jruby.util.CodeRangeSupport;
4748
import org.jruby.util.Pack;
4849
import org.jruby.util.StringSupport;
4950
import org.jruby.util.io.EncodingUtils;
@@ -847,20 +848,74 @@ public Object eachChar(VirtualFrame frame, RubyString string, @SuppressWarnings(
847848
return toEnumNode.call(frame, string, "to_enum", null, getContext().newSymbol("each_char"));
848849
}
849850

850-
@Specialization
851+
@Specialization(guards = "isValidOr7BitEncoding")
851852
public RubyString eachChar(VirtualFrame frame, RubyString string, RubyProc block) {
852-
notDesignedForCompilation();
853+
ByteList strByteList = string.getByteList();
854+
byte[] ptrBytes = strByteList.unsafeBytes();
855+
int ptr = strByteList.begin();
856+
int len = strByteList.getRealSize();
857+
Encoding enc = string.getBytes().getEncoding();
853858

854-
// TODO (nirvdrum 04-Feb-15): This needs to support Ruby' encoding and code range semantics. For now, this hack will suffice for very simple Strings.
855-
final String javaString = string.toString();
859+
final int stringLength = string.getBytes().length();
860+
int n;
861+
862+
for (int i = 0; i < stringLength; i += n) {
863+
n = StringSupport.encFastMBCLen(ptrBytes, ptr + i, ptr + len, enc);
856864

857-
for (int i = 0; i < javaString.length(); i++) {
858-
yield(frame, block, getContext().makeString(javaString.charAt(i)));
865+
yield(frame, block, substr(string, i, n));
859866
}
860867

861868
return string;
862869
}
863870

871+
/**
 * Yields each character of {@code string} to {@code block}; selected when the
 * {@code isValidOr7BitEncoding} guard is false.
 *
 * Walks the string's bytes from offset 0, asks {@link #multiByteStringLength} for the
 * width of the character at the current offset, yields that slice (built by
 * {@code substr}) to the block, and advances by the reported width.
 *
 * @param frame  the current frame, passed through to {@code yield}
 * @param string the string being iterated
 * @param block  the block that receives each character
 * @return {@code string} itself
 */
@Specialization(guards = "!isValidOr7BitEncoding")
public RubyString eachCharMultiByteEncoding(VirtualFrame frame, RubyString string, RubyProc block) {
    final ByteList byteList = string.getByteList();
    final byte[] rawBytes = byteList.unsafeBytes();
    final int start = byteList.begin();
    // Absolute end offset inside rawBytes; handed to the width lookup as its upper bound.
    final int limit = start + byteList.getRealSize();
    final Encoding encoding = string.getBytes().getEncoding();
    final int totalBytes = string.getBytes().length();

    int offset = 0;
    while (offset < totalBytes) {
        final int width = multiByteStringLength(encoding, rawBytes, start + offset, limit);

        yield(frame, block, substr(string, offset, width));

        offset += width;
    }

    return string;
}
890+
891+
public static boolean isValidOr7BitEncoding(RubyString string) {
892+
return string.isCodeRangeValid() || CodeRangeSupport.isCodeRangeAsciiOnly(string);
893+
}
894+
895+
@TruffleBoundary
896+
private int multiByteStringLength(Encoding enc, byte[] bytes, int p, int end) {
897+
return StringSupport.length(enc, bytes, p, end);
898+
}
899+
900+
// TODO (nirvdrum 10-Mar-15): This was extracted from JRuby, but likely will need to become a Rubinius primitive.
901+
private Object substr(RubyString string, int beg, int len) {
902+
final ByteList bytes = string.getBytes();
903+
904+
int length = bytes.length();
905+
if (len < 0 || beg > length) return getContext().getCoreLibrary().getNilObject();
906+
907+
if (beg < 0) {
908+
beg += length;
909+
if (beg < 0) getContext().getCoreLibrary().getNilObject();
910+
}
911+
912+
int end = Math.min(length, beg + len);
913+
914+
final ByteList substringBytes = new ByteList(bytes, beg, end - beg);
915+
substringBytes.setEncoding(bytes.getEncoding());
916+
917+
return getContext().makeString(string.getLogicalClass(), substringBytes);
918+
}
864919
}
865920

866921
@CoreMethod(names = "empty?")

0 commit comments

Comments
 (0)