|
44 | 44 | import org.jruby.truffle.runtime.rubinius.RubiniusByteArray; |
45 | 45 | import org.jruby.truffle.runtime.util.ArrayUtils; |
46 | 46 | import org.jruby.util.ByteList; |
| 47 | +import org.jruby.util.CodeRangeSupport; |
47 | 48 | import org.jruby.util.Pack; |
48 | 49 | import org.jruby.util.StringSupport; |
49 | 50 | import org.jruby.util.io.EncodingUtils; |
@@ -847,20 +848,74 @@ public Object eachChar(VirtualFrame frame, RubyString string, @SuppressWarnings( |
847 | 848 | return toEnumNode.call(frame, string, "to_enum", null, getContext().newSymbol("each_char")); |
848 | 849 | } |
849 | 850 |
|
/**
 * Yields every character of {@code string} to {@code block}, one substring at a time.
 *
 * Selected by the Truffle DSL when the {@code isValidOr7BitEncoding} guard holds for the
 * receiver. The string's bytes are walked from offset 0; at each offset the value returned
 * by {@code StringSupport.encFastMBCLen} is used both as the length of the substring that
 * is yielded and as the step to the next offset.
 *
 * @param frame  the current frame, passed through to {@code yield}
 * @param string the string whose characters are enumerated
 * @param block  the block that receives each character substring
 * @return the receiver, {@code string}
 */
@Specialization(guards = "isValidOr7BitEncoding")
public RubyString eachChar(VirtualFrame frame, RubyString string, RubyProc block) {
    ByteList strByteList = string.getByteList();
    byte[] ptrBytes = strByteList.unsafeBytes();
    int ptr = strByteList.begin();
    int len = strByteList.getRealSize();
    Encoding enc = string.getBytes().getEncoding();

    final int stringLength = string.getBytes().length();
    int n;

    for (int i = 0; i < stringLength; i += n) {
        // i is an offset relative to the start of the string's data; ptr rebases it
        // into the backing array handed to the encoding helper.
        n = StringSupport.encFastMBCLen(ptrBytes, ptr + i, ptr + len, enc);

        yield(frame, block, substr(string, i, n));
    }

    return string;
}
863 | 870 |
|
/**
 * Yields every character of {@code string} to {@code block}, one substring at a time.
 *
 * Selected by the Truffle DSL when the {@code isValidOr7BitEncoding} guard does NOT hold
 * for the receiver. The width used for each step comes from
 * {@code multiByteStringLength}; that same value is the length of the substring yielded.
 *
 * @param frame  the current frame, passed through to {@code yield}
 * @param string the string whose characters are enumerated
 * @param block  the block that receives each character substring
 * @return the receiver, {@code string}
 */
@Specialization(guards = "!isValidOr7BitEncoding")
public RubyString eachCharMultiByteEncoding(VirtualFrame frame, RubyString string, RubyProc block) {
    final ByteList byteList = string.getByteList();
    final byte[] rawBytes = byteList.unsafeBytes();
    final int start = byteList.begin();
    final int limit = start + byteList.getRealSize();
    final Encoding encoding = string.getBytes().getEncoding();
    final int byteCount = string.getBytes().length();

    int offset = 0;
    while (offset < byteCount) {
        // offset is relative to the string's data; start rebases it into the backing array.
        final int width = multiByteStringLength(encoding, rawBytes, start + offset, limit);

        yield(frame, block, substr(string, offset, width));
        offset += width;
    }

    return string;
}
| 890 | + |
| 891 | + public static boolean isValidOr7BitEncoding(RubyString string) { |
| 892 | + return string.isCodeRangeValid() || CodeRangeSupport.isCodeRangeAsciiOnly(string); |
| 893 | + } |
| 894 | + |
| 895 | + @TruffleBoundary |
| 896 | + private int multiByteStringLength(Encoding enc, byte[] bytes, int p, int end) { |
| 897 | + return StringSupport.length(enc, bytes, p, end); |
| 898 | + } |
| 899 | + |
| 900 | + // TODO (nirvdrum 10-Mar-15): This was extracted from JRuby, but likely will need to become a Rubinius primitive. |
| 901 | + private Object substr(RubyString string, int beg, int len) { |
| 902 | + final ByteList bytes = string.getBytes(); |
| 903 | + |
| 904 | + int length = bytes.length(); |
| 905 | + if (len < 0 || beg > length) return getContext().getCoreLibrary().getNilObject(); |
| 906 | + |
| 907 | + if (beg < 0) { |
| 908 | + beg += length; |
| 909 | + if (beg < 0) getContext().getCoreLibrary().getNilObject(); |
| 910 | + } |
| 911 | + |
| 912 | + int end = Math.min(length, beg + len); |
| 913 | + |
| 914 | + final ByteList substringBytes = new ByteList(bytes, beg, end - beg); |
| 915 | + substringBytes.setEncoding(bytes.getEncoding()); |
| 916 | + |
| 917 | + return getContext().makeString(string.getLogicalClass(), substringBytes); |
| 918 | + } |
864 | 919 | } |
865 | 920 |
|
866 | 921 | @CoreMethod(names = "empty?") |
|
0 commit comments