Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ji] RubyString implements CharSequence #5180

Merged
merged 4 commits into from May 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
105 changes: 87 additions & 18 deletions core/src/main/java/org/jruby/RubyString.java
Expand Up @@ -117,7 +117,7 @@
*
*/
@JRubyClass(name="String", include={"Enumerable", "Comparable"})
public class RubyString extends RubyObject implements EncodingCapable, MarshalEncoding, CodeRangeable {
public class RubyString extends RubyObject implements CharSequence, EncodingCapable, MarshalEncoding, CodeRangeable {
public static final String DEBUG_INFO_FIELD = "@debug_created_info";

private static final ASCIIEncoding ASCII = ASCIIEncoding.INSTANCE;
Expand Down Expand Up @@ -2518,14 +2518,17 @@ public int size() {
return value.getRealSize();
}

/** rb_str_length
*
*/
public RubyFixnum length() {
return length19();
// MRI: rb_str_length
// Bound to the Ruby method names "length" and "size" by the annotation below; simply forwards to
// the runtime-based overload, passing the runtime held by the supplied context.
@JRubyMethod(name = {"length", "size"})
public RubyFixnum rubyLength(final ThreadContext context) {
return rubyLength(context.runtime);
}

@JRubyMethod(name = {"length", "size"})
private RubyFixnum rubyLength(final Ruby runtime) {
return runtime.newFixnum(strLength());
}

/**
 * Returns {@code strLength()} boxed as a Fixnum created through this object's runtime.
 *
 * @deprecated retained under its old versioned name only.
 *             NOTE(review): {@code rubyLength(ThreadContext)} looks like the intended replacement — confirm.
 */
@Deprecated
public RubyFixnum length19() {
return getRuntime().newFixnum(strLength());
}
Expand All @@ -2535,6 +2538,38 @@ public RubyFixnum bytesize() {
return getRuntime().newFixnum(value.getRealSize());
}


// CharSequence

/**
 * {@link CharSequence#length()} implementation: returns {@code strLength()} unchanged.
 */
@Override
public int length() {
return strLength();
}

/**
 * {@link CharSequence#charAt(int)} implementation.
 *
 * When the encoding is single-byte optimizable the byte at {@code offset} is returned directly;
 * otherwise the lookup is delegated to {@code multibyteCharAt}.
 *
 * @param offset zero-based index of the requested char
 * @return the char at {@code offset}
 * @throws StringIndexOutOfBoundsException if the string is empty, {@code offset} is negative,
 *         or {@code offset} is past the end
 */
@Override
public char charAt(int offset) {
    final int length = value.getRealSize();

    // Reject empty strings and negative indexes up front so the single-byte and the
    // multi-byte paths report them identically.
    if (length < 1 || offset < 0) throw new StringIndexOutOfBoundsException(offset);

    final Encoding enc = value.getEncoding();
    if (singleByteOptimizable(enc)) {
        if (offset >= length) throw new StringIndexOutOfBoundsException(offset);
        // Mask to 0x00-0xFF so a byte with the high bit set cannot sign-extend into a 0xFFxx char.
        // NOTE(review): assumes value.get yields the raw (signed) byte value — the mask is a no-op otherwise.
        return (char) (value.get(offset) & 0xFF);
    }

    return multibyteCharAt(enc, offset, length);
}

/**
 * {@link CharSequence#subSequence(int, int)} implementation backed by the Ruby substring helper.
 *
 * @param start inclusive start index
 * @param end exclusive end index
 * @return the requested sub-string (a {@code RubyString})
 * @throws StringIndexOutOfBoundsException if {@code start} is negative, {@code end} is smaller
 *         than {@code start}, or {@code end} is greater than {@code strLength()}
 */
@Override
public CharSequence subSequence(int start, int end) {
    // Validate against the CharSequence contract before delegating: the Ruby-level substring
    // helper follows Ruby indexing rules and is not guaranteed to reject a negative start or an
    // end beyond the string.
    if (start < 0 || end < start || end > strLength()) {
        throw new StringIndexOutOfBoundsException("String index out of range: <" + start + ", " + end + ")");
    }

    final IRubyObject subStr = substr19(getRuntime(), start, end - start);
    if (subStr.isNil()) {
        throw new StringIndexOutOfBoundsException("String index out of range: <" + start + ", " + end + ")");
    }
    return (RubyString) subStr;
}

private SizeFn eachByteSizeFn() {
final RubyString self = this;
return new SizeFn() {
Expand Down Expand Up @@ -3337,7 +3372,7 @@ private IRubyObject multibyteSubstr19(Ruby runtime, Encoding enc, int len, int b
int p;
int s = value.getBegin();
int end = s + length;
byte[]bytes = value.getUnsafeBytes();
byte[] bytes = value.getUnsafeBytes();

if (beg < 0) {
if (len > -beg) len = -beg;
Expand Down Expand Up @@ -3381,6 +3416,38 @@ private IRubyObject multibyteSubstr19(Ruby runtime, Encoding enc, int len, int b
return makeShared(runtime, p - s, len);
}

/**
 * Looks up the character at index {@code beg} for strings that cannot be indexed byte-by-byte.
 *
 * @param enc the encoding used to locate and decode the character
 * @param beg zero-based character index
 * @param length number of content bytes, measured from {@code value.getBegin()}
 * @return the decoded code point when it is in the BMP, otherwise only its high surrogate
 * @throws StringIndexOutOfBoundsException if {@code beg} is negative or not smaller than the
 *         character length reported by {@code StringSupport.strLengthFromRubyString}
 */
private char multibyteCharAt(Encoding enc, int beg, int length) {
    int p;
    final int s = value.getBegin();
    final int end = s + length;
    final byte[] bytes = value.getUnsafeBytes();

    // Valid indexes are 0 .. charLength - 1. An index equal to the character length must be
    // rejected too, otherwise decoding below would start at the end of the content.
    if (beg < 0 || beg >= StringSupport.strLengthFromRubyString(this, enc)) {
        throw new StringIndexOutOfBoundsException(beg);
    }

    if (isCodeRangeValid() && enc.isUTF8()) {
        p = StringSupport.utf8Nth(bytes, s, end, beg);
    } else if (enc.isFixedWidth()) {
        final int w = enc.maxLength();
        p = s + beg * w;
        if (p > end || w > end - p) {
            throw new StringIndexOutOfBoundsException(beg);
        }
    } else if ((p = StringSupport.nth(enc, bytes, s, end, beg)) == end) {
        throw new StringIndexOutOfBoundsException(beg);
    }
    final int codepoint = enc.mbcToCode(bytes, p, end);

    if (Character.isBmpCodePoint(codepoint)) {
        return (char) codepoint;
    }

    // we can only return high surrogate here
    return Character.highSurrogate(codepoint);
}

/* rb_str_splice */
private IRubyObject replaceInternal(int beg, int len, RubyString repl) {
StringSupport.replaceInternal(beg, len, this, repl);
Expand Down Expand Up @@ -5543,7 +5610,7 @@ private SizeFn eachCharSizeFn() {
return new SizeFn() {
@Override
public IRubyObject size(IRubyObject[] args) {
return self.length();
return self.rubyLength(getRuntime());
}
};
}
Expand Down Expand Up @@ -5651,7 +5718,7 @@ private IRubyObject enumerateCodepoints(ThreadContext context, String name, Bloc
}
else {
if (wantarray)
ary = RubyArray.newArray(runtime, str.length().getLongValue());
ary = RubyArray.newArray(runtime, str.length());
else
return enumeratorizeWithSize(context, str, name, eachCodepointSizeFn());
}
Expand Down Expand Up @@ -5704,26 +5771,24 @@ private IRubyObject enumerateBytes(ThreadContext context, String name, Block blo
}

private SizeFn eachCodepointSizeFn() {
final RubyString self = this;
return new SizeFn() {
@Override
public IRubyObject size(IRubyObject[] args) {
return self.length();
return rubyLength(getRuntime());
}
};
}

// Regexp source bytes "\X", used to build the grapheme-cluster regex; declared final so the shared
// reference cannot be reassigned.
private static final ByteList GRAPHEME_CLUSTER_PATTERN = new ByteList(new byte[] {(byte)'\\', (byte)'X'});

private SizeFn eachGraphemeClusterSizeFn() {
final RubyString self = this;
return new SizeFn() {
@Override
public IRubyObject size(IRubyObject[] args) {
Ruby runtime = self.getRuntime();
ByteList value = self.getByteList();
Ruby runtime = getRuntime();
ByteList value = getByteList();
Encoding enc = value.getEncoding();
if (!enc.isUnicode() || isSingleByteOptimizable(self, enc)) return self.length();
if (!enc.isUnicode() || isSingleByteOptimizable(RubyString.this, enc)) return rubyLength(runtime);

Regex reg = RubyRegexp.getRegexpFromCache(runtime, GRAPHEME_CLUSTER_PATTERN, enc, RegexpOptions.NULL_OPTIONS);
int beg = value.getBegin();
Expand Down Expand Up @@ -6104,10 +6169,14 @@ public static ByteList encodeBytelist(CharSequence value, Encoding encoding) {

@Override
public <T> T toJava(Class<T> target) {
if (target.isAssignableFrom(String.class)) {
// converting on Comparable.class due target.isAssignableFrom(String.class) compatibility (< 9.2)
if (target == String.class || target == Comparable.class || target == Object.class) {
return target.cast(decodeString());
}
if (target.isAssignableFrom(ByteList.class)) {
if (target == CharSequence.class) { // explicitly here
return (T) this; // used to convert to java.lang.String (< 9.2)
}
if (target == ByteList.class) {
return target.cast(value);
}
if (target == Character.class || target == Character.TYPE) {
Expand Down
13 changes: 11 additions & 2 deletions spec/java_integration/types/coercion_spec.rb
Expand Up @@ -694,10 +694,10 @@ def receive_primitive_box(obj)
end

describe "when passed java.lang.CharSequence" do
it "coerces to java.lang.String" do
it "returns a RubyString" do
cs = "123".to_java java.lang.CharSequence

expect(cs.class).to eq(java.lang.String)
expect(cs.class).to eq(org.jruby.RubyString)
end
end

Expand All @@ -709,6 +709,15 @@ def receive_primitive_box(obj)
end
end

describe "when passed org.jruby.util.ByteList" do
  # The expectations below check for a ByteList instance, so the example is described accordingly.
  it "coerces to org.jruby.util.ByteList" do
    cs = "123".to_java 'org.jruby.util.ByteList'

    expect(cs.class).to eq(org.jruby.util.ByteList)
    expect(cs.toString).to eq('123')
  end
end

describe "when passed void (java.lang.Void.TYPE)" do
it "coerces to null" do
cs = "123".to_java Java::java.lang.Void::TYPE
Expand Down
9 changes: 9 additions & 0 deletions test/jruby/test_helper.rb
Expand Up @@ -128,6 +128,15 @@ def assert_in_sub_runtime(script)
assert run_in_sub_runtime(script)
end

# Runs the given block and expects it to raise a Java throwable that is a +type+.
# Fails when nothing is raised; any other Java throwable is re-raised as-is.
def assert_java_raises(type)
  yield
  fail("expected to raise (#{type}) but did not")
rescue java.lang.Throwable => ex
  return if ex.is_a?(type)
  raise(ex)
end

def self.included(base)
if defined? Test::Unit::TestCase
if base < Test::Unit::TestCase
Expand Down
13 changes: 13 additions & 0 deletions test/jruby/test_higher_javasupport.rb
Expand Up @@ -1378,6 +1378,19 @@ def test_string_from_bytes
assert_equal('foo', String.from_java_bytes('foo'.to_java_bytes))
end

# Exercises a Ruby String converted with to_java('java.lang.CharSequence').
def test_string_as_charsequence
  seq = 'fo0'.to_java('java.lang.CharSequence')

  # element access and size
  assert_equal 'o'.ord, seq.charAt(1)
  assert_equal 3, seq.length

  # sub-sequences compare equal to plain Ruby strings
  assert_equal 'f', seq.subSequence(0, 1)
  assert_equal 'o0', seq.subSequence(1, 3)
  assert 'fo0'.to_java.contentEquals('fo0')

  # out-of-range arguments surface as Java index errors
  assert_java_raises(java.lang.StringIndexOutOfBoundsException) { seq.charAt(5) }
  assert_java_raises(java.lang.StringIndexOutOfBoundsException) { seq.charAt(-2) }
  assert_java_raises(java.lang.StringIndexOutOfBoundsException) { seq.subSequence(3, 2) }
  assert_java_raises(java.lang.StringIndexOutOfBoundsException) { seq.subSequence(0, -2) }
end

# JRUBY-2088
def test_package_notation_with_arguments
assert_raises(ArgumentError) do
Expand Down