Skip to content

Commit

Permalink
[GR-19220] Add String#bytesplice
Browse files Browse the repository at this point in the history
PullRequest: truffleruby/3828
  • Loading branch information
andrykonchin authored and eregon committed May 26, 2023
2 parents 2b4657a + f0cbd62 commit 2372d09
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Compatibility:
* Fix `Range#size` and return `nil` for beginningless Range when end isn't Numeric (#3039, @rwstauner).
* Alias `String#-@` to `String#dedup` (#3039, @itarato).
* Fix `Pathname#relative_path_from` to convert string arguments to Pathname objects (@rwstauner).
* Add `String#bytesplice` (#3039, @itarato).

Performance:

Expand Down
133 changes: 133 additions & 0 deletions spec/ruby/core/string/bytesplice_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# -*- encoding: utf-8 -*-
require_relative '../../spec_helper'

describe "String#bytesplice" do
  ruby_version_is "3.2" do
    it "raises IndexError when index is less than -bytesize" do
      # "hello" is 5 bytes long, so -6 points before its first byte.
      splice = -> { "hello".bytesplice(-6, 0, "xxx") }
      splice.should raise_error(IndexError, "index -6 out of string")
    end

    it "raises IndexError when index is greater than bytesize" do
      splice = -> { "hello".bytesplice(6, 0, "xxx") }
      splice.should raise_error(IndexError, "index 6 out of string")
    end

    it "raises IndexError for negative length" do
      splice = -> { "abc".bytesplice(0, -2, "") }
      splice.should raise_error(IndexError, "negative length -2")
    end

    it "replaces with integer indices" do
      # Rows are [index, length, expected receiver content after splicing in "xxx"].
      [
        [-5, 0, "xxxhello"],
        [0, 0, "xxxhello"],
        [0, 1, "xxxello"],
        [0, 5, "xxx"],
        [0, 6, "xxx"],
      ].each do |index, length, expected|
        "hello".bytesplice(index, length, "xxx").should == expected
      end
    end

    it "raises RangeError when range left boundary is less than -bytesize" do
      splice = -> { "hello".bytesplice(-6...-6, "xxx") }
      splice.should raise_error(RangeError, "-6...-6 out of range")
    end

    it "replaces with ranges" do
      # Rows are [byte range, expected receiver content after splicing in "xxx"].
      [
        [(-5...-5), "xxxhello"],
        [(0...0), "xxxhello"],
        [(0..0), "xxxello"],
        [(0...1), "xxxello"],
        [(0..1), "xxxllo"],
        [(0..-1), "xxx"],
        [(0...5), "xxx"],
        [(0...6), "xxx"],
      ].each do |range, expected|
        "hello".bytesplice(range, "xxx").should == expected
      end
    end

    it "raises TypeError when integer index is provided without length argument" do
      splice = -> { "hello".bytesplice(0, "xxx") }
      splice.should raise_error(TypeError, "wrong argument type Integer (expected Range)")
    end

    it "replaces on an empty string" do
      # Rows are [replacement, expected receiver content].
      [
        ["", ""],
        ["xxx", "xxx"],
      ].each do |replacement, expected|
        "".bytesplice(0, 0, replacement).should == expected
      end
    end

    it "mutates self" do
      # The returned object must be the receiver itself, not a copy.
      receiver = "hello"
      returned = receiver.bytesplice(2, 1, "xxx")
      returned.should.equal?(receiver)
    end

    it "raises when string is frozen" do
      frozen = "hello".freeze
      splice = -> { frozen.bytesplice(2, 1, "xxx") }
      splice.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
    end
  end
end

describe "String#bytesplice with multibyte characters" do
  ruby_version_is "3.2" do
    # The receiver used throughout is 5 characters of 3 bytes each (15 bytes in UTF-8),
    # so character boundaries fall on byte offsets 0, 3, 6, 9, 12 and 15.

    it "raises IndexError when index is out of byte size boundary" do
      splice = -> { "こんにちは".bytesplice(-16, 0, "xxx") }
      splice.should raise_error(IndexError, "index -16 out of string")
    end

    it "raises IndexError when index is not on a codepoint boundary" do
      splice = -> { "こんにちは".bytesplice(1, 0, "xxx") }
      splice.should raise_error(IndexError, "offset 1 does not land on character boundary")
    end

    it "raises IndexError when length is not matching the codepoint boundary" do
      # Rows are [length, expected error message].
      [
        [1, "offset 1 does not land on character boundary"],
        [2, "offset 2 does not land on character boundary"],
      ].each do |length, message|
        -> { "こんにちは".bytesplice(0, length, "xxx") }.should raise_error(IndexError, message)
      end
    end

    it "replaces with integer indices" do
      # Rows are [index, length, replacement, expected receiver content].
      [
        [-15, 0, "xxx", "xxxこんにちは"],
        [0, 0, "xxx", "xxxこんにちは"],
        [0, 3, "xxx", "xxxんにちは"],
        [3, 3, "はは", "こははにちは"],
        [15, 0, "xxx", "こんにちはxxx"],
      ].each do |index, length, replacement, expected|
        "こんにちは".bytesplice(index, length, replacement).should == expected
      end
    end

    it "replaces with range" do
      # Rows are [byte range, expected receiver content after splicing in "xxx"].
      [
        [(-15...-16), "xxxこんにちは"],
        [(0...0), "xxxこんにちは"],
        [(0..2), "xxxんにちは"],
        [(0...3), "xxxんにちは"],
        [(0..5), "xxxにちは"],
        [(0..-1), "xxx"],
        [(0...15), "xxx"],
        [(0...18), "xxx"],
      ].each do |range, expected|
        "こんにちは".bytesplice(range, "xxx").should == expected
      end
    end

    it "treats negative length for range as 0" do
      # The range end lies far before the range start, so nothing is removed.
      [
        [(0...-100), "xxxこんにちは"],
        [(3...-100), "こxxxんにちは"],
        [(-15...-100), "xxxこんにちは"],
      ].each do |range, expected|
        "こんにちは".bytesplice(range, "xxx").should == expected
      end
    end

    it "raises when ranges not match codepoint boundaries" do
      # Rows are [byte range, expected error message].
      [
        [(0..0), "offset 1 does not land on character boundary"],
        [(0..1), "offset 2 does not land on character boundary"],
        # Begin is incorrect
        [(-4..-1), "offset 11 does not land on character boundary"],
        [(-5..-1), "offset 10 does not land on character boundary"],
        # End is incorrect
        [(-3..-2), "offset 14 does not land on character boundary"],
        [(-3..-3), "offset 13 does not land on character boundary"],
      ].each do |range, message|
        -> { "こんにちは".bytesplice(range, "x") }.should raise_error(IndexError, message)
      end
    end

    it "deals with a different encoded argument" do
      # UTF-8 receiver, US-ASCII replacement.
      utf8 = "こんにちは"
      utf8.encoding.should == Encoding::UTF_8
      ascii = "xxxxxx".force_encoding(Encoding::US_ASCII)

      spliced = utf8.bytesplice(0, 3, ascii)
      spliced.should == "xxxxxxんにちは"
      spliced.encoding.should == Encoding::UTF_8

      # US-ASCII receiver, UTF-8 replacement.
      ascii = "xxxxxx".force_encoding(Encoding::US_ASCII)
      utf8 = "こんにちは"
      utf8.encoding.should == Encoding::UTF_8

      spliced = ascii.bytesplice(0, 3, utf8)
      spliced.should == "こんにちはxxx"
      spliced.encoding.should == Encoding::UTF_8
    end
  end
end
1 change: 1 addition & 0 deletions spec/tags/truffle/methods_tags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,4 @@ fails:Public methods on Thread should include native_thread_id
fails:Public methods on UnboundMethod should include private?
fails:Public methods on UnboundMethod should include protected?
fails:Public methods on UnboundMethod should include public?
fails:Public methods on String should not include bytesplice
1 change: 1 addition & 0 deletions spec/truffleruby.next-specs
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ spec/ruby/core/hash/shift_spec.rb
spec/ruby/core/range/size_spec.rb

spec/ruby/core/string/dedup_spec.rb
spec/ruby/core/string/bytesplice_spec.rb
12 changes: 12 additions & 0 deletions src/main/java/org/truffleruby/core/string/StringNodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -4459,4 +4459,16 @@ private String formatTooLongError(int count, RubyString string) {

}

/** Backs the {@code string_is_character_head?} primitive by delegating to {@link IsCharacterHeadNode}.
 *
 * The primitive receives an encoding, a string object and a byte offset. The string's TString is obtained
 * through {@link RubyStringLibrary} and handed, together with the encoding and offset, to the cached
 * {@code IsCharacterHeadNode}, whose boolean result is returned unchanged. */
// NOTE(review): lowerFixnum = 2 presumably lowers the third argument (byteOffset) to int - confirm.
@Primitive(name = "string_is_character_head?", lowerFixnum = 2)
public abstract static class IsCharacterHeadPrimitiveNode extends PrimitiveArrayArgumentsNode {

    @Specialization
    protected boolean isCharacterHead(RubyEncoding enc, Object string, int byteOffset,
            @Cached RubyStringLibrary libString,
            @Cached IsCharacterHeadNode isCharacterHeadNode) {
        return isCharacterHeadNode.execute(enc, libString.getTString(string), byteOffset);
    }
}

}
43 changes: 43 additions & 0 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,49 @@ def byteslice(index_or_range, length = undefined)
byteslice index, length
end

# Replaces a span of bytes of this string with +str+.
#
# Call forms handled here:
#   bytesplice(index, length, str)
#   bytesplice(range, str)
#
# A negative integer +index+ counts from the end (bytesize is added to it). A length
# that reaches past the end of the string is clamped to the remaining bytes, and a
# negative length computed from a range is treated as 0.
#
# Raises:
#   TypeError  - two-argument form whose first argument is not a Range
#   IndexError - negative length, an integer index outside the string, or a start/end
#                offset that does not fall on the start of a character
#   RangeError - a range whose normalized start is outside the string
#
# Returns the result of Primitive.string_splice.
# NOTE(review): the specs expect the receiver itself to be returned - confirm against the primitive.
def bytesplice(index_or_range, length = undefined, str)
  is_range = Primitive.is_a?(index_or_range, Range)

  if Primitive.undefined?(length)
    raise TypeError, "wrong argument type #{Primitive.class(index_or_range)} (expected Range)" unless is_range

    start, len = Primitive.range_normalized_start_length(index_or_range, bytesize)
    len = 0 if len < 0
  else
    # Keep the converted integer around: error messages must report it rather than the
    # raw argument, which may be a Float or any object responding to #to_int.
    index = Primitive.rb_to_int(index_or_range)
    start = index < 0 ? index + bytesize : index
    len = Primitive.rb_to_int(length)
  end

  str = StringValue(str)

  raise IndexError, "negative length #{len}" if len < 0

  if bytesize < start || start < 0
    raise RangeError, "#{index_or_range} out of range" if is_range
    raise IndexError, "index #{index} out of string"
  end

  # Clamp the replaced span to the bytes that actually exist after +start+.
  len = bytesize - start if len > bytesize - start
  finish = start + len

  # Both ends of the replaced span must sit on a character boundary; an offset equal
  # to bytesize (the end of the string) is always acceptable and is not checked.
  if start < bytesize && !Primitive.string_is_character_head?(encoding, self, start)
    raise IndexError, "offset #{start} does not land on character boundary"
  end
  if finish < bytesize && !Primitive.string_is_character_head?(encoding, self, finish)
    raise IndexError, "offset #{finish} does not land on character boundary"
  end

  # Argument validation comes first, so an invalid index is reported before mutability is checked.
  Primitive.check_mutable_string(self)
  enc = Primitive.encoding_ensure_compatible_str(self, str)
  Primitive.string_splice(self, str, start, len, enc)
end

# Attempts to convert +obj+ to a String by delegating to Truffle::Type.try_convert
# with String as the target class and :to_str as the conversion method.
def self.try_convert(obj)
  Truffle::Type.try_convert(obj, String, :to_str)
end
Expand Down
4 changes: 4 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ def byteslice(...)
to_s.byteslice(...)
end

# Converts the receiver with #to_s and forwards every argument to #bytesplice on the result.
def bytesplice(...)
  string = to_s
  string.bytesplice(...)
end

# Converts the receiver with #to_s and forwards every argument to #capitalize on the result.
def capitalize(...)
  string = to_s
  string.capitalize(...)
end
Expand Down

0 comments on commit 2372d09

Please sign in to comment.