Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored invalid range validation for String#delete! and String#squeeze! #1736

70 changes: 47 additions & 23 deletions kernel/common/string.rb
Expand Up @@ -254,6 +254,30 @@ def delete(*strings)
str.delete!(*strings) || str
end

def delete!(*strings)
raise ArgumentError, "wrong number of arguments" if strings.empty?

self.modify!

table = count_table(*strings).__data__

i, j = 0, -1
while i < @num_bytes
c = @data[i]
unless table[c] == 1
@data[j+=1] = c
end
i += 1
end

if (j += 1) < @num_bytes
self.num_bytes = j
self
else
nil
end
end

def clear
Rubinius.check_frozen
self.num_bytes = 0
Expand Down Expand Up @@ -786,6 +810,29 @@ def squeeze(*strings)
str.squeeze!(*strings) || str
end

def squeeze!(*strings)
return if @num_bytes == 0
self.modify!

table = count_table(*strings).__data__

i, j, last = 1, 0, @data[0]
while i < @num_bytes
c = @data[i]
unless c == last and table[c] == 1
@data[j+=1] = last = c
end
i += 1
end

if (j += 1) < @num_bytes
self.num_bytes = j
self
else
nil
end
end

def start_with?(*prefixes)
prefixes.each do |prefix|
prefix = Rubinius::Type.check_convert_type prefix, String, :to_str
Expand Down Expand Up @@ -1088,29 +1135,6 @@ def compare_substring(other, start, size)
raise PrimitiveFailure, "String#compare_substring primitive failed"
end

def count_table(*strings)
table = String.pattern 256, 1

i, size = 0, strings.size
while i < size
str = StringValue(strings[i]).dup
if str.size > 1 && str.getbyte(0) == 94 # ?^
pos, neg = 0, 1
else
pos, neg = 1, 0
end

set = String.pattern 256, neg
str.tr_expand! nil, true
j, chars = -1, str.size
set.setbyte(str.getbyte(j), pos) while (j += 1) < chars

table.apply_and! set
i += 1
end
table
end

def tr_expand!(limit, invalid_as_empty)
Rubinius.primitive :string_tr_expand
raise PrimitiveFailure, "String#tr_expand primitive failed"
Expand Down
71 changes: 23 additions & 48 deletions kernel/common/string18.rb
Expand Up @@ -12,6 +12,29 @@ def self.allocate

alias_method :bytesize, :size

def count_table(*strings)
table = String.pattern 256, 1

i, size = 0, strings.size
while i < size
str = StringValue(strings[i]).dup
if str.size > 1 && str.getbyte(0) == 94 # ?^
pos, neg = 0, 1
else
pos, neg = 1, 0
end

set = String.pattern 256, neg
str.tr_expand! nil, true
j, chars = -1, str.size
set.setbyte(str.getbyte(j), pos) while (j += 1) < chars

table.apply_and! set
i += 1
end
table
end

def upto(stop, exclusive=false)
stop = StringValue(stop)
return self if self > stop
Expand All @@ -36,31 +59,6 @@ def reverse!
self
end

def delete!(*strings)
raise ArgumentError, "wrong number of arguments" if strings.empty?

self.modify!

table = count_table(*strings).__data__

i, j = 0, -1
while i < @num_bytes
c = @data[i]
unless table[c] == 1
@data[j+=1] = c
end
i += 1
end

if (j += 1) < @num_bytes
self.num_bytes = j
self
else
nil
end
end


def slice!(one, two=undefined)
# This is un-DRY, but it's a simple manual argument splitting. Keeps
# the code fast and clean since the sequence are pretty short.
Expand Down Expand Up @@ -90,29 +88,6 @@ def slice!(one, two=undefined)
result
end

def squeeze!(*strings)
return if @num_bytes == 0
self.modify!

table = count_table(*strings).__data__

i, j, last = 1, 0, @data[0]
while i < @num_bytes
c = @data[i]
unless c == last and table[c] == 1
@data[j+=1] = last = c
end
i += 1
end

if (j += 1) < @num_bytes
self.num_bytes = j
self
else
nil
end
end

def sub!(pattern, replacement=undefined)
# Copied mostly from sub to keep Regexp.last_match= working right.

Expand Down
92 changes: 31 additions & 61 deletions kernel/common/string19.rb
Expand Up @@ -22,6 +22,37 @@ def codepoints

alias_method :each_codepoint, :codepoints

def count_table(*strings)
table = String.pattern 256, 1

i, size = 0, strings.size
while i < size
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not validate here? Since here we already walk through the list of strings, now we walk it twice and that's probably more confusing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, I fixed that in antekpiechnik/rubinius@2de7d1a

str = StringValue(strings[i]).dup

if str =~ /.+\-.+/
ranges_found = str.scan(/\w{1}\-\w{1}/)
ranges_found.map{ |range| range.gsub(/-/, '').split('') }.each do |range_array|
raise ArgumentError, "invalid range \"#{range_array.join('-')}\" in string transliteration" unless range_array == range_array.sort
end
end

if str.size > 1 && str.getbyte(0) == 94 # ?^
pos, neg = 0, 1
else
pos, neg = 1, 0
end

set = String.pattern 256, neg
str.tr_expand! nil, true
j, chars = -1, str.size
set.setbyte(str.getbyte(j), pos) while (j += 1) < chars

table.apply_and! set
i += 1
end
table
end

def encode!(to=undefined, from=undefined, options=nil)
Rubinius.check_frozen

Expand Down Expand Up @@ -51,39 +82,6 @@ def prepend(other)
self
end

def delete!(*strings)
raise ArgumentError, "wrong number of arguments" if strings.empty?

strings.each do |string|
if string =~ /.+\-.+/
ranges_found = string.scan(/\w{1}\-\w{1}/)
ranges_found.map{ |range| range.gsub(/-/, '').split('') }.each do |range_array|
raise ArgumentError, "invalid range #{strings} in string transliteration" unless range_array == range_array.sort
end
end
end

self.modify!

table = count_table(*strings).__data__

i, j = 0, -1
while i < @num_bytes
c = @data[i]
unless table[c] == 1
@data[j+=1] = c
end
i += 1
end

if (j += 1) < @num_bytes
self.num_bytes = j
self
else
nil
end
end

def upto(stop, exclusive=false)
return to_enum :upto, stop, exclusive unless block_given?
stop = StringValue(stop)
Expand Down Expand Up @@ -121,34 +119,6 @@ def reverse!
self
end

def squeeze!(*strings)
if strings.first =~ /.+\-.+/
range = strings.first.gsub(/-/, '').split('')
raise ArgumentError, "invalid range #{strings} in string transliteration" unless range == range.sort
end

return if @num_bytes == 0
self.modify!

table = count_table(*strings).__data__

i, j, last = 1, 0, @data[0]
while i < @num_bytes
c = @data[i]
unless c == last and table[c] == 1
@data[j+=1] = last = c
end
i += 1
end

if (j += 1) < @num_bytes
self.num_bytes = j
self
else
nil
end
end

def sub!(pattern, replacement=undefined)
# Copied mostly from sub to keep Regexp.last_match= working right.

Expand Down
58 changes: 53 additions & 5 deletions spec/core/string/count_table_spec.rb
Expand Up @@ -20,22 +20,70 @@
256.times { |i| t.getbyte(i).should == ("-|*\030 abcde".include?(i.chr) ? 1 : 0) }
end

it "returns a String with entries corresponding to sequences set to 1" do
t = "".count_table "-ab-g\\d-abhi--A\\"
it "returns a String with entries corresponding to sequences set to 1 without an invalid range" do
t = "".count_table "-ab-g\\f-hbhi--A\\"
256.times { |i|
t.getbyte(i).should == ("-abcdefgbhA\\".include?(i.chr) ? 1 : 0)
t.getbyte(i).should == ("-abcdefgfghbhA\\".include?(i.chr) ? 1 : 0)
}
end

ruby_version_is "" ... "1.9" do
it "returns a String with entries corresponding to sequences set to 1 ignoring an invalid range" do
t = "".count_table "-ab-g\\d-abhi--A\\"
256.times { |i|
t.getbyte(i).should == ("-abcdefgbhA\\".include?(i.chr) ? 1 : 0)
}
end
end

ruby_version_is "1.9" do
it "raises an exception for input provided with invalid range" do
lambda { "".count_table "-ab-g\\d-abhi--A\\" }.should raise_error(ArgumentError)
end
end

it "returns a String with entries corresponding included and excluded characters" do
t = "".count_table "ab", "^bc"
256.times { |i| t.getbyte(i).should == (i == 97 ? 1 : 0) }
end

it "returns a String with entries set to false for '^abc-e' inverse match strings" do
t = "".count_table "^ab-g\\d-abhi--A\\"
t = "".count_table "^ab-g\\f-hbhi--A\\"
256.times { |i|
t.getbyte(i).should == ("abcdefgbhA\\".include?(i.chr) ? 0 : 1)
t.getbyte(i).should == ("abcdefgfghbhA\\".include?(i.chr) ? 0 : 1)
}
end

ruby_version_is "" ... "1.9" do
it "returns a String with entries set to false for '^abc-e' inverse match strings ignoring the invalid range" do
t = "".count_table "^ab-g\\d-abhi--A\\"
256.times { |i|
t.getbyte(i).should == ("abcdefgbhA\\".include?(i.chr) ? 0 : 1)
}
end
end

ruby_version_is "1.9" do
it "raises an exception for '^abc-e' inverse match strings with an invalid range" do
lambda { "".count_table "^ab-g\\d-abhi--A\\" }.should raise_error(ArgumentError)
end
end

describe "input validation" do
it "raises no exception when input is correct" do
lambda { "".count_table("abc") }.should_not raise_error
end

it "raises no exception when input contains a correct range" do
lambda { "".count_table("a-c") }.should_not raise_error
end

ruby_version_is "1.9" do
it "raises a proper ArgumentError when input contains a range out of sequence" do
lambda { "".count_table("h-e") }.should raise_error(ArgumentError, 'invalid range "h-e" in string transliteration')
lambda { "".count_table("^h-e") }.should raise_error(ArgumentError, 'invalid range "h-e" in string transliteration')
lambda { "".count_table("abc", "ash-e") }.should raise_error(ArgumentError, 'invalid range "h-e" in string transliteration')
end
end
end
end
1 change: 1 addition & 0 deletions spec/ruby/core/string/delete_spec.rb
Expand Up @@ -64,6 +64,7 @@
it "raises if the given ranges are invalid" do
lambda { "hello".delete("h-e") }.should raise_error(ArgumentError)
lambda { "hello".delete("^h-e") }.should raise_error(ArgumentError)
lambda { "hello".delete("abc", "h-e") }.should raise_error(ArgumentError)
end
end

Expand Down
1 change: 1 addition & 0 deletions spec/ruby/core/string/squeeze_spec.rb
Expand Up @@ -123,6 +123,7 @@
s = "--subbookkeeper--"
lambda { s.squeeze!("e-b") }.should raise_error(ArgumentError)
lambda { s.squeeze!("^e-b") }.should raise_error(ArgumentError)
lambda { s.squeeze!("abc", "e-b") }.should raise_error(ArgumentError)
end
end

Expand Down