Permalink
Browse files

Replace Unicode.u_unpack with String#codepoints

  • Loading branch information...
1 parent 51648a6 commit 3fe7ca1dbea75ae83cd2eb868ba3f8518c0849a4 @norman norman committed Jan 5, 2012
View
1 activesupport/lib/active_support/multibyte.rb
@@ -3,7 +3,6 @@
module ActiveSupport #:nodoc:
module Multibyte
- autoload :EncodingError, 'active_support/multibyte/exceptions'
autoload :Chars, 'active_support/multibyte/chars'
autoload :Unicode, 'active_support/multibyte/unicode'
View
4 activesupport/lib/active_support/multibyte/chars.rb
@@ -153,7 +153,7 @@ def normalize(form = nil)
# 'é'.length # => 2
# 'é'.mb_chars.decompose.to_s.length # => 3
def decompose
- chars(Unicode.decompose(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
+ chars(Unicode.decompose(:canonical, @wrapped_string.codepoints.to_a).pack('U*'))
end
# Performs composition on all the characters.
@@ -162,7 +162,7 @@ def decompose
# 'é'.length # => 3
# 'é'.mb_chars.compose.to_s.length # => 2
def compose
- chars(Unicode.compose(Unicode.u_unpack(@wrapped_string)).pack('U*'))
+ chars(Unicode.compose(@wrapped_string.codepoints.to_a).pack('U*'))
end
# Returns the number of grapheme clusters in the string.
View
8 activesupport/lib/active_support/multibyte/exceptions.rb
@@ -1,8 +0,0 @@
-# encoding: utf-8
-
-module ActiveSupport #:nodoc:
- module Multibyte #:nodoc:
- # Raised when a problem with the encoding was found.
- class EncodingError < StandardError; end
- end
-end
View
19 activesupport/lib/active_support/multibyte/unicode.rb
@@ -61,19 +61,6 @@ def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
- # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
- # valid UTF-8.
- #
- # Example:
- # Unicode.u_unpack('Café') # => [67, 97, 102, 233]
- def u_unpack(string)
- begin
- string.unpack 'U*'
- rescue ArgumentError
- raise EncodingError, 'malformed UTF-8 character'
- end
- end
-
# Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
# character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
# <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
@@ -89,7 +76,7 @@ def in_char_class?(codepoint, classes)
# Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]]
# Unicode.g_unpack('Café') # => [[67], [97], [102], [233]]
def g_unpack(string)
- codepoints = u_unpack(string)
+ codepoints = string.codepoints.to_a
unpacked = []
pos = 0
marker = 0
@@ -283,7 +270,7 @@ def tidy_bytes(string, force = false)
def normalize(string, form=nil)
form ||= @default_normalization_form
# See http://www.unicode.org/reports/tr15, Table 1
- codepoints = u_unpack(string)
+ codepoints = string.codepoints.to_a
case form
when :d
reorder_characters(decompose(:canonical, codepoints))
@@ -299,7 +286,7 @@ def normalize(string, form=nil)
end
def apply_mapping(string, mapping) #:nodoc:
- u_unpack(string).map do |codepoint|
+ string.each_codepoint.map do |codepoint|
cp = database.codepoints[codepoint]
if cp and (ncp = cp.send(mapping)) and ncp > 0
ncp
View
11 activesupport/test/multibyte_chars_test.rb
@@ -72,17 +72,6 @@ def test_consumes_utf8_strings
assert !@proxy_class.consumes?(BYTE_STRING)
end
- def test_unpack_utf8_strings
- assert_equal 4, ActiveSupport::Multibyte::Unicode.u_unpack(UNICODE_STRING).length
- assert_equal 5, ActiveSupport::Multibyte::Unicode.u_unpack(ASCII_STRING).length
- end
-
- def test_unpack_raises_encoding_error_on_broken_strings
- assert_raise(ActiveSupport::Multibyte::EncodingError) do
- ActiveSupport::Multibyte::Unicode.u_unpack(BYTE_STRING)
- end
- end
-
def test_concatenation_should_return_a_proxy_class_instance
assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars + 'b').class
assert_equal ActiveSupport::Multibyte.proxy_class, ('a'.mb_chars << 'b').class

0 comments on commit 3fe7ca1

Please sign in to comment.