Permalink
Browse files

Fixed String#inspect for 1.9. Closes #1945.

  • Loading branch information...
1 parent 4783470 commit 16a315686dce4002d37c630d0a89a9c9d7ca1692 @brixen brixen committed Nov 19, 2012
View
@@ -449,10 +449,6 @@ def insert(index, other)
ControlCharacters = [10, 9, 7, 11, 12, 13, 27, 8]
ControlPrintValue = ["\\n", "\\t", "\\a", "\\v", "\\f", "\\r", "\\e", "\\b"]
- def inspect
- "\"#{transform(Rubinius::CType::Printed, true)}\""
- end
-
def ljust(width, padstr=" ")
justify(width, :left, padstr)
end
@@ -5,6 +5,10 @@ class String
alias_method :bytesize, :size
+ def inspect # Wraps the result of transform(Rubinius::CType::Printed, true) in double quotes.
+ "\"#{transform(Rubinius::CType::Printed, true)}\""
+ end
+
def upto(stop, exclusive=false)
stop = StringValue(stop)
return self if self > stop
View
@@ -75,6 +75,145 @@ def force_encoding(enc)
self
end
+ def inspect
+ result_encoding = Encoding.default_internal || Encoding.default_external
+ unless result_encoding.ascii_compatible?
+ result_encoding = Encoding::US_ASCII
+ end
+
+ enc = encoding
+ ascii = enc.ascii_compatible?
+ enc_name = enc.name
+ unicode = enc_name.start_with?("UTF-") && enc_name[4] != ?7
+
+ if unicode
+ if enc.equal? Encoding::UTF_16
+ a = getbyte 0
+ b = getbyte 1
+
+ if a == 0xfe and b == 0xff
+ enc = Encoding::UTF_16BE
+ elsif a == 0xff and b == 0xfe
+ enc = Encoding::UTF_16LE
+ else
+ unicode = false
+ end
+ elsif enc.equal? Encoding::UTF_32
+ a = getbyte 0
+ b = getbyte 1
+ c = getbyte 2
+ d = getbyte 3
+
+ if a == 0 and b == 0 and c == 0xfe and d == 0xfe
+ enc = Encoding::UTF_32BE
+ elsif a == 0xff and b == 0xfe and c == 0 and d == 0
+ enc = Encoding::UTF_32LE
+ else
+ unicode = false
+ end
+ end
+ end
+
+ array = []
+
+ index = 0
+ total = bytesize
+ while index < total
+ char = chr_at index
+
+ if char
+ bs = char.bytesize
+
+ if (ascii or unicode) and bs == 1
+ escaped = nil
+
+ byte = getbyte(index)
+ if byte >= 7 and byte <= 92
+ case byte
+ when 7 # \a
+ escaped = '\a'
+ when 8 # \b
+ escaped = '\b'
+ when 9 # \t
+ escaped = '\t'
+ when 10 # \n
+ escaped = '\n'
+ when 11 # \v
+ escaped = '\v'
+ when 12 # \f
+ escaped = '\f'
+ when 13 # \r
+ escaped = '\r'
+ when 27 # \e
+ escaped = '\e'
+ when 34 # \"
+ escaped = '\"'
+ when 35 # #
+ case getbyte(index += 1)
+ when 36 # $
+ escaped = '\#$'
+ when 64 # @
+ escaped = '\#@'
+ when 123 # {
+ escaped = '\#{'
+ else
+ index -= 1
+ end
+ when 92 # \\
+ escaped = '\\\\'
+ end
+
+ if escaped
+ array << escaped
+ index += 1
+ next
+ end
+ end
+ end
+
+ if char.printable?
+ array << char
+ else
+ code = char.ord
+ escaped = code.to_s(16).upcase
+
+ if unicode
+ if code < 0x10000
+ pad = "0" * (4 - escaped.bytesize)
+ array << "\\u#{pad}#{escaped}"
+ else
+ array << "\\u{#{escaped}}"
+ end
+ else
+ if code < 0x100
+ pad = "0" * (2 - escaped.bytesize)
+ array << "\\x#{pad}#{escaped}"
+ else
+ array << "\\x{#{escaped}}"
+ end
+ end
+ end
+
+ index += bs
+ else
+ array << "\\x#{getbyte(index).to_s(16)}"
+ index += 1
+ end
+ end
+
+ size = array.inject(0) { |s, chr| s += chr.bytesize }
+ result = String.pattern size + 2, ?".ord
+
+ index = 1
+ array.each do |chr|
+ result.copy_from chr, 0, chr.bytesize, index
+ index += chr.bytesize
+ end
+
+ Rubinius::Type.infect result, self
+ result.force_encoding result_encoding
+ end
+
def prepend(other)
self[0,0] = other
self
@@ -1,2 +0,0 @@
-fails:String#inspect When default external is UTF-8 returns a string with non-printing characters replaced by \u notation for Unicode strings
-fails:String#inspect When default external is UTF-8 returns a string with extended characters for Unicode strings
View
@@ -5,6 +5,7 @@
#include "builtin/string.hpp"
#include "builtin/array.hpp"
#include "builtin/bytearray.hpp"
+#include "builtin/character.hpp"
#include "builtin/class.hpp"
#include "builtin/encoding.hpp"
#include "builtin/exception.hpp"
@@ -584,7 +585,7 @@ namespace rubinius {
if(byte_compatible_p(encoding_)) {
num_chars(state, num_bytes_);
} else {
- OnigEncodingType* enc = encoding_->get_encoding();
+ OnigEncodingType* enc = encoding()->get_encoding();
native_int chars;
if(fixed_width_p(encoding_)) {
@@ -1296,12 +1297,12 @@ namespace rubinius {
if(byte_compatible_p(encoding_)) {
return start + index;
} else if(fixed_width_p(encoding_)) {
- return start + index * ONIGENC_MBC_MINLEN(encoding_->get_encoding());
+ return start + index * ONIGENC_MBC_MINLEN(encoding()->get_encoding());
} else {
native_int offset = Encoding::find_character_byte_index(byte_address() + start,
byte_address() + byte_size(),
index,
- encoding_->get_encoding());
+ encoding()->get_encoding());
return start + offset;
}
}
@@ -1316,12 +1317,12 @@ namespace rubinius {
if(byte_compatible_p(encoding_)) {
return index;
} else if(fixed_width_p(encoding_)) {
- return index / ONIGENC_MBC_MINLEN(encoding_->get_encoding());
+ return index / ONIGENC_MBC_MINLEN(encoding()->get_encoding());
} else {
return Encoding::find_byte_character_index(byte_address() + start,
byte_address() + byte_size(),
index,
- encoding_->get_encoding());
+ encoding()->get_encoding());
}
}
@@ -1371,7 +1372,7 @@ namespace rubinius {
native_int e = find_character_byte_index(state, length - 1, i);
int c = Encoding::precise_mbclen(byte_address() + e, byte_address() + byte_size(),
- encoding_->get_encoding());
+ encoding()->get_encoding());
if(ONIGENC_MBCLEN_CHARFOUND_P(c)) {
e += ONIGENC_MBCLEN_CHARFOUND_LEN(c);
@@ -1657,7 +1658,7 @@ namespace rubinius {
return valid_encoding_;
}
- OnigEncodingType* enc = encoding_->get_encoding();
+ OnigEncodingType* enc = encoding()->get_encoding();
uint8_t* p = byte_address();
uint8_t* e = p + byte_size();
@@ -1679,48 +1680,35 @@ namespace rubinius {
return valid_encoding_;
}
- Fixnum* String::codepoint(STATE) {
- if(char_size(state) == 0) return force_as<Fixnum>(Primitives::failure());
-
- if(byte_compatible_p(encoding_)) {
- return Fixnum::from(byte_address()[0]);
- } else {
- OnigEncodingType* enc = encoding_->get_encoding();
- uint8_t* p = byte_address();
- uint8_t* e = p + byte_size();
+ int String::codepoint(bool* found) {
+ OnigEncodingType* enc = encoding()->get_encoding();
+ uint8_t* p = byte_address();
+ uint8_t* e = p + byte_size();
- int n = Encoding::precise_mbclen(p, e, enc);
+ int n = Encoding::precise_mbclen(p, e, enc);
- if(ONIGENC_MBCLEN_CHARFOUND_P(n)) {
- return Fixnum::from(ONIGENC_MBC_TO_CODE(enc, (UChar*)p, (UChar*)e));
- }
+ if(ONIGENC_MBCLEN_CHARFOUND_P(n)) {
+ *found = true;
+ return ONIGENC_MBC_TO_CODE(enc, (UChar*)p, (UChar*)e);
+ } else {
+ *found = false;
+ return 0;
}
-
- return force_as<Fixnum>(Primitives::failure());
}
- Object* String::chr_at(STATE, Fixnum* byte) {
- native_int i = byte->to_native();
- native_int size = byte_size();
- int n = 1;
-
- if(i < 0 || i >= size) return cNil;
-
- if(!byte_compatible_p(encoding_)) {
- OnigEncodingType* enc = encoding_->get_encoding();
- uint8_t* p = byte_address() + i;
- uint8_t* e = byte_address() + byte_size();
-
- int c = Encoding::precise_mbclen(p, e, enc);
+ Fixnum* String::codepoint(STATE) {
+ bool found;
+ int c = codepoint(&found);
- if(ONIGENC_MBCLEN_CHARFOUND_P(c)) {
- n = ONIGENC_MBCLEN_CHARFOUND_LEN(c);
- } else {
- return cNil;
- }
+ if(!found) {
+ return force_as<Fixnum>(Primitives::failure());
+ } else {
+ return Fixnum::from(c);
}
+ }
- return byte_substring(state, i, n);
+ Object* String::chr_at(STATE, Fixnum* byte) {
+ return Character::create_from(state, this, byte);
}
void String::Info::show(STATE, Object* self, int level) {
View
@@ -268,6 +268,8 @@ namespace rubinius {
// Rubinius.primitive :string_valid_encoding_p
Object* valid_encoding_p(STATE);
+ int codepoint(bool* found);
+
// Rubinius.primitive :string_codepoint
Fixnum* codepoint(STATE);

0 comments on commit 16a3156

Please sign in to comment.