Skip to content
Browse files

emitter transcodes to UTF-8 automatically

  • Loading branch information...
1 parent 18a88dd commit aa5e67a7dc7ba9e0d798d914f104f350f4d1969a @tenderlove committed May 19, 2010
Showing with 139 additions and 2 deletions.
  1. +66 −2 ext/psych/emitter.c
  2. +1 −0 test/psych/test_emitter.rb
  3. +72 −0 test/psych/test_encoding.rb
View
68 ext/psych/emitter.c
@@ -121,6 +121,9 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
if(RTEST(tags)) {
int i = 0;
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding * encoding = rb_utf8_encoding();
+#endif
Check_Type(tags, T_ARRAY);
@@ -129,15 +132,24 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
for(i = 0; i < RARRAY_LEN(tags); i++) {
VALUE tuple = RARRAY_PTR(tags)[i];
+ VALUE name;
+ VALUE value;
+
Check_Type(tuple, T_ARRAY);
if(RARRAY_LEN(tuple) < 2) {
xfree(head);
rb_raise(rb_eRuntimeError, "tag tuple must be of length 2");
}
+ name = RARRAY_PTR(tuple)[0];
+ value = RARRAY_PTR(tuple)[1];
+#ifdef HAVE_RUBY_ENCODING_H
+ name = rb_str_export_to_enc(name, encoding);
+ value = rb_str_export_to_enc(value, encoding);
+#endif
- tail->handle = (yaml_char_t *)StringValuePtr(RARRAY_PTR(tuple)[0]);
- tail->prefix = (yaml_char_t *)StringValuePtr(RARRAY_PTR(tuple)[1]);
+ tail->handle = (yaml_char_t *)StringValuePtr(name);
+ tail->prefix = (yaml_char_t *)StringValuePtr(value);
tail++;
}
@@ -199,6 +211,22 @@ static VALUE scalar(
Check_Type(value, T_STRING);
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding * encoding = rb_utf8_encoding();
+
+ value = rb_str_export_to_enc(value, encoding);
+
+ if(!NIL_P(anchor)) {
+ Check_Type(anchor, T_STRING);
+ anchor = rb_str_export_to_enc(anchor, encoding);
+ }
+
+ if(!NIL_P(tag)) {
+ Check_Type(tag, T_STRING);
+ tag = rb_str_export_to_enc(tag, encoding);
+ }
+#endif
+
yaml_scalar_event_initialize(
&event,
(yaml_char_t *)(NIL_P(anchor) ? NULL : StringValuePtr(anchor)),
@@ -231,6 +259,21 @@ static VALUE start_sequence(
) {
yaml_emitter_t * emitter;
yaml_event_t event;
+
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding * encoding = rb_utf8_encoding();
+
+ if(!NIL_P(anchor)) {
+ Check_Type(anchor, T_STRING);
+ anchor = rb_str_export_to_enc(anchor, encoding);
+ }
+
+ if(!NIL_P(tag)) {
+ Check_Type(tag, T_STRING);
+ tag = rb_str_export_to_enc(tag, encoding);
+ }
+#endif
+
Data_Get_Struct(self, yaml_emitter_t, emitter);
yaml_sequence_start_event_initialize(
@@ -283,6 +326,20 @@ static VALUE start_mapping(
yaml_event_t event;
Data_Get_Struct(self, yaml_emitter_t, emitter);
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding * encoding = rb_utf8_encoding();
+
+ if(!NIL_P(anchor)) {
+ Check_Type(anchor, T_STRING);
+ anchor = rb_str_export_to_enc(anchor, encoding);
+ }
+
+ if(!NIL_P(tag)) {
+ Check_Type(tag, T_STRING);
+ tag = rb_str_export_to_enc(tag, encoding);
+ }
+#endif
+
yaml_mapping_start_event_initialize(
&event,
(yaml_char_t *)(NIL_P(anchor) ? NULL : StringValuePtr(anchor)),
@@ -327,6 +384,13 @@ static VALUE alias(VALUE self, VALUE anchor)
yaml_event_t event;
Data_Get_Struct(self, yaml_emitter_t, emitter);
+#ifdef HAVE_RUBY_ENCODING_H
+ if(!NIL_P(anchor)) {
+ Check_Type(anchor, T_STRING);
+ anchor = rb_str_export_to_enc(anchor, rb_utf8_encoding());
+ }
+#endif
+
yaml_alias_event_initialize(
&event,
(yaml_char_t *)(NIL_P(anchor) ? NULL : StringValuePtr(anchor))
View
1 test/psych/test_emitter.rb
@@ -64,6 +64,7 @@ def test_scalar_arg_error
['foo', Object.new, nil, false, true, 1],
['foo', nil, Object.new, false, true, 1],
['foo', nil, nil, false, true, :foo],
+ [nil, nil, nil, false, true, 1],
].each do |args|
assert_raises(TypeError) do
@emitter.scalar(*args)
View
72 test/psych/test_encoding.rb
@@ -24,9 +24,81 @@ def #{m} *args
def setup
super
+ @buffer = StringIO.new
@handler = EncodingCatcher.new
@parser = Psych::Parser.new @handler
@utf8 = Encoding.find('UTF-8')
+ @emitter = Psych::Emitter.new @buffer
+ end
+
+ def test_emit_alias
+ @emitter.start_stream Psych::Parser::UTF8
+ @emitter.start_document [], [], true
+ e = assert_raises(RuntimeError) do
+ @emitter.alias 'ドラえもん'.encode('EUC-JP')
+ end
+ assert_match(/alias value/, e.message)
+ end
+
+ def test_start_mapping
+ foo = 'foo'
+ bar = 'バー'
+
+ @emitter.start_stream Psych::Parser::UTF8
+ @emitter.start_document [], [], true
+ @emitter.start_mapping(
+ foo.encode('Shift_JIS'),
+ bar.encode('UTF-16LE'),
+ false, Nodes::Sequence::ANY)
+ @emitter.end_mapping
+ @emitter.end_document false
+ @emitter.end_stream
+
+ @parser.parse @buffer.string
+ assert_encodings @utf8, @handler.strings
+ assert_equal [foo, bar], @handler.strings
+ end
+
+ def test_start_sequence
+ foo = 'foo'
+ bar = 'バー'
+
+ @emitter.start_stream Psych::Parser::UTF8
+ @emitter.start_document [], [], true
+ @emitter.start_sequence(
+ foo.encode('Shift_JIS'),
+ bar.encode('UTF-16LE'),
+ false, Nodes::Sequence::ANY)
+ @emitter.end_sequence
+ @emitter.end_document false
+ @emitter.end_stream
+
+ @parser.parse @buffer.string
+ assert_encodings @utf8, @handler.strings
+ assert_equal [foo, bar], @handler.strings
+ end
+
+ def test_doc_tag_encoding
+ key = '鍵'
+ @emitter.start_stream Psych::Parser::UTF8
+ @emitter.start_document(
+ [1, 1],
+ [['!'.encode('EUC-JP'), key.encode('EUC-JP')]],
+ true
+ )
+ @emitter.scalar 'foo', nil, nil, true, false, Nodes::Scalar::ANY
+ @emitter.end_document false
+ @emitter.end_stream
+
+ @parser.parse @buffer.string
+ assert_encodings @utf8, @handler.strings
+ assert_equal key, @handler.strings[1]
+ end
+
+ def test_emitter_encoding
+ str = "壁に耳あり、障子に目あり"
+ thing = Psych.load Psych.dump str.encode('EUC-JP')
+ assert_equal str, thing
end
def test_default_internal

0 comments on commit aa5e67a

Please sign in to comment.
Something went wrong with that request. Please try again.