Permalink
Browse files

Fix JRUBY-6930: yaml escaping of non-printable characters

Defer creating the SnakeYAML emitter until we are given an
encoding from the start_stream event. This allows us to set up
the OutputStreamWriter to use the proper charset, and has the
added bonus of making other lazily-set options actually be
reflected in the emitter's behavior.
  • Loading branch information...
1 parent e681ad9 commit da0f2eb0226f05573c7be6ff4cc9aa25cd56f945 @headius headius committed Oct 18, 2012
@@ -30,8 +30,11 @@
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map;
+
+import org.jcodings.Encoding;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
@@ -82,12 +85,8 @@ public PsychEmitter(Ruby runtime, RubyClass klass) {
public IRubyObject initialize(ThreadContext context, IRubyObject io) {
options = new DumperOptions();
options.setIndent(2);
- try {
- emitter = new Emitter(new OutputStreamWriter(new IOOutputStream(io), "UTF-8"), options);
- } catch (UnsupportedEncodingException uee) {
- // should never happen on a compliant JVM
- emitter = new Emitter(new OutputStreamWriter(new IOOutputStream(io)), options);
- }
+
+ this.io = io;
return context.nil;
}
@@ -104,7 +103,7 @@ public IRubyObject initialize(ThreadContext context, IRubyObject io, IRubyObject
options.setIndent((int)level.convertToInteger().getLongValue());
options.setWidth((int)width.convertToInteger().getLongValue());
- emitter = new Emitter(new OutputStreamWriter(new IOOutputStream(io)), options);
+ this.io = io;
return context.nil;
}
@@ -114,10 +113,13 @@ public IRubyObject start_stream(ThreadContext context, IRubyObject encoding) {
if (!(encoding instanceof RubyFixnum)) {
throw context.runtime.newTypeError(encoding, context.runtime.getFixnum());
}
-
- // TODO: do something with encoding? perhaps at the stream level?
+
+ initEmitter(context, encoding);
+
StreamStartEvent event = new StreamStartEvent(NULL_MARK, NULL_MARK);
+
emit(context, event);
+
return this;
}
@@ -305,6 +307,8 @@ public IRubyObject line_width(ThreadContext context) {
private void emit(ThreadContext context, Event event) {
try {
+ if (emitter == null) throw context.runtime.newRuntimeError("uninitialized emitter");
+
emitter.emit(event);
} catch (IOException ioe) {
throw context.runtime.newIOErrorFromException(ioe);
@@ -313,8 +317,18 @@ private void emit(ThreadContext context, Event event) {
}
}
+ /**
+  * Creates the SnakeYAML emitter once the stream encoding is known.
+  *
+  * The emitter writes to the Ruby IO object stored in {@code io} through an
+  * OutputStreamWriter configured with the charset of the requested encoding,
+  * and uses whatever {@code options} have been set up to this point.
+  *
+  * @param context   the current thread context, used to reach the runtime
+  * @param _encoding an integer-convertible Ruby object holding the ordinal of
+  *                  a {@code PsychLibrary.YAML_ENCODING} constant
+  * @throws org.jruby.exceptions.RaiseException a Ruby RuntimeError if the
+  *         emitter has already been created, or a Ruby ArgumentError if the
+  *         value does not name a known YAML encoding
+  */
+ private void initEmitter(ThreadContext context, IRubyObject _encoding) {
+     if (emitter != null) throw context.runtime.newRuntimeError("already initialized emitter");
+
+     // Range-check while the value is still a long: narrowing to int first
+     // would let values such as 2^32 wrap around to a valid index, and an
+     // unchecked index would surface as a Java ArrayIndexOutOfBoundsException
+     // rather than a Ruby-level error.
+     PsychLibrary.YAML_ENCODING[] knownEncodings = PsychLibrary.YAML_ENCODING.values();
+     long requested = _encoding.convertToInteger().getLongValue();
+     if (requested < 0 || requested >= knownEncodings.length) {
+         throw context.runtime.newArgumentError("unknown YAML encoding: " + requested);
+     }
+
+     Encoding encoding = knownEncodings[(int) requested].encoding;
+     Charset charset = context.runtime.getEncodingService().charsetForEncoding(encoding);
+
+     emitter = new Emitter(new OutputStreamWriter(new IOOutputStream(io), charset), options);
+ }
+
Emitter emitter;
DumperOptions options = new DumperOptions();
+ IRubyObject io;
private static final Mark NULL_MARK = new Mark(null, 0, 0, 0, null, 0);
@@ -27,6 +27,10 @@
***** END LICENSE BLOCK *****/
package org.jruby.ext.psych;
+import org.jcodings.Encoding;
+import org.jcodings.specific.UTF16BEEncoding;
+import org.jcodings.specific.UTF16LEEncoding;
+import org.jcodings.specific.UTF8Encoding;
import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyModule;
@@ -60,4 +64,17 @@ public IRubyObject call(ThreadContext context, IRubyObject self, RubyModule claz
PsychToRuby.initPsychToRuby(runtime, psych);
PsychYamlTree.initPsychYamlTree(runtime, psych);
}
+
+ /**
+  * The stream encodings Psych can name, each paired with the jcodings
+  * Encoding it stands for.
+  *
+  * Declaration order is significant: PsychParser publishes each constant's
+  * ordinal() as the Ruby constants ANY, UTF8, UTF16LE and UTF16BE, and
+  * PsychEmitter indexes values() with the integer it receives. Do not
+  * reorder or insert constants without updating those users.
+  */
+ public enum YAML_ENCODING {
+     // "Any" carries no preference of its own, so it is mapped to UTF-8.
+     YAML_ANY_ENCODING(UTF8Encoding.INSTANCE),
+     YAML_UTF8_ENCODING(UTF8Encoding.INSTANCE),
+     YAML_UTF16LE_ENCODING(UTF16LEEncoding.INSTANCE),
+     YAML_UTF16BE_ENCODING(UTF16BEEncoding.INSTANCE);
+
+     /** The jcodings encoding associated with this constant. */
+     public final Encoding encoding;
+
+     YAML_ENCODING(Encoding jcodingsEncoding) {
+         encoding = jcodingsEncoding;
+     }
+ }
}
@@ -44,6 +44,7 @@
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.anno.JRubyMethod;
+import static org.jruby.ext.psych.PsychLibrary.YAML_ENCODING.*;
import org.jruby.runtime.Block;
import org.jruby.runtime.ObjectAllocator;
import org.jruby.runtime.ThreadContext;
@@ -75,11 +76,6 @@
public class PsychParser extends RubyObject {
private static final Logger LOG = LoggerFactory.getLogger("PsychParser");
-
- public static final int YAML_ANY_ENCODING = 0;
- public static final int YAML_UTF8_ENCODING = UTF8Encoding.INSTANCE.getIndex();
- public static final int YAML_UTF16LE_ENCODING = UTF16LEEncoding.INSTANCE.getIndex();
- public static final int YAML_UTF16BE_ENCODING = UTF16BEEncoding.INSTANCE.getIndex();
public static void initPsychParser(Ruby runtime, RubyModule psych) {
RubyClass psychParser = runtime.defineClassUnder("Parser", runtime.getObject(), new ObjectAllocator() {
@@ -90,10 +86,10 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
RubyKernel.require(runtime.getNil(),
runtime.newString("psych/syntax_error"), Block.NULL_BLOCK);
- psychParser.defineConstant("ANY", runtime.newFixnum(YAML_ANY_ENCODING));
- psychParser.defineConstant("UTF8", runtime.newFixnum(YAML_UTF8_ENCODING));
- psychParser.defineConstant("UTF16LE", runtime.newFixnum(YAML_UTF16LE_ENCODING));
- psychParser.defineConstant("UTF16BE", runtime.newFixnum(YAML_UTF16BE_ENCODING));
+ psychParser.defineConstant("ANY", runtime.newFixnum(YAML_ANY_ENCODING.ordinal()));
+ psychParser.defineConstant("UTF8", runtime.newFixnum(YAML_UTF8_ENCODING.ordinal()));
+ psychParser.defineConstant("UTF16LE", runtime.newFixnum(YAML_UTF16LE_ENCODING.ordinal()));
+ psychParser.defineConstant("UTF16BE", runtime.newFixnum(YAML_UTF16BE_ENCODING.ordinal()));
psychParser.defineAnnotatedMethods(PsychParser.class);
@@ -154,7 +150,7 @@ public IRubyObject parse(ThreadContext context, IRubyObject yaml, IRubyObject pa
// FIXME: Event should expose a getID, so it can be switched
if (event.is(ID.StreamStart)) {
- invoke(context, handler, "start_stream", runtime.newFixnum(YAML_ANY_ENCODING));
+ invoke(context, handler, "start_stream", runtime.newFixnum(YAML_ANY_ENCODING.ordinal()));
} else if (event.is(ID.DocumentStart)) {
handleDocumentStart(context, (DocumentStartEvent) event, tainted, handler);
} else if (event.is(ID.DocumentEnd)) {

0 comments on commit da0f2eb

Please sign in to comment.