Skip to content
Permalink
Browse files
Fixes #2172: Symbols need to support UTF-8 names
  • Loading branch information
enebo committed Dec 11, 2014
1 parent c34b8db commit 89f548ea9bc8ecfd9db099b0c594bc1ca82f8492
@@ -149,6 +149,15 @@ final ByteList getBytes() {
return symbolBytes;
}

/**
* RubySymbol is created by passing in a String and bytes are extracted from that. We will
* pass in encoding of that string after construction but before use so it does not forget
* what it is.
*/
public void associateEncoding(Encoding encoding) {
symbolBytes.setEncoding(encoding);
}

/** short circuit for Symbol key comparison
*
*/
@@ -185,6 +194,17 @@ public static RubySymbol newSymbol(Ruby runtime, String name) {
return runtime.getSymbolTable().getSymbol(name);
}

// FIXME: same bytesequences will fight over encoding of the symbol once cached. I think largely
// this will only happen in some ISO_8859_?? encodings making symbols at the same time so it should
// be pretty rare.
public static RubySymbol newSymbol(Ruby runtime, String name, Encoding encoding) {
RubySymbol newSymbol = newSymbol(runtime, name);

newSymbol.associateEncoding(encoding);

return newSymbol;
}

/**
* @see org.jruby.compiler.Constantizable
*/
@@ -33,20 +33,25 @@
package org.jruby.ast;

import java.util.List;
import org.jcodings.Encoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jruby.ast.types.ILiteralNode;
import org.jruby.ast.types.INameNode;
import org.jruby.ast.visitor.NodeVisitor;
import org.jruby.lexer.yacc.ISourcePosition;
import org.jruby.util.ByteList;

/**
* Represents a symbol (:symbol_name).
*/
public class SymbolNode extends Node implements ILiteralNode, INameNode {
private String name;
private final String name;
private final Encoding encoding;

public SymbolNode(ISourcePosition position, String name) {
public SymbolNode(ISourcePosition position, ByteList value) {
super(position);
this.name = name;
this.name = value.toString().intern();
encoding = value.lengthEnc() != value.length() ? value.getEncoding() : USASCIIEncoding.INSTANCE;
}

public NodeType getNodeType() {
@@ -64,6 +69,10 @@ public <T> T accept(NodeVisitor<T> iVisitor) {
public String getName() {
return name;
}

public Encoding getEncoding() {
return encoding;
}

public List<Node> childNodes() {
return EMPTY_LIST;
@@ -1,5 +1,6 @@
package org.jruby.ir;

import org.jcodings.specific.ASCIIEncoding;
import org.jruby.EvalType;
import org.jruby.Ruby;
import org.jruby.RubyInstanceConfig;
@@ -1710,14 +1711,16 @@ private IRMethod defineNewMethod(MethodDefNode defNode, IRScope parent, boolean
public Operand buildDefn(MethodDefNode node, IRScope s) { // Instance method
IRMethod method = defineNewMethod(node, s, true);
addInstr(s, new DefineInstanceMethodInstr(method));
return new Symbol(method.getName());
// FIXME: Method name should save encoding
return new Symbol(method.getName(), ASCIIEncoding.INSTANCE);
}

public Operand buildDefs(DefsNode node, IRScope s) { // Class method
Operand container = build(node.getReceiverNode(), s);
IRMethod method = defineNewMethod(node, s, false);
addInstr(s, new DefineClassMethodInstr(container, method));
return new Symbol(method.getName());
// FIXME: Method name should save encoding
return new Symbol(method.getName(), ASCIIEncoding.INSTANCE);
}

protected LocalVariable getArgVariable(IRScope s, String name, int depth) {
@@ -3293,7 +3296,7 @@ public Operand buildSValue(SValueNode node, IRScope s) {

public Operand buildSymbol(SymbolNode node) {
// SSS: Since symbols are interned objects, no need to copyAndReturnValue(...)
return new Symbol(node.getName());
return new Symbol(node.getName(), node.getEncoding());
}

public Operand buildTrue() {
@@ -1,5 +1,6 @@
package org.jruby.ir;

import org.jcodings.specific.USASCIIEncoding;
import org.jruby.ir.instructions.*;
import org.jruby.ir.interpreter.ClosureInterpreterContext;
import org.jruby.ir.interpreter.InterpreterContext;
@@ -191,7 +192,8 @@ public void addInstr(Instr i) {
keywordArgs.add(0, new KeyValuePair<Operand, Operand>(Symbol.KW_REST_ARG_DUMMY, ((ReceiveArgBase) i).getResult()));
} else if (i instanceof ReceiveKeywordArgInstr) {
ReceiveKeywordArgInstr rkai = (ReceiveKeywordArgInstr)i;
keywordArgs.add(new KeyValuePair<Operand, Operand>(new Symbol(rkai.argName), rkai.getResult()));
// FIXME: This lost encoding information when name was converted to string earlier in IRBuilder
keywordArgs.add(new KeyValuePair<Operand, Operand>(new Symbol(rkai.argName, USASCIIEncoding.INSTANCE), rkai.getResult()));
} else if (i instanceof ReceiveRestArgInstr) {
blockArgs.add(new Splat(((ReceiveRestArgInstr)i).getResult()));
} else if (i instanceof ReceiveArgBase) {
@@ -1,5 +1,6 @@
package org.jruby.ir;

import org.jcodings.specific.USASCIIEncoding;
import org.jruby.internal.runtime.methods.IRMethodArgs;
import org.jruby.ir.instructions.Instr;
import org.jruby.ir.instructions.ReceiveArgBase;
@@ -69,7 +70,8 @@ public void addInstr(Instr i) {
keywordArgs.add(0, new KeyValuePair<Operand, Operand>(Symbol.KW_REST_ARG_DUMMY, ((ReceiveArgBase) i).getResult()));
} else if (i instanceof ReceiveKeywordArgInstr) {
ReceiveKeywordArgInstr rkai = (ReceiveKeywordArgInstr)i;
keywordArgs.add(new KeyValuePair<Operand, Operand>(new Symbol(rkai.argName), rkai.getResult()));
// FIXME: This lost encoding information when name was converted to string earlier in IRBuilder
keywordArgs.add(new KeyValuePair<Operand, Operand>(new Symbol(rkai.argName, USASCIIEncoding.INSTANCE), rkai.getResult()));
} else if (i instanceof ReceiveRestArgInstr) {
callArgs.add(new Splat(((ReceiveRestArgInstr)i).getResult(), true));
} else if (i instanceof ReceiveArgBase) {
@@ -1,5 +1,6 @@
package org.jruby.ir.instructions;

import org.jcodings.specific.USASCIIEncoding;
import org.jruby.RubyModule;
import org.jruby.ir.IRVisitor;
import org.jruby.ir.Operation;
@@ -20,7 +21,8 @@ public class ConstMissingInstr extends CallInstr implements ResultInstr, FixedAr
private final String missingConst;

public ConstMissingInstr(Variable result, Operand currentModule, String missingConst) {
super(Operation.CONST_MISSING, CallType.FUNCTIONAL, result, new MethAddr("const_missing"), currentModule, new Operand[]{new Symbol(missingConst)}, null);
// FIXME: Missing encoding knowledge of the constant name.
super(Operation.CONST_MISSING, CallType.FUNCTIONAL, result, new MethAddr("const_missing"), currentModule, new Operand[]{new Symbol(missingConst, USASCIIEncoding.INSTANCE)}, null);

this.missingConst = missingConst;
}
@@ -1,26 +1,37 @@
package org.jruby.ir.operands;

import org.jcodings.Encoding;
import org.jcodings.specific.ASCIIEncoding;
import org.jruby.RubySymbol;
import org.jruby.ir.IRVisitor;
import org.jruby.parser.StaticScope;
import org.jruby.runtime.DynamicScope;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;

public class Symbol extends Reference {
public static final Symbol KW_REST_ARG_DUMMY = new Symbol("");
public static final Symbol KW_REST_ARG_DUMMY = new Symbol("", ASCIIEncoding.INSTANCE);

public Symbol(String name) {
private final Encoding encoding;

public Symbol(String name, Encoding encoding) {
super(OperandType.SYMBOL, name);

this.encoding = encoding;
}

@Override
public boolean canCopyPropagate() {
return true;
}

public Encoding getEncoding() {
return encoding;
}

@Override
public Object retrieve(ThreadContext context, IRubyObject self, StaticScope currScope, DynamicScope currDynScope, Object[] temp) {
return context.runtime.newSymbol(getName());
return RubySymbol.newSymbol(context.runtime, getName(), encoding);
}

@Override
@@ -1,6 +1,7 @@
package org.jruby.ir.persistence;

import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
import org.jruby.RubyInstanceConfig;
import org.jruby.ir.IRClosure;
import org.jruby.ir.IRManager;
@@ -57,7 +58,8 @@ public Operand decode(OperandType type) {
case STANDARD_ERROR: return new StandardError();
case STRING_LITERAL: return new StringLiteral(d.decodeString());
case SVALUE: return new SValue(d.decodeOperand());
case SYMBOL: return new Symbol(d.decodeString());
// FIXME: This is broken since there is no encode/decode for encoding
case SYMBOL: return new Symbol(d.decodeString(), USASCIIEncoding.INSTANCE);
case TEMPORARY_VARIABLE: return decodeTemporaryVariable();
case UNBOXED_BOOLEAN: return new UnboxedBoolean(d.decodeBoolean());
case UNBOXED_FIXNUM: return new UnboxedFixnum(d.decodeLong());
@@ -971,8 +971,12 @@ public static RubyRegexp constructRubyRegexp(ThreadContext context, RubyString p

// Used by JIT
public static RubyEncoding retrieveEncoding(ThreadContext context, String name) {
Encoding encoding = context.runtime.getEncodingService().findEncodingOrAliasEntry(name.getBytes()).getEncoding();
return context.runtime.getEncodingService().getEncoding(encoding);
return context.runtime.getEncodingService().getEncoding(retrieveJCodingsEncoding(context, name));
}

// Used by JIT
public static Encoding retrieveJCodingsEncoding(ThreadContext context, String name) {
return context.runtime.getEncodingService().findEncodingOrAliasEntry(name.getBytes()).getEncoding();
}

// Used by JIT
@@ -6,20 +6,17 @@

import com.headius.invokebinder.Signature;
import org.jcodings.Encoding;
import org.jruby.*;
import org.jruby.Ruby;
import org.jruby.RubyClass;
import org.jruby.compiler.impl.SkinnyMethodAdapter;
import org.jruby.ir.operands.UndefinedValue;
import org.jruby.ir.runtime.IRRuntimeHelpers;
import org.jruby.parser.StaticScope;
import org.jruby.runtime.Block;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.JavaNameMangler;
import org.jruby.util.RegexpOptions;
import org.objectweb.asm.Handle;
import org.objectweb.asm.Opcodes;
import org.objectweb.asm.Type;
import org.objectweb.asm.commons.Method;

@@ -249,7 +246,7 @@ public org.objectweb.asm.Label newLabel() {
*
* @param sym the symbol's string identifier
*/
public abstract void pushSymbol(String sym);
public abstract void pushSymbol(String sym, Encoding encoding);

/**
* Push the JRuby runtime on the stack.
@@ -5,6 +5,7 @@
package org.jruby.ir.targets;

import com.headius.invokebinder.Signature;
import java.math.BigInteger;
import org.jcodings.Encoding;
import org.jruby.Ruby;
import org.jruby.RubyArray;
@@ -29,16 +30,12 @@
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.runtime.callsite.CachingCallSite;
import org.jruby.runtime.callsite.FunctionalCachingCallSite;
import org.jruby.runtime.invokedynamic.InvokeDynamicSupport;
import org.jruby.util.ByteList;
import org.jruby.util.CodegenUtils;
import org.jruby.util.JavaNameMangler;
import org.jruby.util.RegexpOptions;
import org.objectweb.asm.Label;
import org.objectweb.asm.Opcodes;

import java.math.BigInteger;

import static org.jruby.util.CodegenUtils.ci;
import static org.jruby.util.CodegenUtils.p;
import static org.jruby.util.CodegenUtils.params;
@@ -170,10 +167,16 @@ public void pushDRegexp(Runnable callback, RegexpOptions options, int arity) {
}
}

public void pushSymbol(String sym) {
public void pushSymbol(String sym, Encoding encoding) {
loadRuntime();
adapter.ldc(sym);
adapter.invokevirtual(p(Ruby.class), "newSymbol", sig(RubySymbol.class, String.class));

// FIXME: Should be a helper somewhere? Load Encoding
loadContext();
adapter.ldc(encoding.toString());
invokeIRHelper("retrieveJCodingsEncoding", sig(Encoding.class, ThreadContext.class, String.class));

adapter.invokestatic(p(RubySymbol.class), "newSymbol", sig(RubySymbol.class, Ruby.class, String.class, Encoding.class));
}

public void loadRuntime() {
@@ -103,9 +103,9 @@ public void pushDRegexp(Runnable callback, RegexpOptions options, int arity) {
* Push a symbol on the stack
* @param sym the symbol's string identifier
*/
public void pushSymbol(String sym) {
public void pushSymbol(String sym, Encoding encoding) {
loadContext();
adapter.invokedynamic("symbol", sig(JVM.OBJECT, ThreadContext.class), SymbolObjectSite.BOOTSTRAP, sym);
adapter.invokedynamic("symbol", sig(JVM.OBJECT, ThreadContext.class), SymbolObjectSite.BOOTSTRAP, sym, new String(encoding.getName()));
}

public void loadRuntime() {
@@ -1,6 +1,7 @@
package org.jruby.ir.targets;

import com.headius.invokebinder.Signature;
import org.jcodings.specific.USASCIIEncoding;
import org.jruby.*;
import org.jruby.compiler.NotCompilableException;
import org.jruby.compiler.impl.SkinnyMethodAdapter;
@@ -879,7 +880,8 @@ public void ConstMissingInstr(ConstMissingInstr constmissinginstr) {
jvmAdapter().checkcast("org/jruby/RubyModule");
jvmMethod().loadContext();
jvmAdapter().ldc("const_missing");
jvmMethod().pushSymbol(constmissinginstr.getMissingConst());
// FIXME: This has lost it's encoding info by this point
jvmMethod().pushSymbol(constmissinginstr.getMissingConst(), USASCIIEncoding.INSTANCE);
jvmMethod().invokeVirtual(Type.getType(RubyModule.class), Method.getMethod("org.jruby.runtime.builtin.IRubyObject callMethod(org.jruby.runtime.ThreadContext, java.lang.String, org.jruby.runtime.builtin.IRubyObject)"));
jvmStoreLocal(constmissinginstr.getResult());
}
@@ -2075,7 +2077,7 @@ public void SValue(SValue svalue) {

@Override
public void Symbol(Symbol symbol) {
jvmMethod().pushSymbol(symbol.getName());
jvmMethod().pushSymbol(symbol.getName(), symbol.getEncoding());
}

@Override
@@ -1,5 +1,8 @@
package org.jruby.ir.targets;

import org.jcodings.Encoding;
import org.jruby.RubySymbol;
import org.jruby.ir.runtime.IRRuntimeHelpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.objectweb.asm.Handle;
@@ -17,20 +20,24 @@
*/
public class SymbolObjectSite extends LazyObjectSite {
private final String value;
private final String encoding;

public SymbolObjectSite(MethodType type, String value) {
public SymbolObjectSite(MethodType type, String value, String encoding) {
super(type);

this.value = value;
this.encoding = encoding;
}

public static final Handle BOOTSTRAP = new Handle(Opcodes.H_INVOKESTATIC, p(SymbolObjectSite.class), "bootstrap", sig(CallSite.class, MethodHandles.Lookup.class, String.class, MethodType.class, String.class));
public static final Handle BOOTSTRAP = new Handle(Opcodes.H_INVOKESTATIC, p(SymbolObjectSite.class), "bootstrap",
sig(CallSite.class, MethodHandles.Lookup.class, String.class, MethodType.class, String.class, String.class));

public static CallSite bootstrap(MethodHandles.Lookup lookup, String name, MethodType type, String value) {
return new SymbolObjectSite(type, value).bootstrap(lookup);
public static CallSite bootstrap(MethodHandles.Lookup lookup, String name, MethodType type, String value, String encoding) {
return new SymbolObjectSite(type, value, encoding).bootstrap(lookup);
}

public IRubyObject construct(ThreadContext context) {
return context.runtime.newSymbol(value);
return RubySymbol.newSymbol(context.runtime, value,
IRRuntimeHelpers.retrieveJCodingsEncoding(context, encoding));
}
}

0 comments on commit 89f548e

Please sign in to comment.