From b7dc31fb2720eeee4fdc906d05aade2ff3b2bbbf Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Thu, 25 Mar 2021 22:40:37 -0700 Subject: [PATCH] Use `StringConcatFactory` for string concatenation on JDK 9+ JEP 280, released in JDK 9, proposes a new way to compile string concatenation using `invokedynamic` and `StringConcatFactory`. This new approach generates less bytecode, doesn't have to incur the overhead of `StringBuilder` allocations, and allows users to swap the concatenation technique at runtime. This changes the codegen when the target is at least Java 9 to leverage `invokedynamic` and `StringConcatFactory`. On Java 8, the old `StringBuilder` approach is still used. --- .../nsc/backend/jvm/BCodeBodyBuilder.scala | 114 ++++++++++++++---- .../nsc/backend/jvm/BCodeIdiomatic.scala | 39 +++++- test/files/run/StringConcat.check | Bin 0 -> 5587 bytes test/files/run/StringConcat.scala | 86 +++++++++++++ 4 files changed, 209 insertions(+), 30 deletions(-) create mode 100644 test/files/run/StringConcat.check create mode 100644 test/files/run/StringConcat.scala diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeBodyBuilder.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeBodyBuilder.scala index a40c04e6a527..753407346a14 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeBodyBuilder.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeBodyBuilder.scala @@ -33,7 +33,7 @@ abstract class BCodeBodyBuilder extends BCodeSkelBuilder { import bTypes._ import coreBTypes._ import definitions._ - import genBCode.postProcessor.backendUtils.addIndyLambdaImplMethod + import genBCode.postProcessor.backendUtils.{addIndyLambdaImplMethod, classfileVersion} import genBCode.postProcessor.callGraph.{inlineAnnotatedCallsites, noInlineAnnotatedCallsites} /* @@ -990,44 +990,110 @@ abstract class BCodeBodyBuilder extends BCodeSkelBuilder { } } + /* Generate string concatenation + * + * On JDK 8: create and append using `StringBuilder` + * On JDK 9+: use
`invokedynamic` with `StringConcatFactory` + */ def genStringConcat(tree: Tree): BType = { lineNumber(tree) liftStringConcat(tree) match { - // Optimization for expressions of the form "" + x. We can avoid the StringBuilder. + // Optimization for expressions of the form "" + x case List(Literal(Constant("")), arg) => genLoad(arg, ObjectRef) genCallMethod(String_valueOf, InvokeStyle.Static, arg.pos) case concatenations => - val approxBuilderSize = concatenations.map { - case Literal(Constant(s: String)) => s.length - case Literal(c @ Constant(value)) if c.isNonUnitAnyVal => String.valueOf(c).length - case _ => - // could add some guess based on types of primitive args. - // or, we could stringify all the args onto the stack, compute the exact size of - // the StringBuilder. - // or, just let https://openjdk.java.net/jeps/280 (or a re-implementation thereof in our 2.13.x stdlib) do all the hard work at link time - 0 - }.sum - bc.genStartConcat(tree.pos, approxBuilderSize) - def isEmptyString(t: Tree) = t match { - case Literal(Constant("")) => true - case _ => false - } - for (elem <- concatenations if !isEmptyString(elem)) { - val loadedElem = elem match { + + val concatArguments = concatenations.view + .filter { + case Literal(Constant("")) => false // empty strings are no-ops in concatenation + case _ => true + } + .map { case Apply(boxOp, value :: Nil) if currentRun.runDefinitions.isBox(boxOp.symbol) => // Eliminate boxing of primitive values. Boxing is introduced by erasure because // there's only a single synthetic `+` method "added" to the string class. 
value + case other => other + } + .toList + + // `StringConcatFactory` only got added in JDK 9, so use `StringBuilder` for lower + if (classfileVersion.get < asm.Opcodes.V9) { + + // Estimate capacity needed for the string builder + val approxBuilderSize = concatArguments.view.map { + case Literal(Constant(s: String)) => s.length + case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length + case _ => 0 + }.sum + bc.genNewStringBuilder(tree.pos, approxBuilderSize) + + for (elem <- concatArguments) { + val elemType = tpeTK(elem) + genLoad(elem, elemType) + bc.genStringBuilderAppend(elemType, elem.pos) + } + bc.genStringBuilderEnd(tree.pos) + } else { + + /* `StringConcatFactory#makeConcatWithConstants` accepts max 200 argument slots. If + * the string concatenation is longer (unlikely), we spill into multiple calls + */ + val MaxIndySlots = 200 + val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument + val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant + + val recipe = new StringBuilder() + val argTypes = Seq.newBuilder[asm.Type] + val constVals = Seq.newBuilder[String] + var totalArgSlots = 0 + var countConcats = 1 // ie. 
1 + how many times we spilled + + for (elem <- concatArguments) { + val tpe = tpeTK(elem) + val elemSlots = tpe.size + + // Unlikely spill case + if (totalArgSlots + elemSlots >= MaxIndySlots) { + bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result()) + countConcats += 1 + totalArgSlots = 0 + recipe.setLength(0) + argTypes.clear() + constVals.clear() + } - case _ => elem + elem match { + case Literal(Constant(s: String)) => + if (s.contains(TagArg) || s.contains(TagConst)) { + totalArgSlots += elemSlots + recipe.append(TagConst) + constVals += s + } else { + recipe.append(s) + } + + case other => + totalArgSlots += elemSlots + recipe.append(TagArg) + val tpe = tpeTK(elem) + argTypes += tpe.toASMType + genLoad(elem, tpe) + } + } + bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result()) + + // If we spilled, generate one final concat + if (countConcats > 1) { + bc.genIndyStringConcat( + TagArg.toString * countConcats, + Seq.fill(countConcats)(StringRef.toASMType), + Seq.empty + ) } - val elemType = tpeTK(loadedElem) - genLoad(loadedElem, elemType) - bc.genConcat(elemType, loadedElem.pos) } - bc.genEndConcat(tree.pos) } StringRef } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeIdiomatic.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeIdiomatic.scala index 86c0b83671c4..92de2aca3b9a 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeIdiomatic.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeIdiomatic.scala @@ -175,10 +175,11 @@ abstract class BCodeIdiomatic { } // end of method genPrimitiveShift() - /* + /* Creates a new `StringBuilder` instance with the requested capacity + * * can-multi-thread */ - final def genStartConcat(pos: Position, size: Int): Unit = { + final def genNewStringBuilder(pos: Position, size: Int): Unit = { jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName) jmethod.visitInsn(Opcodes.DUP) jmethod.visitLdcInsn(Integer.valueOf(size)) @@ -191,10 +192,11 @@ 
abstract class BCodeIdiomatic { ) } - /* + /* Issue a call to `StringBuilder#append` for the right element type + * * can-multi-thread */ - def genConcat(elemType: BType, pos: Position): Unit = { + final def genStringBuilderAppend(elemType: BType, pos: Position): Unit = { val paramType: BType = elemType match { case ct: ClassBType if ct.isSubtypeOf(StringRef).get => StringRef case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef).get => jlStringBufferRef @@ -211,13 +213,38 @@ abstract class BCodeIdiomatic { invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor, pos) } - /* + /* Extract the built `String` from the `StringBuilder` + *: * can-multi-thread */ - final def genEndConcat(pos: Position): Unit = { + final def genStringBuilderEnd(pos: Position): Unit = { invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;", pos) } + /* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants` + * (only works for JDK 9+) + * + * can-multi-thread + */ + final def genIndyStringConcat( + recipe: String, + argTypes: Seq[asm.Type], + constants: Seq[String] + ): Unit = { + jmethod.visitInvokeDynamicInsn( + "makeConcatWithConstants", + asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*), + new asm.Handle( + asm.Opcodes.H_INVOKESTATIC, + "java/lang/invoke/StringConcatFactory", + "makeConcatWithConstants", + "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;", + false + ), + (recipe +: constants):_* + ) + } + /* * Emits one or more conversion instructions based on the types given as arguments. 
* diff --git a/test/files/run/StringConcat.check b/test/files/run/StringConcat.check new file mode 100644 index 0000000000000000000000000000000000000000..10eaa9a20d1b98d974875029c1d16d893b13f4e1 GIT binary patch literal 5587 zcmeHKOHbQC5H3CXSB$J45~&!4kF{g9i(`7BnJR!WndA!XgU3;{}f2{S1QkM9yHFWGm>^ z17j{QF(XMTElWluC12qD9PU?%1i9jQx_~8RRFE{?0iV+yczwqkIi3SNhHORQ+wBxO z(@fwl;EFTc0A+d;-`ALz;R2`Ka_$9(=l7h@c z4YehIqzc33>_f!-5OA5tF%>WYMz>}r4Rz{y^CHUbn)FDH;c7+1ls@HDu{MDRvLOyj zxTA1AgM$okb+}I7?K)(=rfku%BlJs?S5BCW;f7^6jpqkpe=r!60Bo^4-^sxGflg0m21r7VmB+NY;1wE8@A!;2CIFl=TWTd z#a`|f4