Add option to turn off optimization for X86 assembler #75895

Closed
kongy wants to merge 1 commit

Conversation

@kongy (Collaborator) commented Dec 19, 2023

There are use cases where we expect the assembler to produce exactly the instructions as written, without applying any optimizations.

@kongy requested a review from KanRobert December 19, 2023 05:56
@llvmbot added the backend:X86 and mc (Machine (object) code) labels Dec 19, 2023
@llvmbot (Collaborator) commented Dec 19, 2023

@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-x86

Author: Yi Kong (kongy)

Changes

There are use cases where we expect the assembler to produce exactly the instructions as written, without applying any optimizations.


Full diff: https://github.com/llvm/llvm-project/pull/75895.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp (+6-2)
  • (modified) llvm/test/MC/X86/avx-64-att.s (+36-18)
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 1d40ce35c1b416..5390dd94b760d8 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -48,6 +48,10 @@ static cl::opt<bool> LVIInlineAsmHardening(
     cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
              " Injection (LVI). This feature is experimental."), cl::Hidden);
 
+static cl::opt<bool> AsmOptimize(
+    "x86-inline-asm-optimize", cl::init(true),
+    cl::desc("Optimize X86 inline assembly code."), cl::Hidden);
+
 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
@@ -3670,11 +3674,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
 }
 
 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
-  if (ForcedVEXEncoding != VEXEncoding_VEX3 &&
+  if (AsmOptimize && ForcedVEXEncoding != VEXEncoding_VEX3 &&
       X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
     return true;
 
-  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
+  if (AsmOptimize && X86::optimizeShiftRotateWithImmediateOne(Inst))
     return true;
 
   switch (Inst.getOpcode()) {
diff --git a/llvm/test/MC/X86/avx-64-att.s b/llvm/test/MC/X86/avx-64-att.s
index 39ee048c3736d4..7cdd93891c94cb 100644
--- a/llvm/test/MC/X86/avx-64-att.s
+++ b/llvm/test/MC/X86/avx-64-att.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck --check-prefixes=CHECK,OPT %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding -x86-inline-asm-optimize=false %s | FileCheck --check-prefixes=CHECK,NOOPT %s
 
 // CHECK: vaddss  %xmm8, %xmm9, %xmm10
 // CHECK:  encoding: [0xc4,0x41,0x32,0x58,0xd0]
@@ -3168,20 +3169,28 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
           vdivpd  %ymm12, %ymm4, %ymm6
 
-// CHECK: vaddps  %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9c,0x58,0xf4]
+// OPT:   vaddps  %ymm4, %ymm12, %ymm6
+// OPT:   encoding: [0xc5,0x9c,0x58,0xf4]
+// NOOPT: vaddps  %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
           vaddps  %ymm12, %ymm4, %ymm6
 
-// CHECK: vaddpd  %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9d,0x58,0xf4]
+// OPT:   vaddpd  %ymm4, %ymm12, %ymm6
+// OPT:   encoding: [0xc5,0x9d,0x58,0xf4]
+// NOOPT: vaddpd  %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
           vaddpd  %ymm12, %ymm4, %ymm6
 
-// CHECK: vmulps  %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9c,0x59,0xf4]
+// OPT:   vmulps  %ymm4, %ymm12, %ymm6
+// OPT:   encoding: [0xc5,0x9c,0x59,0xf4]
+// NOOPT: vmulps  %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
           vmulps  %ymm12, %ymm4, %ymm6
 
-// CHECK: vmulpd  %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9d,0x59,0xf4]
+// OPT:   vmulpd  %ymm4, %ymm12, %ymm6
+// OPT:   encoding: [0xc5,0x9d,0x59,0xf4]
+// NOOPT: vmulpd  %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
           vmulpd  %ymm12, %ymm4, %ymm6
 
 // CHECK: vmaxps  (%rax), %ymm4, %ymm6
@@ -4203,7 +4212,8 @@ _foo2:
           {vex3} vmovq %xmm0, %xmm8
 
 // CHECK: vmovq %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x79,0xd6,0xc0]
+// OPT:   encoding: [0xc5,0x79,0xd6,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7a,0x7e,0xc0]
           vmovq %xmm8, %xmm0
 
 // CHECK: vmovq %xmm8, %xmm0
@@ -4219,7 +4229,8 @@ _foo2:
           {vex3} vmovdqa %xmm0, %xmm8
 
 // CHECK: vmovdqa %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x79,0x7f,0xc0]
+// OPT:   encoding: [0xc5,0x79,0x7f,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x79,0x6f,0xc0]
           vmovdqa %xmm8, %xmm0
 
 // CHECK: vmovdqa %xmm8, %xmm0
@@ -4235,7 +4246,8 @@ _foo2:
           {vex3} vmovdqu %xmm0, %xmm8
 
 // CHECK: vmovdqu %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x7a,0x7f,0xc0]
+// OPT:   encoding: [0xc5,0x7a,0x7f,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7a,0x6f,0xc0]
           vmovdqu %xmm8, %xmm0
 
 // CHECK: vmovdqu %xmm8, %xmm0
@@ -4251,7 +4263,8 @@ _foo2:
           {vex3} vmovaps %xmm0, %xmm8
 
 // CHECK: vmovaps %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x78,0x29,0xc0]
+// OPT:   encoding: [0xc5,0x78,0x29,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x78,0x28,0xc0]
           vmovaps %xmm8, %xmm0
 
 // CHECK: vmovaps %xmm8, %xmm0
@@ -4267,7 +4280,8 @@ _foo2:
           {vex3} vmovaps %ymm0, %ymm8
 
 // CHECK: vmovaps %ymm8, %ymm0
-// CHECK: encoding: [0xc5,0x7c,0x29,0xc0]
+// OPT:   encoding: [0xc5,0x7c,0x29,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7c,0x28,0xc0]
           vmovaps %ymm8, %ymm0
 
 // CHECK: vmovaps %ymm8, %ymm0
@@ -4283,7 +4297,8 @@ _foo2:
           {vex3} vmovups %xmm0, %xmm8
 
 // CHECK: vmovups %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x78,0x11,0xc0]
+// OPT:   encoding: [0xc5,0x78,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x78,0x10,0xc0]
           vmovups %xmm8, %xmm0
 
 // CHECK: vmovups %xmm8, %xmm0
@@ -4299,7 +4314,8 @@ _foo2:
           {vex3} vmovups %ymm0, %ymm8
 
 // CHECK: vmovups %ymm8, %ymm0
-// CHECK: encoding: [0xc5,0x7c,0x11,0xc0]
+// OPT:   encoding: [0xc5,0x7c,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7c,0x10,0xc0]
           vmovups %ymm8, %ymm0
 
 // CHECK: vmovups %ymm8, %ymm0
@@ -4323,7 +4339,8 @@ _foo2:
           {vex3} vmovss %xmm0, %xmm8, %xmm0
 
 // CHECK: vmovss %xmm8, %xmm0, %xmm0
-// CHECK: encoding: [0xc5,0x7a,0x11,0xc0]
+// OPT:   encoding: [0xc5,0x7a,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7a,0x10,0xc0]
           vmovss %xmm8, %xmm0, %xmm0
 
 // CHECK: vmovss %xmm8, %xmm0, %xmm0
@@ -4347,7 +4364,8 @@ _foo2:
           {vex3} vmovsd %xmm0, %xmm8, %xmm0
 
 // CHECK: vmovsd %xmm8, %xmm0, %xmm0
-// CHECK: encoding: [0xc5,0x7b,0x11,0xc0]
+// OPT:   encoding: [0xc5,0x7b,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7b,0x10,0xc0]
           vmovsd %xmm8, %xmm0, %xmm0
 
 // CHECK: vmovsd %xmm8, %xmm0, %xmm0

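As an illustration (a minimal sketch, not part of the patch; the flag name, RUN invocation, and encodings are taken from the test diff above), disabling the hidden flag keeps the instruction exactly as written instead of rewriting it to the 2-byte VEX form:

// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding -x86-inline-asm-optimize=false %s
// With the flag disabled, the VEX3-to-VEX2 rewrite is skipped, so the operands are
// not swapped and the 3-byte VEX form is emitted:
//   vaddps %ymm12, %ymm4, %ymm6   encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
// With the default (flag enabled), the same input is printed as:
//   vaddps %ymm4, %ymm12, %ymm6   encoding: [0xc5,0x9c,0x58,0xf4]
          vaddps  %ymm12, %ymm4, %ymm6
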
github-actions bot commented Dec 19, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@KanRobert (Contributor):

I think it should be a target-independent flag. Maybe we should add an "O0" flag to llvm-mc?

@kongy (Collaborator, Author) commented Dec 19, 2023

> I think it should be a target-independent flag. Maybe we should add an "O0" flag to llvm-mc?

This is a great suggestion, but can we add this as a hidden flag for now to unblock Android's integration?

@@ -48,6 +48,10 @@ static cl::opt<bool> LVIInlineAsmHardening(
     cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
              " Injection (LVI). This feature is experimental."), cl::Hidden);
 
+static cl::opt<bool> AsmOptimize("x86-asm-optimize", cl::init(true),
+                                 cl::desc("Optimize X86 inline assembly code."),

Contributor:

Comment needs to be updated too.

@kongy (Collaborator, Author):

Done.

There are use cases where we expect the assembler to produce exactly the instructions as written, without applying any optimizations.

@phoebewang (Contributor):

The intention is not quite clear to me. For example, if you just want to always get the VEX3 encoding, you can use {vex3} before the instruction. It is officially supported by both llvm-mc and gas. This looks less like an optimization than an assembler preference to me.
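
For comparison, a minimal sketch of the {vex3} prefix (the encodings are those of the vmovaps case in the test diff above; this snippet is illustrative, not part of the patch):

// {vex3} forces the 3-byte VEX form for a single instruction, independently of
// the assembler's VEX3-to-VEX2 rewrite.
// expected encoding: [0xc4,0xc1,0x78,0x28,0xc0]
          {vex3} vmovaps %xmm8, %xmm0
// Without the prefix and with the optimization enabled (the default), the same
// instruction is emitted in the 2-byte form: [0xc5,0x78,0x29,0xc0]
          vmovaps %xmm8, %xmm0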

@kongy (Collaborator, Author) commented Dec 19, 2023

> The intention is not quite clear to me. For example, if you just want to always get the VEX3 encoding, you can use {vex3} before the instruction. It is officially supported by both llvm-mc and gas. This looks less like an optimization than an assembler preference to me.

We have tests that use the Clang assembler as a verification tool, to verify that our assembler generates the correct (i.e. the same) output. Therefore, the Clang assembler performing optimizations is undesirable.

       X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
     return true;
 
-  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
+  if (AsmOptimize && X86::optimizeShiftRotateWithImmediateOne(Inst))

Collaborator:

@kongy does your assembler not optimize shifts/rotates by 1 to use the short form? That seems like a pretty basic optimization that has been around for x86 for a very long time.
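
For context, a minimal sketch of that rewrite (standard x86 encodings, not taken from this patch): when the immediate is 1, the dedicated shift-by-one opcode is used instead of the imm8 form written in the source.

// With optimizeShiftRotateWithImmediateOne, this is encoded as SHL r/m32, 1
// (opcode D1 /4): [0xd1,0xe0]
// Without it, the imm8 form as written is kept, SHL r/m32, imm8
// (opcode C1 /4 ib): [0xc1,0xe0,0x01]
          shll    $1, %eax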

@kongy (Collaborator, Author):

After some investigation, our assembler (Android ART) does implement the optimizeShiftRotateWithImmediateOne optimization. I guess we only need to turn off the VEX3ToVEX2 optimization.

Contributor:

Do you still need this patch? If so, I guess you might need to change it to "x86-asm-vex3-to-vex2"?

@kongy (Collaborator, Author):

We ended up implementing the same optimization in our assembler. This change is no longer required.

@kongy closed this Jan 29, 2024
@kongy deleted the x86-asm-no-opt branch January 29, 2024 06:32