-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add option to turn off optimization for X86 assembler #75895
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-x86 Author: Yi Kong (kongy) Changes: There are use cases where we expect the assembler to produce the exact instructions as written, without applying any optimizations. Full diff: https://github.com/llvm/llvm-project/pull/75895.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 1d40ce35c1b416..5390dd94b760d8 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -48,6 +48,10 @@ static cl::opt<bool> LVIInlineAsmHardening(
cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
" Injection (LVI). This feature is experimental."), cl::Hidden);
+static cl::opt<bool> AsmOptimize(
+ "x86-inline-asm-optimize", cl::init(true),
+ cl::desc("Optimize X86 inline assembly code."), cl::Hidden);
+
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
@@ -3670,11 +3674,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
- if (ForcedVEXEncoding != VEXEncoding_VEX3 &&
+ if (AsmOptimize && ForcedVEXEncoding != VEXEncoding_VEX3 &&
X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
return true;
- if (X86::optimizeShiftRotateWithImmediateOne(Inst))
+ if (AsmOptimize && X86::optimizeShiftRotateWithImmediateOne(Inst))
return true;
switch (Inst.getOpcode()) {
diff --git a/llvm/test/MC/X86/avx-64-att.s b/llvm/test/MC/X86/avx-64-att.s
index 39ee048c3736d4..7cdd93891c94cb 100644
--- a/llvm/test/MC/X86/avx-64-att.s
+++ b/llvm/test/MC/X86/avx-64-att.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck --check-prefixes=CHECK,OPT %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding -x86-inline-asm-optimize=false %s | FileCheck --check-prefixes=CHECK,NOOPT %s
// CHECK: vaddss %xmm8, %xmm9, %xmm10
// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0]
@@ -3168,20 +3169,28 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
vdivpd %ymm12, %ymm4, %ymm6
-// CHECK: vaddps %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9c,0x58,0xf4]
+// OPT: vaddps %ymm4, %ymm12, %ymm6
+// OPT: encoding: [0xc5,0x9c,0x58,0xf4]
+// NOOPT: vaddps %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
vaddps %ymm12, %ymm4, %ymm6
-// CHECK: vaddpd %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9d,0x58,0xf4]
+// OPT: vaddpd %ymm4, %ymm12, %ymm6
+// OPT: encoding: [0xc5,0x9d,0x58,0xf4]
+// NOOPT: vaddpd %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
vaddpd %ymm12, %ymm4, %ymm6
-// CHECK: vmulps %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9c,0x59,0xf4]
+// OPT: vmulps %ymm4, %ymm12, %ymm6
+// OPT: encoding: [0xc5,0x9c,0x59,0xf4]
+// NOOPT: vmulps %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
vmulps %ymm12, %ymm4, %ymm6
-// CHECK: vmulpd %ymm4, %ymm12, %ymm6
-// CHECK: encoding: [0xc5,0x9d,0x59,0xf4]
+// OPT: vmulpd %ymm4, %ymm12, %ymm6
+// OPT: encoding: [0xc5,0x9d,0x59,0xf4]
+// NOOPT: vmulpd %ymm12, %ymm4, %ymm6
+// NOOPT: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
vmulpd %ymm12, %ymm4, %ymm6
// CHECK: vmaxps (%rax), %ymm4, %ymm6
@@ -4203,7 +4212,8 @@ _foo2:
{vex3} vmovq %xmm0, %xmm8
// CHECK: vmovq %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x79,0xd6,0xc0]
+// OPT: encoding: [0xc5,0x79,0xd6,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7a,0x7e,0xc0]
vmovq %xmm8, %xmm0
// CHECK: vmovq %xmm8, %xmm0
@@ -4219,7 +4229,8 @@ _foo2:
{vex3} vmovdqa %xmm0, %xmm8
// CHECK: vmovdqa %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x79,0x7f,0xc0]
+// OPT: encoding: [0xc5,0x79,0x7f,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x79,0x6f,0xc0]
vmovdqa %xmm8, %xmm0
// CHECK: vmovdqa %xmm8, %xmm0
@@ -4235,7 +4246,8 @@ _foo2:
{vex3} vmovdqu %xmm0, %xmm8
// CHECK: vmovdqu %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x7a,0x7f,0xc0]
+// OPT: encoding: [0xc5,0x7a,0x7f,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7a,0x6f,0xc0]
vmovdqu %xmm8, %xmm0
// CHECK: vmovdqu %xmm8, %xmm0
@@ -4251,7 +4263,8 @@ _foo2:
{vex3} vmovaps %xmm0, %xmm8
// CHECK: vmovaps %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x78,0x29,0xc0]
+// OPT: encoding: [0xc5,0x78,0x29,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x78,0x28,0xc0]
vmovaps %xmm8, %xmm0
// CHECK: vmovaps %xmm8, %xmm0
@@ -4267,7 +4280,8 @@ _foo2:
{vex3} vmovaps %ymm0, %ymm8
// CHECK: vmovaps %ymm8, %ymm0
-// CHECK: encoding: [0xc5,0x7c,0x29,0xc0]
+// OPT: encoding: [0xc5,0x7c,0x29,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7c,0x28,0xc0]
vmovaps %ymm8, %ymm0
// CHECK: vmovaps %ymm8, %ymm0
@@ -4283,7 +4297,8 @@ _foo2:
{vex3} vmovups %xmm0, %xmm8
// CHECK: vmovups %xmm8, %xmm0
-// CHECK: encoding: [0xc5,0x78,0x11,0xc0]
+// OPT: encoding: [0xc5,0x78,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x78,0x10,0xc0]
vmovups %xmm8, %xmm0
// CHECK: vmovups %xmm8, %xmm0
@@ -4299,7 +4314,8 @@ _foo2:
{vex3} vmovups %ymm0, %ymm8
// CHECK: vmovups %ymm8, %ymm0
-// CHECK: encoding: [0xc5,0x7c,0x11,0xc0]
+// OPT: encoding: [0xc5,0x7c,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7c,0x10,0xc0]
vmovups %ymm8, %ymm0
// CHECK: vmovups %ymm8, %ymm0
@@ -4323,7 +4339,8 @@ _foo2:
{vex3} vmovss %xmm0, %xmm8, %xmm0
// CHECK: vmovss %xmm8, %xmm0, %xmm0
-// CHECK: encoding: [0xc5,0x7a,0x11,0xc0]
+// OPT: encoding: [0xc5,0x7a,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7a,0x10,0xc0]
vmovss %xmm8, %xmm0, %xmm0
// CHECK: vmovss %xmm8, %xmm0, %xmm0
@@ -4347,7 +4364,8 @@ _foo2:
{vex3} vmovsd %xmm0, %xmm8, %xmm0
// CHECK: vmovsd %xmm8, %xmm0, %xmm0
-// CHECK: encoding: [0xc5,0x7b,0x11,0xc0]
+// OPT: encoding: [0xc5,0x7b,0x11,0xc0]
+// NOOPT: encoding: [0xc4,0xc1,0x7b,0x10,0xc0]
vmovsd %xmm8, %xmm0, %xmm0
// CHECK: vmovsd %xmm8, %xmm0, %xmm0
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
26fb586
to
75bceeb
Compare
I think it should be a target-independent flag. Maybe we should add a flag "O0" for llvm-mc ? |
75bceeb
to
8ec4144
Compare
This is a great suggestion, but can we add this as a hidden flag for now to unblock Android's integration? |
@@ -48,6 +48,10 @@ static cl::opt<bool> LVIInlineAsmHardening( | |||
cl::desc("Harden inline assembly code that may be vulnerable to Load Value" | |||
" Injection (LVI). This feature is experimental."), cl::Hidden); | |||
|
|||
static cl::opt<bool> AsmOptimize("x86-asm-optimize", cl::init(true), | |||
cl::desc("Optimize X86 inline assembly code."), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment needs to be updated too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There are use cases where we expect the assembler to produce the exact instructions as written, without applying any optimizations.
8ec4144
to
c8a81be
Compare
The intention is not quite clear to me. For example, if you just want to always get VEX3 encoding, you can use the {vex3} prefix. |
We have tests that use the clang assembler as a verification tool, to verify that our assembler generates the correct (i.e. the same) output. Therefore, it is undesirable for the clang assembler to perform optimizations. |
X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode()))) | ||
return true; | ||
|
||
if (X86::optimizeShiftRotateWithImmediateOne(Inst)) | ||
if (AsmOptimize && X86::optimizeShiftRotateWithImmediateOne(Inst)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kongy does your assembler not optimize shifts/rotates by 1 to use the short form? That seems like a pretty basic optimization that has been around for x86 for a very long time.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After some investigation, our assembler (Android ART) does implement the optimizeShiftRotateWithImmediateOne optimization. I guess we only need to turn off the VEX3ToVEX2 optimization.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you still need this patch? If so, I guess you might need to change it to "x86-asm-vex3-to-vex2"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We ended up implementing the same optimization in our assembler. This change is no longer required.
There are use cases where we expect the assembler to produce the exact instructions as written, without applying any optimizations.