[Utils][x86] add an option to reduce scrubbing of shuffles with memops
I was drafting a patch that would increase broadcast load usage,
but our shuffle scrubbing makes it impossible to see whether the
memory operand offset is being created correctly. I'm proposing to
make that scrubbing optional (the new option defaults to 'off' for
now to reduce regression test churn).

The updated files provide examples of tests where we can now verify
that the pointer offset for a loaded memory operand is correct. We
still have stack and constant scrubbing that can obscure the operand
even if we don't scrub the entire instruction.
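
As a concrete illustration from the avx-splat.ll hunk below: with full
scrubbing, the broadcast load is checked as
'vmovddup {{.*#+}} xmm0 = mem[0,0]', which hides the 8(%rdi) address;
with the new option the CHECK line keeps 'vmovddup 8(%rdi), %xmm0', so
the pointer offset can be verified.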

Differential Revision: https://reviews.llvm.org/D74775
rotateright committed Feb 20, 2020
1 parent 977cd66 commit 15e20dc
Showing 4 changed files with 36 additions and 22 deletions.
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx-splat.ll
@@ -169,12 +169,12 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; X86-LABEL: splat_load_2f64_11:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86-NEXT: vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
; X86-NEXT: retl
;
; X64-LABEL: splat_load_2f64_11:
; X64: # %bb.0:
; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
; X64-NEXT: retq
%x = load <2 x double>, <2 x double>* %ptr
%x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/X86/extractelement-load.ll
@@ -9,13 +9,13 @@ define i32 @t(<2 x i64>* %val) nounwind {
; X32-SSE2-LABEL: t:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
; X32-SSE2-NEXT: pshufd $78, (%eax), %xmm0 # xmm0 = mem[2,3,0,1]
; X32-SSE2-NEXT: movd %xmm0, %eax
; X32-SSE2-NEXT: retl
;
; X64-SSSE3-LABEL: t:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
; X64-SSSE3-NEXT: pshufd $78, (%rdi), %xmm0 # xmm0 = mem[2,3,0,1]
; X64-SSSE3-NEXT: movd %xmm0, %eax
; X64-SSSE3-NEXT: retq
;
@@ -59,13 +59,13 @@ define void @t3(<2 x double>* %a0) {
;
; X64-SSSE3-LABEL: t3:
; X64-SSSE3: # %bb.0: # %bb
; X64-SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSSE3-NEXT: movsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
; X64-SSSE3-NEXT: movsd %xmm0, (%rax)
; X64-SSSE3-NEXT: retq
;
; X64-AVX-LABEL: t3:
; X64-AVX: # %bb.0: # %bb
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: vmovsd 8(%rdi), %xmm0 # xmm0 = mem[0],zero
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
; X64-AVX-NEXT: retq
bb:
@@ -138,7 +138,7 @@ define float @t6(<8 x float> *%a0) {
; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1
; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: andps %xmm1, %xmm2
; X32-SSE2-NEXT: andnps %xmm0, %xmm1
; X32-SSE2-NEXT: orps %xmm2, %xmm1
@@ -150,21 +150,21 @@ define float @t6(<8 x float> *%a0) {
;
; X64-SSSE3-LABEL: t6:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
; X64-SSSE3-NEXT: movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
; X64-SSSE3-NEXT: xorps %xmm0, %xmm0
; X64-SSSE3-NEXT: cmpeqss %xmm1, %xmm0
; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: andps %xmm0, %xmm2
; X64-SSSE3-NEXT: andnps %xmm1, %xmm0
; X64-SSSE3-NEXT: orps %xmm2, %xmm0
; X64-SSSE3-NEXT: retq
;
; X64-AVX-LABEL: t6:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
; X64-AVX-NEXT: vmovshdup (%rdi), %xmm0 # xmm0 = mem[1,1,3,3]
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: retq
%vecload = load <8 x float>, <8 x float>* %a0, align 32
@@ -183,7 +183,7 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
; X32-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
; X32-SSE2-NEXT: cmpltss %xmm0, %xmm1
; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: movss (%eax), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: andps %xmm1, %xmm2
; X32-SSE2-NEXT: andnps %xmm0, %xmm1
; X32-SSE2-NEXT: orps %xmm2, %xmm1
@@ -192,10 +192,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
;
; X64-SSSE3-LABEL: PR43971:
; X64-SSSE3: # %bb.0: # %entry
; X64-SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: movss 24(%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: xorps %xmm1, %xmm1
; X64-SSSE3-NEXT: cmpltss %xmm0, %xmm1
; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: movss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: andps %xmm1, %xmm2
; X64-SSSE3-NEXT: andnps %xmm0, %xmm1
; X64-SSSE3-NEXT: orps %xmm2, %xmm1
@@ -204,10 +204,10 @@ define void @PR43971(<8 x float> *%a0, float *%a1) {
;
; X64-AVX-LABEL: PR43971:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; X64-AVX-NEXT: vpermilpd $1, 16(%rdi), %xmm0 # xmm0 = mem[1,0]
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1
; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vmovss (%rsi), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rsi)
; X64-AVX-NEXT: retq
@@ -230,7 +230,7 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
; X32-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-SSE2-NEXT: xorps %xmm1, %xmm1
; X32-SSE2-NEXT: cmpeqss %xmm0, %xmm1
; X32-SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: movss {{\.LCPI.*}}, %xmm2 # xmm2 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: andps %xmm1, %xmm2
; X32-SSE2-NEXT: andnps %xmm0, %xmm1
; X32-SSE2-NEXT: orps %xmm2, %xmm1
@@ -241,21 +241,21 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
;
; X64-SSSE3-LABEL: PR43971_1:
; X64-SSSE3: # %bb.0: # %entry
; X64-SSSE3-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3]
; X64-SSSE3-NEXT: movshdup (%rdi), %xmm1 # xmm1 = mem[1,1,3,3]
; X64-SSSE3-NEXT: xorps %xmm0, %xmm0
; X64-SSSE3-NEXT: cmpeqss %xmm1, %xmm0
; X64-SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: movss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X64-SSSE3-NEXT: andps %xmm0, %xmm2
; X64-SSSE3-NEXT: andnps %xmm1, %xmm0
; X64-SSSE3-NEXT: orps %xmm2, %xmm0
; X64-SSSE3-NEXT: retq
;
; X64-AVX-LABEL: PR43971_1:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
; X64-AVX-NEXT: vmovshdup (%rdi), %xmm0 # xmm0 = mem[1,1,3,3]
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vmovss {{.*}}(%rip), %xmm2 # xmm2 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; X64-AVX-NEXT: retq
entry:
13 changes: 12 additions & 1 deletion llvm/utils/UpdateTestChecks/asm.py
@@ -148,6 +148,12 @@ class string:
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
        flags=re.M))

SCRUB_X86_SHUFFLES_NO_MEM_RE = (
    re.compile(
        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
        flags=re.M))

SCRUB_X86_SPILL_RELOAD_RE = (
    re.compile(
        r'-?\d+\(%([er])[sb]p\)(.*(?:Spill|Reload))$',
@@ -163,8 +169,13 @@ def scrub_asm_x86(asm, args):
  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
  # Expand the tabs used for indentation.
  asm = string.expandtabs(asm, 2)

  # Detect shuffle asm comments and hide the operands in favor of the comments.
  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
  if getattr(args, 'no_x86_scrub_mem_shuffle', True):
    asm = SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', asm)
  else:
    asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)

  # Detect stack spills and reloads and hide their exact offset and whether
  # they used the stack pointer or frame pointer.
  asm = SCRUB_X86_SPILL_RELOAD_RE.sub(r'{{[-0-9]+}}(%\1{{[sb]}}p)\2', asm)
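
The only difference between the two patterns is the (?!.*(?:mem)) negative
lookahead, which makes the NO_MEM variant skip any shuffle whose asm comment
refers to a memory operand. Below is a minimal standalone sketch of the
effect (not part of the patch; the sample asm lines and variable names are
invented for the demo, while the patterns and replacement string are copied
from the hunk above):

import re

SCRUB_X86_SHUFFLES_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
    flags=re.M)
SCRUB_X86_SHUFFLES_NO_MEM_RE = re.compile(
    r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = (?!.*(?:mem)).*)$',
    flags=re.M)

load_shuffle = ' vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]'
reg_shuffle = ' vpermilps $27, %xmm0, %xmm0 # xmm0 = xmm0[3,2,1,0]'

# Default scrubbing hides the memory operand and its offset.
print(SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', load_shuffle))
# ->  vmovddup {{.*#+}} xmm0 = mem[0,0]

# With --no_x86_scrub_mem_shuffle, a shuffle with a memory operand is left
# intact, so the 8(%rdi) offset stays visible in the generated CHECK line.
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', load_shuffle))
# ->  vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]

# Register-only shuffles are still scrubbed by both patterns.
print(SCRUB_X86_SHUFFLES_NO_MEM_RE.sub(r'\1 {{.*#+}} \2', reg_shuffle))
# ->  vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
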
3 changes: 3 additions & 0 deletions llvm/utils/update_llc_test_checks.py
@@ -36,6 +36,9 @@ def main():
      help='Use more regex for x86 matching to reduce diffs between various subtargets')
  parser.add_argument(
      '--no_x86_scrub_rip', action='store_false', dest='x86_scrub_rip')
  parser.add_argument(
      '--no_x86_scrub_mem_shuffle', action='store_true', default=False,
      help='Reduce scrubbing shuffles with memory operands')
  parser.add_argument('tests', nargs='+')
  args = common.parse_commandline_args(parser)
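
With the flag wired into the parser, an affected test can be regenerated
with, e.g., llvm/utils/update_llc_test_checks.py --no_x86_scrub_mem_shuffle
llvm/test/CodeGen/X86/avx-splat.ll (assuming a built llc is on PATH);
running without the flag keeps the default, fully scrubbed shuffle CHECK
lines.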

