diff --git a/llvm/test/CodeGen/X86/AMX/amx-config.ll b/llvm/test/CodeGen/X86/AMX/amx-config.ll index c66466dc53703e..453ce113c34b90 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-config.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-config.ll @@ -1,43 +1,105 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefix=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -verify-machineinstrs | FileCheck %s --check-prefix=SSE2 @buf = dso_local global [1024 x i8] zeroinitializer, align 64 @buf2 = dso_local global [1024 x i8] zeroinitializer, align 64 ; Function Attrs: nounwind uwtable define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) { -; CHECK-LABEL: test_api: -; CHECK: # %bb.0: -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: movsbl %sil, %eax -; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0 -; CHECK-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %si, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %dx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %dx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) -; CHECK-NEXT: je .LBB0_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: movl $buf, %ecx -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: movl $buf2, %ecx -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: movl $32, %edi -; CHECK-NEXT: tileloadd (%rcx,%rdi), %tmm0 -; CHECK-NEXT: tileloadd (%rcx,%rdi), %tmm2 -; CHECK-NEXT: tileloadd (%rcx,%rdi), %tmm1 -; CHECK-NEXT: tdpbssd %tmm2, %tmm0, %tmm1 -; CHECK-NEXT: movl $buf, %ecx -; CHECK-NEXT: movl $32, %esi -; CHECK-NEXT: tilestored %tmm1, (%rcx,%rsi) -; CHECK-NEXT: tilerelease -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; AVX512-LABEL: test_api: +; AVX512: # %bb.0: +; AVX512-NEXT: testl %edi, %edi +; AVX512-NEXT: movsbl %sil, %eax +; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0 +; AVX512-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movb $1, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %si, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %dx, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: movw %dx, -{{[0-9]+}}(%rsp) +; AVX512-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) +; AVX512-NEXT: je .LBB0_2 +; AVX512-NEXT: # %bb.1: +; AVX512-NEXT: movl $buf, %ecx +; AVX512-NEXT: jmp .LBB0_3 +; AVX512-NEXT: .LBB0_2: +; AVX512-NEXT: movl $buf2, %ecx +; AVX512-NEXT: .LBB0_3: +; AVX512-NEXT: movl $32, %edi +; AVX512-NEXT: tileloadd (%rcx,%rdi), %tmm0 +; AVX512-NEXT: tileloadd (%rcx,%rdi), %tmm2 +; AVX512-NEXT: tileloadd (%rcx,%rdi), %tmm1 +; AVX512-NEXT: tdpbssd %tmm2, %tmm0, %tmm1 +; AVX512-NEXT: movl $buf, %ecx +; AVX512-NEXT: movl $32, %esi +; AVX512-NEXT: tilestored %tmm1, (%rcx,%rsi) +; AVX512-NEXT: tilerelease +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +; +; AVX2-LABEL: test_api: +; AVX2: # %bb.0: +; AVX2-NEXT: testl %edi, %edi +; AVX2-NEXT: movsbl %sil, %eax +; AVX2-NEXT: movb $1, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movw %si, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movw %dx, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: movw %dx, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) +; AVX2-NEXT: je .LBB0_2 +; AVX2-NEXT: # %bb.1: +; AVX2-NEXT: movl $buf, %ecx +; AVX2-NEXT: jmp .LBB0_3 +; AVX2-NEXT: .LBB0_2: +; AVX2-NEXT: movl $buf2, %ecx +; AVX2-NEXT: .LBB0_3: +; AVX2-NEXT: movl $32, %edi +; AVX2-NEXT: tileloadd (%rcx,%rdi), %tmm0 +; AVX2-NEXT: tileloadd (%rcx,%rdi), %tmm2 +; AVX2-NEXT: tileloadd (%rcx,%rdi), %tmm1 +; AVX2-NEXT: tdpbssd %tmm2, %tmm0, %tmm1 +; AVX2-NEXT: movl $buf, %ecx +; AVX2-NEXT: movl $32, %esi +; AVX2-NEXT: tilestored %tmm1, (%rcx,%rsi) +; AVX2-NEXT: tilerelease +; AVX2-NEXT: retq +; +; SSE2-LABEL: test_api: +; SSE2: # %bb.0: +; SSE2-NEXT: testl %edi, %edi +; SSE2-NEXT: movsbl %sil, %eax +; SSE2-NEXT: movb $1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movw %si, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movw %dx, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movw %dx, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) +; SSE2-NEXT: je .LBB0_2 +; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: movl $buf, %ecx +; SSE2-NEXT: jmp .LBB0_3 +; SSE2-NEXT: .LBB0_2: +; SSE2-NEXT: movl $buf2, %ecx +; SSE2-NEXT: .LBB0_3: +; SSE2-NEXT: movl $32, %edi +; SSE2-NEXT: tileloadd (%rcx,%rdi), %tmm0 +; SSE2-NEXT: tileloadd (%rcx,%rdi), %tmm2 +; SSE2-NEXT: tileloadd (%rcx,%rdi), %tmm1 +; SSE2-NEXT: tdpbssd %tmm2, %tmm0, %tmm1 +; SSE2-NEXT: movl $buf, %ecx +; SSE2-NEXT: movl $32, %esi +; SSE2-NEXT: tilestored %tmm1, (%rcx,%rsi) +; SSE2-NEXT: tilerelease +; SSE2-NEXT: retq %4 = icmp eq i32 %0, 0 %5 = shl i16 %1, 8 %6 = ashr exact i16 %5, 8