Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86]Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID/CET instructions #76786

Merged
merged 7 commits into from
Jan 5, 2024

Conversation

XinWang10
Copy link
Contributor

@XinWang10 XinWang10 commented Jan 3, 2024

R16-R31 were added to the GPRs in #70958.
This patch supports lowering for the promoted SHA/MOVDIR/CRC32/INVPCID/CET instructions.

RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4

@XinWang10 XinWang10 marked this pull request as ready for review January 3, 2024 07:31
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 3, 2024

@llvm/pr-subscribers-backend-x86

Author: None (XinWang10)

Changes

R16-R31 were added to the GPRs in #70958.
This patch supports lowering for the promoted SHA/MOVDIR/CRC32/INVPCID instructions.

RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4


Patch is 22.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76786.diff

9 Files Affected:

  • (modified) llvm/lib/Target/X86/X86FastISel.cpp (+4-4)
  • (modified) llvm/lib/Target/X86/X86InstrSystem.td (+10-3)
  • (modified) llvm/lib/Target/X86/X86InstrVMX.td (+4-4)
  • (added) llvm/test/CodeGen/X86/apx/cet.ll (+50)
  • (added) llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll (+61)
  • (added) llvm/test/CodeGen/X86/apx/crc32.ll (+58)
  • (added) llvm/test/CodeGen/X86/apx/invpcid.ll (+27)
  • (added) llvm/test/CodeGen/X86/apx/movdir.ll (+38)
  • (added) llvm/test/CodeGen/X86/apx/sha.ll (+186)
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a72..8b4ff4c8ed878f 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     default:
       llvm_unreachable("Unexpected intrinsic.");
     case Intrinsic::x86_sse42_crc32_32_8:
-      Opc = X86::CRC32r32r8;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_16:
-      Opc = X86::CRC32r32r16;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_32_32:
-      Opc = X86::CRC32r32r32;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
       RC = &X86::GR32RegClass;
       break;
     case Intrinsic::x86_sse42_crc32_64_64:
-      Opc = X86::CRC32r64r64;
+      Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
       RC = &X86::GR64RegClass;
       break;
     }
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 699e5847e63fb9..30530a00809f3f 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                   Requires<[Not64BitMode, HasINVPCID]>;
 def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                   "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-                  Requires<[In64BitMode, HasINVPCID]>;
+                  Requires<[In64BitMode, HasINVPCID, NoEGPR]>;
 
 def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                        "invpcid\t{$src2, $src1|$src1, $src2}", []>,
-                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
+                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>;
 } // SchedRW
 
-let Predicates = [In64BitMode, HasINVPCID] in {
+let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in {
   // The instruction can only use a 64 bit register as the register argument
   // in 64 bit mode, while the intrinsic only accepts a 32 bit argument
   // corresponding to it.
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
               addr:$src2)>;
 }
 
+let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in {
+  def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
+            (INVPCID64_EVEX
+              (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
+              addr:$src2)>;
+}
+
 
 //===----------------------------------------------------------------------===//
 // SMAP Instruction
diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td
index 7cc468fe15ad4e..e6722467897216 100644
--- a/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/llvm/lib/Target/X86/X86InstrVMX.td
@@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                Requires<[Not64BitMode]>;
 def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                "invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-               Requires<[In64BitMode]>;
+               Requires<[In64BitMode, NoEGPR]>;
 def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                       "invept\t{$src2, $src1|$src1, $src2}", []>,
-                    EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+                    EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
 
 // 66 0F 38 81
 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
@@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                 Requires<[Not64BitMode]>;
 def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                 "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
-                Requires<[In64BitMode]>;
+                Requires<[In64BitMode, NoEGPR]>;
 def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                        "invvpid\t{$src2, $src1|$src1, $src2}", []>,
-                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+                     EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
 
 // 0F 01 C1
 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll
new file mode 100644
index 00000000000000..98f3844d1ccd19
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/cet.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_wrssd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssd:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrssd(i32 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrssd(i32, ptr)
+
+define void @test_wrssq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssq:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrssq(i64 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrssq(i64, ptr)
+
+define void @test_wrussd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussd:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrussd(i32 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrussd(i32, ptr)
+
+define void @test_wrussq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussq:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+entry:
+  tail call void @llvm.x86.wrussq(i64 %a, ptr %__p)
+  ret void
+}
+
+declare void @llvm.x86.wrussq(i64, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
new file mode 100644
index 00000000000000..0b51679ccd7fb5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s
+
+define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %trunc = trunc i32 %a1 to i8
+  %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
+
+define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %trunc = trunc i32 %a1 to i16
+  %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
+
+define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
+
+define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %trunc = trunc i32 %a1 to i8
+  %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
+
+define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
+  ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll
new file mode 100644
index 00000000000000..4bcc4d15cc6b5a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s
+
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_32_8:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+  ret i32 %tmp
+}
+
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+; CHECK-LABEL: crc32_32_16:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+  ret i32 %tmp
+}
+
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: crc32_32_32:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT:    crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_64_8:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
+  ret i64 %tmp
+}
+
+define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: crc32_64_64:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT:    crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll
new file mode 100644
index 00000000000000..389895f4921305
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/invpcid.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_invpcid(i32 %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.invpcid(i32 %type, ptr %descriptor)
+  ret void
+}
+
+define void @test_invpcid2(ptr readonly %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rdi), %eax # encoding: [0x8b,0x07]
+; CHECK-NEXT:    invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load i32, ptr %type, align 4
+  tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1
+  ret void
+}
+
+declare void @llvm.x86.invpcid(i32, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll
new file mode 100644
index 00000000000000..06fd7511bc143c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/movdir.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_movdiri(ptr %p, i32 %v) {
+; CHECK-LABEL: test_movdiri:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.directstore32(ptr %p, i32 %v)
+  ret void
+}
+
+declare void @llvm.x86.directstore32(ptr, i32)
+
+define void @test_movdiri_64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_movdiri_64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.directstore64(ptr %p, i64 %v)
+  ret void
+}
+
+declare void @llvm.x86.directstore64(ptr, i64)
+
+define void @test_movdir64b(ptr %dst, ptr %src) {
+; CHECK-LABEL: test_movdir64b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
+  ret void
+}
+
+declare void @llvm.x86.movdir64b(ptr, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll
new file mode 100644
index 00000000000000..088ee61a97f4ea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/sha.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
+; CHECK-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; CHECK-NEXT:    movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+  ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; CHECK-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; CHECK-NEXT:    movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = load <4 x i32>, ptr %b
+  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
+  ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg1rr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+ent...
[truncated]

@KanRobert KanRobert requested a review from topperc January 3, 2024 11:19
@llvm llvm deleted a comment from github-actions bot Jan 4, 2024
@XinWang10 XinWang10 changed the title [X86]Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID instructions [X86]Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID/CET instructions Jan 5, 2024
Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@XinWang10 XinWang10 merged commit f5f66e2 into llvm:main Jan 5, 2024
4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants