Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86][APX] Remove KEYLOCKER and SHA promotions from EVEX MAP4. #89173

Merged
merged 5 commits into from
Apr 19, 2024

Conversation

FreddyLeaf
Copy link
Contributor

@FreddyLeaf FreddyLeaf commented Apr 18, 2024

APX spec: https://cdrdv2.intel.com/v1/dl/getContent/784266
Change happended in version 4.0.
Removed instructions' Opcodes:
AESDEC128KL
AESDEC256KL
AESDECWIDE128KL
AESDECWIDE256KL
AESENC128KL
AESENC256KL
AESENCWIDE128KL
AESENCWIDE256KL
ENCODEKEY128
ENCODEKEY256
SHA1MSG1
SHA1MSG2
SHA1NEXTE
SHA1RNDS4
SHA256MSG1
SHA256MSG2
SHA256RNDS2

@llvmbot llvmbot added backend:X86 mc Machine (object) code labels Apr 18, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 18, 2024

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-mc

Author: Freddy Ye (FreddyLeaf)

Changes
  • [X86][APX] Remove KEYLOCKER promotions from EVEX MAP4.
  • Revert "[X86][MC] Support Enc/Dec for EGPR for promoted SHA instruction (#75582)"
  • cont. fix for SHA

Patch is 61.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89173.diff

15 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+7-12)
  • (modified) llvm/lib/Target/X86/X86InstrAsmAlias.td (-8)
  • (modified) llvm/lib/Target/X86/X86InstrKL.td (+2-16)
  • (modified) llvm/lib/Target/X86/X86InstrSSE.td (+29-65)
  • (modified) llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll (-146)
  • (modified) llvm/test/CodeGen/X86/sha.ll (-83)
  • (removed) llvm/test/MC/Disassembler/X86/apx/keylocker.txt (-102)
  • (removed) llvm/test/MC/Disassembler/X86/apx/sha.txt (-124)
  • (removed) llvm/test/MC/X86/apx/keylocker-att.s (-105)
  • (removed) llvm/test/MC/X86/apx/keylocker-intel.s (-101)
  • (removed) llvm/test/MC/X86/apx/sha-att.s (-127)
  • (removed) llvm/test/MC/X86/apx/sha-intel.s (-123)
  • (modified) llvm/test/MC/X86/x86_64-asm-match.s (+1-1)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (-7)
  • (modified) llvm/utils/TableGen/X86FoldTablesEmitter.cpp (+2-4)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4e4241efd63d6b..65e0c06a806e07 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5046,18 +5046,14 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       unsigned Opcode;
       switch (IntNo) {
       default: llvm_unreachable("Impossible intrinsic");
-      case Intrinsic::x86_encodekey128:
-        Opcode = GET_EGPR_IF_ENABLED(X86::ENCODEKEY128);
-        break;
-      case Intrinsic::x86_encodekey256:
-        Opcode = GET_EGPR_IF_ENABLED(X86::ENCODEKEY256);
-        break;
+      case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break;
+      case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break;
       }
 
       SDValue Chain = Node->getOperand(0);
       Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
                                    SDValue());
-      if (Opcode == X86::ENCODEKEY256 || Opcode == X86::ENCODEKEY256_EVEX)
+      if (Opcode == X86::ENCODEKEY256)
         Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
                                      Chain.getValue(1));
 
@@ -6476,18 +6472,17 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     default:
       llvm_unreachable("Unexpected opcode!");
     case X86ISD::AESENCWIDE128KL:
-      Opcode = GET_EGPR_IF_ENABLED(X86::AESENCWIDE128KL);
+      Opcode = X86::AESENCWIDE128KL;
       break;
     case X86ISD::AESDECWIDE128KL:
-      Opcode = GET_EGPR_IF_ENABLED(X86::AESDECWIDE128KL);
+      Opcode = X86::AESDECWIDE128KL;
       break;
     case X86ISD::AESENCWIDE256KL:
-      Opcode = GET_EGPR_IF_ENABLED(X86::AESENCWIDE256KL);
+      Opcode = X86::AESENCWIDE256KL;
       break;
     case X86ISD::AESDECWIDE256KL:
-      Opcode = GET_EGPR_IF_ENABLED(X86::AESDECWIDE256KL);
+      Opcode = X86::AESDECWIDE256KL;
       break;
-#undef GET_EGPR_IF_ENABLED
     }
 
     SDValue Chain = Node->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86InstrAsmAlias.td b/llvm/lib/Target/X86/X86InstrAsmAlias.td
index d06a0c79b46bbb..25b8297502f60d 100644
--- a/llvm/lib/Target/X86/X86InstrAsmAlias.td
+++ b/llvm/lib/Target/X86/X86InstrAsmAlias.td
@@ -894,11 +894,3 @@ def : InstAlias<"vmsave\t{%rax|rax}", (VMSAVE64), 0>, Requires<[In64BitMode]>;
 def : InstAlias<"invlpga\t{%eax, %ecx|eax, ecx}", (INVLPGA32), 0>, Requires<[Not64BitMode]>;
 def : InstAlias<"invlpga\t{%rax, %ecx|rax, ecx}", (INVLPGA64), 0>, Requires<[In64BitMode]>;
 
-// Aliases with explicit %xmm0
-def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
-                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
-def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
-                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
-
-def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
-                (SHA256RNDS2rm_EVEX VR128:$dst, i128mem:$src2), 0>;
diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td
index 32423e08ed4c53..d5a4be45cc9b10 100644
--- a/llvm/lib/Target/X86/X86InstrKL.td
+++ b/llvm/lib/Target/X86/X86InstrKL.td
@@ -48,7 +48,7 @@ let SchedRW = [WriteSystem] in {
                       [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8, XS;
   }
 
-  let Predicates = [HasKL, NoEGPR] in {
+  let Predicates = [HasKL] in {
     let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in
       def ENCODEKEY128 : Encodekey<0xFA, "encodekey128">, T8;
 
@@ -58,17 +58,6 @@ let SchedRW = [WriteSystem] in {
     let Constraints = "$src1 = $dst", Defs = [EFLAGS] in
       defm "" : Aesencdec<"">, T8;
   }
-
-  let Predicates = [HasKL, HasEGPR, In64BitMode] in {
-    let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in
-      def ENCODEKEY128_EVEX : Encodekey<0xDA, "encodekey128">, EVEX, T_MAP4;
-
-    let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, EFLAGS] in
-      def ENCODEKEY256_EVEX : Encodekey<0xDB, "encodekey256">, EVEX, T_MAP4;
-
-    let Constraints = "$src1 = $dst", Defs = [EFLAGS] in
-      defm "" : Aesencdec<"_EVEX">, EVEX, T_MAP4;
-  }
 } // SchedRW
 
 multiclass Aesencdecwide<string suffix> {
@@ -80,9 +69,6 @@ multiclass Aesencdecwide<string suffix> {
 
 let SchedRW = [WriteSystem], Uses = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7],
     Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7], mayLoad = 1 in {
-  let Predicates = [HasWIDEKL, NoEGPR] in
+  let Predicates = [HasWIDEKL] in
     defm "" : Aesencdecwide<"">, T8;
-
-  let Predicates = [HasWIDEKL, HasEGPR, In64BitMode] in
-    defm "" : Aesencdecwide<"_EVEX">, EVEX, T_MAP4;
 } // SchedRW
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 2b391b60f2c9bb..8542e5ce233364 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -6689,31 +6689,31 @@ let Predicates = [HasCRC32, HasEGPR, In64BitMode], OpMap = T_MAP4, OpEnc = EncEV
 
 // FIXME: Is there a better scheduler class for SHA than WriteVecIMul?
 multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
-                      X86FoldableSchedWrite sched, string Suffix = "", bit UsesXMM0 = 0> {
-  def rr#Suffix : I<Opc, MRMSrcReg, (outs VR128:$dst),
-                    (ins VR128:$src1, VR128:$src2),
-                    !if(UsesXMM0,
-                        !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
-                        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
-                    [!if(UsesXMM0,
-                         (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
-                         (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
-                    T8, Sched<[sched]>;
-
-  def rm#Suffix : I<Opc, MRMSrcMem, (outs VR128:$dst),
-                    (ins VR128:$src1, i128mem:$src2),
-                    !if(UsesXMM0,
-                        !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
-                        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
-                    [!if(UsesXMM0,
-                         (set VR128:$dst, (IntId VR128:$src1,
-                           (memop addr:$src2), XMM0)),
-                         (set VR128:$dst, (IntId VR128:$src1,
-                           (memop addr:$src2))))]>, T8,
-                    Sched<[sched.Folded, sched.ReadAfterFold]>;
+                      X86FoldableSchedWrite sched, bit UsesXMM0 = 0> {
+  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
+             (ins VR128:$src1, VR128:$src2),
+             !if(UsesXMM0,
+                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
+             [!if(UsesXMM0,
+                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
+                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>,
+             T8, Sched<[sched]>;
+
+  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
+             (ins VR128:$src1, i128mem:$src2),
+             !if(UsesXMM0,
+                 !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
+             [!if(UsesXMM0,
+                  (set VR128:$dst, (IntId VR128:$src1,
+                    (memop addr:$src2), XMM0)),
+                  (set VR128:$dst, (IntId VR128:$src1,
+                    (memop addr:$src2))))]>, T8,
+             Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
-let Constraints = "$src1 = $dst", Predicates = [HasSHA, NoEGPR] in {
+let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
   def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                          "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -6740,7 +6740,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA, NoEGPR] in {
 
   let Uses=[XMM0] in
   defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2,
-                                SchedWriteVecIMul.XMM, "", 1>;
+                                SchedWriteVecIMul.XMM, 1>;
 
   defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1,
                                SchedWriteVecIMul.XMM>;
@@ -6748,47 +6748,11 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA, NoEGPR] in {
                                SchedWriteVecIMul.XMM>;
 }
 
-let Constraints = "$src1 = $dst", Predicates = [HasSHA, HasEGPR, In64BitMode] in {
-  def SHA1RNDS4rri_EVEX: Ii8<0xD4, MRMSrcReg, (outs VR128:$dst),
-                             (ins VR128:$src1, VR128:$src2, u8imm:$src3),
-                             "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                             [(set VR128:$dst,
-                               (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
-                                (i8 timm:$src3)))]>,
-                         EVEX, NoCD8, T_MAP4, Sched<[SchedWriteVecIMul.XMM]>;
-  def SHA1RNDS4rmi_EVEX: Ii8<0xD4, MRMSrcMem, (outs VR128:$dst),
-                             (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
-                             "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                             [(set VR128:$dst,
-                               (int_x86_sha1rnds4 VR128:$src1,
-                                (memop addr:$src2),
-                                (i8 timm:$src3)))]>,
-                         EVEX, NoCD8, T_MAP4,
-                         Sched<[SchedWriteVecIMul.XMM.Folded,
-                                SchedWriteVecIMul.XMM.ReadAfterFold]>;
-
-  defm SHA1NEXTE : SHAI_binop<0xD8, "sha1nexte", int_x86_sha1nexte,
-                                   SchedWriteVecIMul.XMM, "_EVEX">,
-                        EVEX, NoCD8, T_MAP4;
-  defm SHA1MSG1  : SHAI_binop<0xD9, "sha1msg1", int_x86_sha1msg1,
-                              SchedWriteVecIMul.XMM, "_EVEX">,
-                   EVEX, NoCD8, T_MAP4;
-  defm SHA1MSG2  : SHAI_binop<0xDA, "sha1msg2", int_x86_sha1msg2,
-                              SchedWriteVecIMul.XMM, "_EVEX">,
-                   EVEX, NoCD8, T_MAP4;
-
-  let Uses=[XMM0] in
-  defm SHA256RNDS2 : SHAI_binop<0xDB, "sha256rnds2", int_x86_sha256rnds2,
-                                SchedWriteVecIMul.XMM, "_EVEX", 1>,
-                     EVEX, NoCD8, T_MAP4;
-
-  defm SHA256MSG1 : SHAI_binop<0xDC, "sha256msg1", int_x86_sha256msg1,
-                               SchedWriteVecIMul.XMM, "_EVEX">,
-                    EVEX, NoCD8, T_MAP4;
-  defm SHA256MSG2 : SHAI_binop<0xDD, "sha256msg2", int_x86_sha256msg2,
-                               SchedWriteVecIMul.XMM, "_EVEX">,
-                    EVEX, NoCD8, T_MAP4;
-}
+// Aliases with explicit %xmm0
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+                (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+                (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
 
 //===----------------------------------------------------------------------===//
 // AES-NI Instructions
diff --git a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
index f91c4ad0e8e343..5ae1b06b072ff3 100644
--- a/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/keylocker-intrinsics-fast-isel.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker-builtins.c
 
@@ -10,12 +9,6 @@ define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    loadiwkey %xmm2, %xmm1
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_loadiwkey:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movl %edi, %eax # encoding: [0x89,0xf8]
-; EGPR-NEXT:    loadiwkey %xmm2, %xmm1 # encoding: [0xf3,0x0f,0x38,0xdc,0xca]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
   ret void
@@ -32,17 +25,6 @@ define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h)
 ; CHECK-NEXT:    movups %xmm5, 64(%rsi)
 ; CHECK-NEXT:    movups %xmm6, 80(%rsi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_encodekey128_u32:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    encodekey128 %edi, %eax # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xfa,0xc7]
-; EGPR-NEXT:    movups %xmm0, (%rsi) # encoding: [0x0f,0x11,0x06]
-; EGPR-NEXT:    movups %xmm1, 16(%rsi) # encoding: [0x0f,0x11,0x4e,0x10]
-; EGPR-NEXT:    movups %xmm2, 32(%rsi) # encoding: [0x0f,0x11,0x56,0x20]
-; EGPR-NEXT:    movups %xmm4, 48(%rsi) # encoding: [0x0f,0x11,0x66,0x30]
-; EGPR-NEXT:    movups %xmm5, 64(%rsi) # encoding: [0x0f,0x11,0x6e,0x40]
-; EGPR-NEXT:    movups %xmm6, 80(%rsi) # encoding: [0x0f,0x11,0x76,0x50]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
   %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
@@ -78,18 +60,6 @@ define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_
 ; CHECK-NEXT:    movups %xmm5, 80(%rsi)
 ; CHECK-NEXT:    movups %xmm6, 96(%rsi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_encodekey256_u32:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    encodekey256 %edi, %eax # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xfb,0xc7]
-; EGPR-NEXT:    movups %xmm0, (%rsi) # encoding: [0x0f,0x11,0x06]
-; EGPR-NEXT:    movups %xmm1, 16(%rsi) # encoding: [0x0f,0x11,0x4e,0x10]
-; EGPR-NEXT:    movups %xmm2, 32(%rsi) # encoding: [0x0f,0x11,0x56,0x20]
-; EGPR-NEXT:    movups %xmm3, 48(%rsi) # encoding: [0x0f,0x11,0x5e,0x30]
-; EGPR-NEXT:    movups %xmm4, 64(%rsi) # encoding: [0x0f,0x11,0x66,0x40]
-; EGPR-NEXT:    movups %xmm5, 80(%rsi) # encoding: [0x0f,0x11,0x6e,0x50]
-; EGPR-NEXT:    movups %xmm6, 96(%rsi) # encoding: [0x0f,0x11,0x76,0x60]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
   %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
@@ -123,13 +93,6 @@ define zeroext i8 @test_mm_aesenc256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    movaps %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_mm_aesenc256kl_u8:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    aesenc256kl (%rsi), %xmm0 # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xde,0x06]
-; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; EGPR-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, ptr %h) #1
   %1 = extractvalue { i8, <2 x i64> } %0, 1
@@ -145,13 +108,6 @@ define zeroext i8 @test_mm_aesdec256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    movaps %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_mm_aesdec256kl_u8:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    aesdec256kl (%rsi), %xmm0 # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xdf,0x06]
-; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; EGPR-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, ptr %h) #1
   %1 = extractvalue { i8, <2 x i64> } %0, 1
@@ -167,13 +123,6 @@ define zeroext i8 @test_mm_aesenc128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    movaps %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_mm_aesenc128kl_u8:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    aesenc128kl (%rsi), %xmm0 # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xdc,0x06]
-; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; EGPR-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, ptr %h) #1
   %1 = extractvalue { i8, <2 x i64> } %0, 1
@@ -189,13 +138,6 @@ define zeroext i8 @test_mm_aesdec128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    movaps %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test_mm_aesdec128kl_u8:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    aesdec128kl (%rsi), %xmm0 # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xdd,0x06]
-; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; EGPR-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, ptr %h) #1
   %1 = extractvalue { i8, <2 x i64> } %0, 1
@@ -226,28 +168,6 @@ define zeroext i8 @test__mm_aesencwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
 ; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
 ; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test__mm_aesencwide128kl_u8:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movaps (%rsi), %xmm0 # encoding: [0x0f,0x28,0x06]
-; EGPR-NEXT:    movaps 16(%rsi), %xmm1 # encoding: [0x0f,0x28,0x4e,0x10]
-; EGPR-NEXT:    movaps 32(%rsi), %xmm2 # encoding: [0x0f,0x28,0x56,0x20]
-; EGPR-NEXT:    movaps 48(%rsi), %xmm3 # encoding: [0x0f,0x28,0x5e,0x30]
-; EGPR-NEXT:    movaps 64(%rsi), %xmm4 # encoding: [0x0f,0x28,0x66,0x40]
-; EGPR-NEXT:    movaps 80(%rsi), %xmm5 # encoding: [0x0f,0x28,0x6e,0x50]
-; EGPR-NEXT:    movaps 96(%rsi), %xmm6 # encoding: [0x0f,0x28,0x76,0x60]
-; EGPR-NEXT:    movaps 112(%rsi), %xmm7 # encoding: [0x0f,0x28,0x7e,0x70]
-; EGPR-NEXT:    aesencwide128kl (%rdx) # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xd8,0x02]
-; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; EGPR-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07]
-; EGPR-NEXT:    movaps %xmm1, 16(%rdi) # encoding: [0x0f,0x29,0x4f,0x10]
-; EGPR-NEXT:    movaps %xmm2, 32(%rdi) # encoding: [0x0f,0x29,0x57,0x20]
-; EGPR-NEXT:    movaps %xmm3, 48(%rdi) # encoding: [0x0f,0x29,0x5f,0x30]
-; EGPR-NEXT:    movaps %xmm4, 64(%rdi) # encoding: [0x0f,0x29,0x67,0x40]
-; EGPR-NEXT:    movaps %xmm5, 80(%rdi) # encoding: [0x0f,0x29,0x6f,0x50]
-; EGPR-NEXT:    movaps %xmm6, 96(%rdi) # encoding: [0x0f,0x29,0x77,0x60]
-; EGPR-NEXT:    movaps %xmm7, 112(%rdi) # encoding: [0x0f,0x29,0x7f,0x70]
-; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = load <2 x i64>, ptr %idata, align 16
   %1 = getelementptr <2 x i64>, ptr %idata, i64 1
@@ -314,28 +234,6 @@ define zeroext i8 @test__mm_aesdecwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
 ; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
 ; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
 ; CHECK-NEXT:    retq
-;
-; EGPR-LABEL: test__mm_aesdecwide128kl_u8:
-; EGPR:       # %bb.0: # %entry
-; EGPR-NEXT:    movaps (%rsi), %xmm0 # encoding: [0x0f,0x28,0x06]
-; EGPR-NEXT:    movaps 16(%rsi), %xmm1 # encoding: [0x0f,0x28,0x4e,0x10]
-; EGPR-NEXT:    movaps 32(%rsi), %xmm2 # encoding: [0x0f,0x28,0x56,0x20]
-; EGPR-NEXT:    movaps 48(%rsi), %xmm3 # encoding: [0x0f,0x28,0x5e,0x30]
-; EGPR-NEXT:    movaps 64(%rsi), %xmm4 # encoding: [0x0f,0x28,0x66,0x40]
-; EGPR-NEXT:    movaps 80(%rsi), %xmm5 # encoding: [0x0f,0x28,0x6e,0x50]
-; EGPR-NEXT:    movaps 96(%rsi), %xmm6 # encoding: [0x0f,0x28,0x76,0x60]
-; EGPR-NEXT:    movaps 112(%rsi), %xmm7 # encoding: [0x0f,0x28,0x7e,0x70]
-; EGPR-NEXT:    aesdecwide128kl (%rdx) # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xd8,0x0a]
-; EGPR-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; EGPR-NEXT:    movaps %xmm0, (%rdi) # encoding: [0x0f,0x...
[truncated]

Copy link

github-actions bot commented Apr 18, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@KanRobert
Copy link
Contributor

Could you list the opcodes for your change in the description? I noticed AESENCWIDE256KL is removed in your patch, is it expected?

@FreddyLeaf
Copy link
Contributor Author

Could you list the opcodes for your change in the description? I noticed AESENCWIDE256KL is removed in your patch, is it expected?

Done. it is expected. See version 4.0

Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@FreddyLeaf FreddyLeaf merged commit de3e4a9 into llvm:main Apr 19, 2024
3 of 4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants