Skip to content

Commit

Permalink
[X86][AVX512] Remove constant pool shuffle decoding from SelectionDAG
Browse files Browse the repository at this point in the history
This patch removes the last use of the constant pool shuffle decode helper and consistently uses the 'getTargetShuffleMaskIndices' versions instead. The constant pool versions are now purely used for assembly comments.

The avx512vbmi intrinsic upgrade tests had to be altered as they were being decoded as broadcasts, similar to what I fixed in rL346032. I don't think the change is critical - although it's annoying that we lose the {k}{z} instruction test coverage, as they are tricky to generate.

Differential Revision: https://reviews.llvm.org/D54083

llvm-svn: 346850
  • Loading branch information
RKSimon committed Nov 14, 2018
1 parent 18e9864 commit 7501780
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 18 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "X86InstrBuilder.h"
#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
Expand Down Expand Up @@ -6147,8 +6146,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMV3Mask(C, MaskEltSize, VT.getSizeInBits(), Mask);
if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask,
RawUndefs)) {
DecodeVPERMV3Mask(RawMask, RawUndefs, Mask);
break;
}
return false;
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/avx512vbmi-intrinsics-upgrade.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x
; X86-NEXT: vpermt2b %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0x75,0x48,0x7d,0xda]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x75,0xca]
; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x75,0xe2]
; X86-NEXT: vmovdqa64 %zmm1, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe1]
; X86-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x75,0xe2]
; X86-NEXT: vpaddb %zmm3, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfc,0xc3]
; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
Expand All @@ -53,13 +53,13 @@ define <64 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_512(<64 x i8> %x0, <64 x
; X64-NEXT: vpermt2b %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0x75,0x48,0x7d,0xda]
; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x75,0xca]
; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x75,0xe2]
; X64-NEXT: vmovdqa64 %zmm1, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe1]
; X64-NEXT: vpermi2b %zmm2, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x75,0xe2]
; X64-NEXT: vpaddb %zmm3, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfc,0xc3]
; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> zeroinitializer, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %res, <64 x i8> %x2, i64 %x3)
%res2 = call <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res3 = add <64 x i8> %res, %res1
%res4 = add <64 x i8> %res3, %res2
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/X86/avx512vbmivl-intrinsics-upgrade.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x
; X86-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
; X86-NEXT: vmovdqa %xmm1, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe1]
; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xe2]
; X86-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
Expand All @@ -83,13 +83,13 @@ define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x
; X64-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
; X64-NEXT: vmovdqa %xmm1, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe1]
; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xe2]
; X64-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2, i16 %x3)
%res1 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %res, <16 x i8> %x2, i16 %x3)
%res2 = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
%res3 = add <16 x i8> %res, %res1
%res4 = add <16 x i8> %res3, %res2
Expand All @@ -105,8 +105,8 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x
; X86-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
; X86-NEXT: vmovdqa %ymm1, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe1]
; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xe2]
; X86-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
Expand All @@ -117,13 +117,13 @@ define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x
; X64-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
; X64-NEXT: vmovdqa %ymm1, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe1]
; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xe2]
; X64-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2, i32 %x3)
%res1 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %res, <32 x i8> %x2, i32 %x3)
%res2 = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
%res3 = add <32 x i8> %res, %res1
%res4 = add <32 x i8> %res3, %res2
Expand Down

0 comments on commit 7501780

Please sign in to comment.