diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 07a858fd682fc..89e01885b62d6 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1177,6 +1177,13 @@ def or_disjoint : PatFrag<(ops node:$lhs, node:$rhs),
   }];
 }
 
+def addlike : PatFrags<(ops node:$lhs, node:$rhs),
+                       [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)], [{
+  if (Op.getOpcode() == ISD::ADD)
+    return true;
+  return CurDAG->isADDLike(Op);
+}]>;
+
 def xor_like : PatFrags<(ops node:$lhs, node:$rhs),
                         [(xor node:$lhs, node:$rhs), (or_disjoint node:$lhs, node:$rhs)]>;
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1b748b7355716..34f6665ab44a9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4685,7 +4685,7 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
 //===----------------------------------------------------------------------===//
 // AVX-512 - Integer arithmetic
 //
-multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
   defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ ... @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
-multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             X86VectorVTInfo _, X86FoldableSchedWrite sched,
                             bit IsCommutable = 0> :
            avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
@@ -4719,7 +4719,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
 }
 
-multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               AVX512VLVectorVTInfo VTInfo, X86SchedWriteWidths sched,
                               Predicate prd, bit IsCommutable = 0> {
@@ -4735,7 +4735,7 @@ multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }
 }
 
-multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                AVX512VLVectorVTInfo VTInfo, X86SchedWriteWidths sched,
                                Predicate prd, bit IsCommutable = 0> {
@@ -4751,7 +4751,7 @@ multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
   }
 }
 
-multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 X86SchedWriteWidths sched, Predicate prd,
                                 bit IsCommutable = 0> {
   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
@@ ... @@ multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   REX_W, EVEX_CD8<64, CD8VF>;
 }
 
-multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 X86SchedWriteWidths sched, Predicate prd,
                                 bit IsCommutable = 0> {
   defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                   sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
 }
 
-multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 X86SchedWriteWidths sched, Predicate prd,
                                 bit IsCommutable = 0> {
   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
@@ ... @@ multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  WIG;
 }
 
-multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 X86SchedWriteWidths sched, Predicate prd,
                                 bit IsCommutable = 0> {
   defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
@@ ... @@ multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }
 
 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
-                                 SDNode OpNode, X86SchedWriteWidths sched,
+                                 SDPatternOperator OpNode, X86SchedWriteWidths sched,
                                  Predicate prd, bit IsCommutable = 0> {
   defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                 IsCommutable>;
@@ -4793,7 +4793,7 @@ multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
 }
 
 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
-                                 SDNode OpNode, X86SchedWriteWidths sched,
+                                 SDPatternOperator OpNode, X86SchedWriteWidths sched,
                                  Predicate prd, bit IsCommutable = 0> {
   defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                 IsCommutable>;
@@ -4804,7 +4804,7 @@ multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
 
 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                   bits<8> opc_d, bits<8> opc_q,
-                                  string OpcodeStr, SDNode OpNode,
+                                  string OpcodeStr, SDPatternOperator OpNode,
                                   X86SchedWriteWidths sched, bit IsCommutable = 0> {
   defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
@@ ... @@ multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr,
 }
 }
 
-defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
+defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", addlike,
                                     SchedWriteVecALU, 1>;
 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                     SchedWriteVecALU, 0>;
diff --git a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
index 885bc805d6552..65fab8202787c 100644
--- a/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
+++ b/llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
@@ -100,55 +100,48 @@ define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; O0-NEXT: movb $1, {{[0-9]+}}(%rsp)
 ; O0-NEXT: movw %si, %cx
 ; O0-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
-; O0-NEXT: movw %di, %ax
-; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
+; O0-NEXT: movw %di, %dx
+; O0-NEXT: movw %dx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; O0-NEXT: movl $buf, %esi
 ; O0-NEXT: movl $32, %edi
-; O0-NEXT: movw $8, %dx
-; O0-NEXT: # implicit-def: $al
-; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp)
-; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
-; O0-NEXT: movl $64, %edi
-; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
-; O0-NEXT: movw $8, %dx
-; O0-NEXT: tilestored %tmm0, (%rsi,%rdi)
-; O0-NEXT: movl $32, %esi
-; O0-NEXT: movl $buf+1024, %edx
 ; O0-NEXT: movw $8, %ax
+; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
 ; O0-NEXT: # implicit-def: $al
 ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
 ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
+; O0-NEXT: # implicit-def: $dl
+; O0-NEXT: movb %dl, {{[0-9]+}}(%rsp)
+; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp)
 ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
-; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
+; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
 ; O0-NEXT: movl $64, %esi
+; O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: leaq {{[0-9]+}}(%rsp), %r8
+; O0-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; O0-NEXT: tilestored %tmm0, (%r8,%rsi)
+; O0-NEXT: movl $buf+1024, %edx
+; O0-NEXT: tileloadd (%rdx,%rdi), %tmm0
 ; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
-; O0-NEXT: movw $8, %ax
+; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; O0-NEXT: tilestored %tmm0, (%rdx,%rsi)
 ; O0-NEXT: vzeroupper
 ; O0-NEXT: callq foo
-; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %dx # 2-byte Reload
+; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
+; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %di # 
2-byte Reload +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload ; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload -; O0-NEXT: movl $64, %edi -; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi -; O0-NEXT: movw $8, %cx -; O0-NEXT: # implicit-def: $cl -; O0-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp) -; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) -; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0 -; O0-NEXT: movw $8, %cx -; O0-NEXT: tilemovrow $2, %tmm0, %zmm0 -; O0-NEXT: movl $64, %esi -; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; O0-NEXT: movw $8, %cx ; O0-NEXT: # implicit-def: $al ; O0-NEXT: movb %al, {{[0-9]+}}(%rsp) ; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp) +; O0-NEXT: # implicit-def: $cl +; O0-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; O0-NEXT: movw %di, {{[0-9]+}}(%rsp) ; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp) +; O0-NEXT: tileloadd (%r8,%rsi), %tmm0 +; O0-NEXT: tilemovrow $2, %tmm0, %zmm0 ; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0 -; O0-NEXT: movw $8, %cx ; O0-NEXT: tilemovrow $2, %tmm0, %zmm1 ; O0-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; O0-NEXT: movq %rbp, %rsp diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index 76c87900b04d2..2d105c4a350bb 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -353,7 +353,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] ; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & m64bcst) ; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0 -; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; NODQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 ; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 ; NODQ-NEXT: vaddpd %zmm0, %zmm1, %zmm0 ; NODQ-NEXT: retq @@ -380,13 +380,13 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2) ; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0 ; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] -; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0 +; NODQ-NEXT: vpaddq %zmm5, %zmm0, %zmm0 ; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] ; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0 ; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0 ; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2) ; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1 -; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1 +; NODQ-NEXT: vpaddq %zmm5, %zmm1, %zmm1 ; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1 ; NODQ-NEXT: vaddpd %zmm1, %zmm3, %zmm1 ; NODQ-NEXT: retq diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll index 9548967fd0592..b61ad05a10b16 100644 --- a/llvm/test/CodeGen/X86/combine-shl.ll +++ b/llvm/test/CodeGen/X86/combine-shl.ll @@ -837,7 +837,7 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) { ; AVX512-LABEL: combine_vec_add_shl_nonsplat: ; AVX512: # %bb.0: ; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; 
AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = shl <4 x i32> %a0, %2 = add <4 x i32> %1, @@ -881,7 +881,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) { ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] ; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = and <4 x i32> %a0, %2 = shl <4 x i32> %1, @@ -922,7 +922,7 @@ define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) { ; AVX512: # %bb.0: ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0] ; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = shl <4 x i32> %a0, %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll index 7001bf7f28071..dfbd14625a0a8 100644 --- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll +++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll @@ -126,7 +126,7 @@ define <16 x i8> @var_fshl_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %amt) nou ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %ymm0, %ymm0 -; GFNIAVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1 ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; GFNIAVX512BW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 @@ -259,7 +259,7 @@ define <16 x i8> @var_fshr_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %amt) nou ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %ymm0, %ymm0 -; GFNIAVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1 ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; GFNIAVX512BW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 @@ -412,7 +412,7 @@ define <16 x i8> @constant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %ymm0, %ymm0 -; GFNIAVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpmovwb %ymm0, %xmm0 @@ -463,7 +463,7 @@ define <16 x i8> @constant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %ymm0, %ymm0 -; GFNIAVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; GFNIAVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512BW-NEXT: vpmovwb %ymm0, %xmm0 @@ -727,7 +727,7 @@ define <32 x i8> @var_fshl_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %amt) nou ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; GFNIAVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; GFNIAVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 @@ -937,7 +937,7 @@ define <32 x i8> @var_fshr_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %amt) nou ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; GFNIAVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; GFNIAVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 @@ -1187,7 +1187,7 @@ define <32 x i8> @constant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; GFNIAVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpsllvw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpmovwb %zmm0, %ymm0 @@ -1271,7 +1271,7 @@ define <32 x i8> @constant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; GFNIAVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; GFNIAVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; GFNIAVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; GFNIAVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; GFNIAVX512BW-NEXT: vpmovwb %zmm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/gfni-rotates.ll b/llvm/test/CodeGen/X86/gfni-rotates.ll index 967f26f70946a..89db41b3e4f5c 100644 --- a/llvm/test/CodeGen/X86/gfni-rotates.ll +++ b/llvm/test/CodeGen/X86/gfni-rotates.ll @@ -63,7 +63,7 @@ define <16 x i8> @var_rotl_v16i8(<16 x i8> %a, <16 x i8> %amt) nounwind { ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; GFNIAVX512VL-NEXT: vpslld $8, %zmm0, %zmm2 -; GFNIAVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0 +; GFNIAVX512VL-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; GFNIAVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 ; GFNIAVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; GFNIAVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -143,7 +143,7 @@ define <16 x i8> @var_rotr_v16i8(<16 x i8> %a, <16 x i8> %amt) nounwind { ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; GFNIAVX512VL-NEXT: vpslld $8, %zmm0, %zmm2 -; 
GFNIAVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0 +; GFNIAVX512VL-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; GFNIAVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 ; GFNIAVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; GFNIAVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll index 7d0ec6435d863..722f39e3a2a61 100644 --- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll +++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll @@ -130,7 +130,7 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind { ; X86-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; X86-NEXT: vpsrlw $10, %zmm0, %zmm1 ; X86-NEXT: vpsllw $6, %zmm0, %zmm0 -; X86-NEXT: vporq %zmm1, %zmm0, %zmm0 +; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; X86-NEXT: retl ; ; X64-LABEL: illegal_no_extract_mul: @@ -138,7 +138,7 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind { ; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; X64-NEXT: vpsrlw $10, %zmm0, %zmm1 ; X64-NEXT: vpsllw $6, %zmm0, %zmm0 -; X64-NEXT: vporq %zmm1, %zmm0, %zmm0 +; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; X64-NEXT: retq %lhs_mul = mul <32 x i16> %i, %rhs_mul = mul <32 x i16> %i, @@ -185,7 +185,7 @@ define <8 x i32> @no_extract_mul(<8 x i32> %i) nounwind { ; X86-NEXT: vpslld $3, %ymm0, %ymm2 ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X86-NEXT: vpsrld $23, %ymm0, %ymm0 -; X86-NEXT: vpor %ymm0, %ymm1, %ymm0 +; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X86-NEXT: retl ; ; X64-LABEL: no_extract_mul: @@ -194,7 +194,7 @@ define <8 x i32> @no_extract_mul(<8 x i32> %i) nounwind { ; X64-NEXT: vpslld $3, %ymm0, %ymm2 ; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpsrld $23, %ymm0, %ymm0 -; X64-NEXT: vpor %ymm0, %ymm1, %ymm0 +; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; X64-NEXT: retq %lhs_mul = mul <8 x i32> %i, %rhs_mul = mul <8 x i32> %i, @@ -271,7 +271,7 @@ define <2 x i64> @no_extract_udiv(<2 x i64> %i) nounwind { ; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] ; X64-NEXT: vpsrlq $9, %xmm1, %xmm1 ; X64-NEXT: vpsllq $56, %xmm0, %xmm0 -; X64-NEXT: vpor %xmm1, %xmm0, %xmm0 +; X64-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; X64-NEXT: retq %lhs_div = udiv <2 x i64> %i, %rhs_div = udiv <2 x i64> %i, diff --git a/llvm/test/CodeGen/X86/rotate_vec.ll b/llvm/test/CodeGen/X86/rotate_vec.ll index a5349cb33193f..e5c62556777c9 100644 --- a/llvm/test/CodeGen/X86/rotate_vec.ll +++ b/llvm/test/CodeGen/X86/rotate_vec.ll @@ -175,7 +175,7 @@ define <8 x i16> @or_fshl_v8i16(<8 x i16> %x, <8 x i16> %y) { ; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm1 ; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1 ; AVX512-NEXT: vpsrlw $11, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %or1 = or <8 x i16> %y, %x %sh1 = shl <8 x i16> %or1, @@ -198,7 +198,7 @@ define <4 x i32> @or_fshl_v4i32(<4 x i32> %x, <4 x i32> %y) { ; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm1 ; 
AVX512-NEXT: vpslld $21, %xmm1, %xmm1 ; AVX512-NEXT: vpsrld $11, %xmm0, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: retq %or1 = or <4 x i32> %y, %x %sh1 = shl <4 x i32> %or1, diff --git a/llvm/test/CodeGen/X86/shift-i512.ll b/llvm/test/CodeGen/X86/shift-i512.ll index 03b61d9235254..aa21d1ef0e8ff 100644 --- a/llvm/test/CodeGen/X86/shift-i512.ll +++ b/llvm/test/CodeGen/X86/shift-i512.ll @@ -14,17 +14,17 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) { ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[2,3,2,3] ; AVX512VL-NEXT: vpsrlq $63, %xmm4, %xmm4 ; AVX512VL-NEXT: vpaddq %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2 +; AVX512VL-NEXT: vpaddq %xmm4, %xmm2, %xmm2 ; AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 ; AVX512VL-NEXT: vpaddq %ymm3, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlq $63, %ymm1, %ymm1 -; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1 +; AVX512VL-NEXT: vpaddq %ymm1, %ymm3, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 ; AVX512VL-NEXT: vpsrlq $63, %zmm0, %zmm2 ; AVX512VL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6] ; AVX512VL-NEXT: retq ; @@ -78,7 +78,7 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) { ; AVX512VL-NEXT: vpsllq $63, %xmm3, %xmm4 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] ; AVX512VL-NEXT: vpsrlq $1, %xmm5, %xmm5 -; AVX512VL-NEXT: vpor %xmm5, %xmm4, %xmm4 +; AVX512VL-NEXT: vpaddq %xmm5, %xmm4, %xmm4 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] ; AVX512VL-NEXT: vpsrlq $1, %xmm3, %xmm3 ; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 @@ -86,12 +86,12 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) { ; AVX512VL-NEXT: vpsllq $63, %ymm1, %ymm1 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[2,3,2,3,6,7,6,7] ; AVX512VL-NEXT: vpsrlq $1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX512VL-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsrlq $1, %zmm0, %zmm2 ; AVX512VL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512VL-NEXT: retq ; @@ -147,7 +147,7 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) { ; AVX512VL-NEXT: vpsllq $63, %xmm3, %xmm4 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3] ; AVX512VL-NEXT: vpsrlq $1, %xmm5, %xmm5 -; AVX512VL-NEXT: vpor %xmm5, %xmm4, %xmm4 +; AVX512VL-NEXT: vpaddq %xmm5, %xmm4, %xmm4 ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3] ; AVX512VL-NEXT: vpsraq $1, %xmm3, %xmm3 ; AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 @@ -155,12 +155,12 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) { ; AVX512VL-NEXT: vpsllq $63, %ymm1, %ymm1 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[2,3,2,3,6,7,6,7] ; AVX512VL-NEXT: vpsrlq $1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX512VL-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsrlq $1, %zmm0, %zmm2 ; AVX512VL-NEXT: vpshufd {{.*#+}} zmm0 = 
zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512VL-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; AVX512VL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index 5954e3456d674..1c1aa721adc4d 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -636,9 +636,9 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq @@ -990,9 +990,9 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq @@ -3203,9 +3203,9 @@ define <2 x double> @uitofp_load_2i64_to_2f64(ptr%a) { ; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512VL-NEXT: retq @@ -3584,9 +3584,9 @@ define <4 x double> @uitofp_load_4i64_to_4f64(ptr%a) { ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 -; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX512VL-NEXT: retq @@ -5584,18 +5584,18 @@ define void @PR43609(ptr nocapture %x, <2 x i64> %y) { ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] -; AVX512VL-NEXT: vpor %xmm4, %xmm3, %xmm3 +; AVX512VL-NEXT: vpaddq %xmm4, %xmm3, %xmm3 ; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] -; AVX512VL-NEXT: vpor %xmm5, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm5, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] ; AVX512VL-NEXT: # xmm6 = mem[0,0] ; AVX512VL-NEXT: vsubpd %xmm6, %xmm0, %xmm0 ; AVX512VL-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2 +; AVX512VL-NEXT: vpaddq %xmm4, %xmm2, %xmm2 ; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1 -; AVX512VL-NEXT: vpor %xmm5, %xmm1, %xmm1 +; AVX512VL-NEXT: vpaddq %xmm5, %xmm1, %xmm1 ; AVX512VL-NEXT: vsubpd %xmm6, %xmm1, %xmm1 ; AVX512VL-NEXT: vaddpd %xmm1, %xmm2, %xmm1 ; AVX512VL-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1] diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index 834dfd63432b0..048386609bffd 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -369,18 +369,18 @@ define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind { ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v16i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq ; ; XOP-LABEL: test_bitreverse_v16i8: ; XOP: # %bb.0: @@ -474,19 +474,19 @@ define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind { ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v8i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] -; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: vpshufb %xmm0, 
%xmm1, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v8i16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14] +; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq ; ; XOP-LABEL: test_bitreverse_v8i16: ; XOP: # %bb.0: @@ -596,19 +596,19 @@ define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind { ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v4i32: -; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v4i32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] +; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq ; ; XOP-LABEL: test_bitreverse_v4i32: ; XOP: # %bb.0: @@ -725,19 +725,19 @@ define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind { ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v2i64: -; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] -; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm2 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: vpshufb %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v2i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8] +; AVX512F-NEXT: vpbroadcastb {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm2 +; 
AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: vpshufb %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: vpshufb %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: retq ; ; XOP-LABEL: test_bitreverse_v2i64: ; XOP: # %bb.0: @@ -880,20 +880,20 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind { ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v32i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: # ymm1 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v32i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: # ymm1 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v32i8: ; XOPAVX1: # %bb.0: @@ -1048,21 +1048,21 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind { ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v16i16: -; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] -; AVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: # ymm1 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v16i16: +; AVX512F: # %bb.0: 
+; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30] +; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: # ymm1 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v16i16: ; XOPAVX1: # %bb.0: @@ -1262,21 +1262,21 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind { ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v8i32: -; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] -; AVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: # ymm1 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v8i32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28] +; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: # ymm1 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v8i32: ; XOPAVX1: # %bb.0: @@ -1489,21 +1489,21 @@ define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind { ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_bitreverse_v4i64: -; AVX512: # %bb.0: -; AVX512-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] -; AVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm2 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] -; AVX512-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm2, %ymm3, %ymm2 -; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] -; AVX512-NEXT: # ymm1 = mem[0,1,0,1] -; AVX512-NEXT: vpshufb %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: vpor %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_bitreverse_v4i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24] +; AVX512F-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm2 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240] +; AVX512F-NEXT: # ymm3 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] +; AVX512F-NEXT: # ymm1 = mem[0,1,0,1] +; AVX512F-NEXT: vpshufb %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v4i64: ; XOPAVX1: # %bb.0: @@ -1802,7 +1802,7 @@ define <64 x i8> @test_bitreverse_v64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] ; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512BW-NEXT: vpshufb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v64i8: @@ -2113,7 +2113,7 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind { ; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] ; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512BW-NEXT: vpshufb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v32i16: @@ -2496,7 +2496,7 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind { ; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] ; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512BW-NEXT: vpshufb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v16i32: @@ -2904,7 +2904,7 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind { ; AVX512BW-NEXT: 
vbroadcasti32x4 {{.*#+}} zmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15,0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15] ; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512BW-NEXT: vpshufb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; XOPAVX1-LABEL: test_bitreverse_v8i64: diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index a0c27601a845d..4c6698f406d75 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -861,7 +861,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt) ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLBW-NEXT: vpsllw $8, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 @@ -2113,7 +2113,7 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLBW-NEXT: vpsllw $8, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0 @@ -2187,7 +2187,7 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlq $50, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsllq $14, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v2i64: @@ -2210,7 +2210,7 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlq $50, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpsllq $14, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i64: @@ -2261,7 +2261,7 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> 
%x, <4 x i32> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $28, %xmm1, %xmm1 ; AVX512VL-NEXT: vpslld $4, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v4i32: @@ -2284,7 +2284,7 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrld $28, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpslld $4, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i32: @@ -2358,7 +2358,7 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $9, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 2fadf5f101626..a9f30464a4d50 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -372,7 +372,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -385,7 +385,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -645,7 +645,7 @@ define <32 x i8> 
@var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 @@ -671,7 +671,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, 
%zmm0 @@ -1903,7 +1903,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 @@ -1925,7 +1925,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 @@ -2017,7 +2017,7 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlq $50, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsllq $14, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v4i64: @@ -2039,7 +2039,7 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlq $50, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpsllq $14, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64: @@ -2102,7 +2102,7 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) 
nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $28, %ymm1, %ymm1 ; AVX512VL-NEXT: vpslld $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v8i32: @@ -2124,7 +2124,7 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrld $28, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpslld $4, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32: @@ -2209,7 +2209,7 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $9, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll index 34ad667f01171..9c22178b6e8bf 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll @@ -137,7 +137,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpslld $16, %zmm4, %zmm4 -; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpaddd %zmm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero ; AVX512F-NEXT: vpsllvd %zmm4, %zmm3, %zmm3 @@ -148,7 +148,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -162,7 +162,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpslld $16, %zmm4, %zmm4 -; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpaddd %zmm3, %zmm4, %zmm3 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero ; AVX512VL-NEXT: vpsllvd %zmm4, %zmm3, %zmm3 @@ -173,7 +173,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -980,21 +980,21 @@ define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwi ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsrlq $50, %zmm1, %zmm1 ; AVX512F-NEXT: vpsllq $14, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v8i64: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlq $50, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsllq $14, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v8i64: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlq $50, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllq $14, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i64: @@ -1006,7 +1006,7 @@ define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlq $50, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllq $14, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i64: @@ -1022,21 +1022,21 @@ define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) no ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsrld $28, %zmm1, %zmm1 ; AVX512F-NEXT: vpslld $4, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i32: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: 
vpsrld $28, %zmm1, %zmm1 ; AVX512VL-NEXT: vpslld $4, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrld $28, %zmm1, %zmm1 ; AVX512BW-NEXT: vpslld $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i32: @@ -1048,7 +1048,7 @@ define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) no ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrld $28, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpslld $4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i32: @@ -1090,7 +1090,7 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $9, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16: @@ -1102,7 +1102,7 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $9, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index ec2efcd82395a..940d3fc4f657c 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -584,7 +584,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512F-NEXT: vpslld $8, %zmm0, %zmm2 -; AVX512F-NEXT: vpord %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -597,7 +597,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512VL-NEXT: vpslld $8, %zmm0, %zmm2 -; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 @@ -1803,7 +1803,7 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind { ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $9, %xmm0, %xmm1 ; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index 5f7e4070b3783..52c258fa8ae31 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -1578,7 +1578,7 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind { ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index 4c6680ac4a19a..598c758e52e34 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -688,14 +688,14 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $9, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $9, %zmm0, %zmm1 ; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll index ae5dd18d4b663..826b689fbc850 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-sub128.ll @@ -621,12 +621,47 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwi ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: splatconstant_funnnel_v2i32: -; AVX: # %bb.0: -; AVX-NEXT: vpsrld $28, %xmm1, %xmm1 -; AVX-NEXT: vpslld $4, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: splatconstant_funnnel_v2i32: 
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_funnnel_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX2-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: splatconstant_funnnel_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX512F-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX512VL-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX512BW-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrld $28, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpslld $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
 ; AVX512VBMI2: # %bb.0:
@@ -658,3 +693,5 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwi
 %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
 ret <2 x i32> %res
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 33a6a7679bb9a..b909e984165a7 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -970,7 +970,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %amt)
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
 ; AVX512VLBW-NEXT: vpsllw $8, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0
@@ -2110,7 +2110,7 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 =
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLBW-NEXT: vpsllw $8, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512VLBW-NEXT: vzeroupper @@ -2192,7 +2192,7 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlq $14, %xmm1, %xmm1 ; AVX512VL-NEXT: vpsllq $50, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v2i64: @@ -2215,7 +2215,7 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlq $14, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpsllq $50, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i64: @@ -2266,7 +2266,7 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $4, %xmm1, %xmm1 ; AVX512VL-NEXT: vpslld $28, %xmm0, %xmm0 -; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v4i32: @@ -2289,7 +2289,7 @@ define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrld $4, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpslld $28, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i32: @@ -2363,7 +2363,7 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $7, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpsllw $9, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index 217431be10d88..fae9382bcb37e 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -402,7 +402,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 @@ -414,7 +414,7 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> % ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 @@ -676,7 +676,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 @@ -700,7 +700,7 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %amt) ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 @@ -1700,7 +1700,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq @@ -1720,7 +1720,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VLBW-NEXT: vpsllw $8, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: retq @@ -1808,7 +1808,7 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlq $14, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsllq $50, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v4i64: @@ -1830,7 +1830,7 @@ define <4 x i64> @splatconstant_funnnel_v4i64(<4 x i64> %x, <4 x i64> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlq $14, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpsllq $50, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i64: @@ -1893,7 +1893,7 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpslld $28, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v8i32: @@ -1915,7 +1915,7 @@ define <8 x i32> @splatconstant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrld $4, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpslld $28, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i32: @@ -2000,7 +2000,7 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) no ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $7, %ymm1, %ymm1 ; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index 3a522ccb6214a..2daad1e96660f 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -141,7 +141,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpslld $16, %zmm4, %zmm4 -; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpaddd %zmm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero ; AVX512F-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3 @@ -151,7 +151,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 @@ -164,7 +164,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpslld $16, %zmm4, %zmm4 -; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpaddd %zmm3, %zmm4, %zmm3 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm2 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero ; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3 @@ -174,7 +174,7 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpslld $16, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, 
%ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 @@ -1022,21 +1022,21 @@ define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwi ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsrlq $14, %zmm1, %zmm1 ; AVX512F-NEXT: vpsllq $50, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v8i64: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlq $14, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsllq $50, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v8i64: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlq $14, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllq $50, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i64: @@ -1048,7 +1048,7 @@ define <8 x i64> @splatconstant_funnnel_v8i64(<8 x i64> %x, <8 x i64> %y) nounwi ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlq $14, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllq $50, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i64: @@ -1064,21 +1064,21 @@ define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) no ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsrld $4, %zmm1, %zmm1 ; AVX512F-NEXT: vpslld $28, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v16i32: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $4, %zmm1, %zmm1 ; AVX512VL-NEXT: vpslld $28, %zmm0, %zmm0 -; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_funnnel_v16i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrld $4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpslld $28, %zmm0, %zmm0 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i32: @@ -1090,7 +1090,7 @@ define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x, <16 x i32> %y) no ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrld $4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpslld $28, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i32: @@ -1132,7 +1132,7 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16: @@ -1144,7 +1144,7 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) no ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $7, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; 
AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 5d01dfd459f8c..d3e87752d4264 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -608,7 +608,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512F-NEXT: vpslld $8, %zmm0, %zmm2 -; AVX512F-NEXT: vpord %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 @@ -620,7 +620,7 @@ define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512VL-NEXT: vpslld $8, %zmm0, %zmm2 -; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 @@ -1872,7 +1872,7 @@ define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind { ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $7, %xmm0, %xmm1 ; AVX512VLBW-NEXT: vpsllw $9, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 4dc931dd304fb..1765a833858cc 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -1630,7 +1630,7 @@ define <16 x i16> @splatconstant_funnnel_v16i16(<16 x i16> %x) nounwind { ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsrlw $7, %ymm0, %ymm1 ; AVX512VLBW-NEXT: vpsllw $9, %ymm0, %ymm0 -; 
AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 1d089e427bfad..ecfcbfee2f5ed 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -688,14 +688,14 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1
 ; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1
 ; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v32i16:
diff --git a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
index 2d8670a6d3f23..1387000306c64 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-sub128.ll
@@ -634,12 +634,47 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwi
 ; SSE-NEXT: por %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: splatconstant_funnnel_v2i32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrld $4, %xmm1, %xmm1
-; AVX-NEXT: vpslld $28, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: splatconstant_funnnel_v2i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrld $4, %xmm1, %xmm1
+; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_funnnel_v2i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrld $4, %xmm1, %xmm1
+; AVX2-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: splatconstant_funnnel_v2i32:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpsrld $4, %xmm1, %xmm1
+; AVX512F-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpsrld $4, %xmm1, %xmm1
+; AVX512VL-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpsrld $4, %xmm1, %xmm1
+; AVX512BW-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
+; AVX512VLBW: # %bb.0:
+; AVX512VLBW-NEXT: vpsrld $4, %xmm1, %xmm1
+; AVX512VLBW-NEXT: vpslld $28, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
 ; AVX512VBMI2: # %bb.0:
@@ -671,3 +706,5 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x, <2 x i32> %y) nounwi
 %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
 ret <2 x i32> %res
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 0bf5a8d6daeae..cae03c3f3ee3f 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -517,7 +517,7 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
 ; AVX512F-NEXT: vpslld $8, %zmm0, %zmm2
-; AVX512F-NEXT: vpord %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
 ; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
@@ -530,7 +530,7 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
 ; AVX512VL-NEXT: vpslld $8, %zmm0, %zmm2
-; AVX512VL-NEXT: vpord %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT: vpaddd %zmm2, %zmm0, %zmm0
 ; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
 ; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
@@ -1523,7 +1523,7 @@ define <8 x i16> @splatconstant_rotate_v8i16(<8 x i16> %a) nounwind {
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpsrlw $9, %xmm0, %xmm1
 ; AVX512VLBW-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpaddw %xmm1, %xmm0, %xmm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_v8i16:
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 5ae3e2f5d7621..f025c9b8bb213 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -1327,7 +1327,7 @@ define <16 x i16> @splatconstant_rotate_v16i16(<16 x i16> %a) nounwind {
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpsrlw $9, %ymm0, %ymm1
 ; AVX512VLBW-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpaddw %ymm1, %ymm0, %ymm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_v16i16:
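The rotate tests above exercise the same combine as the funnel-shift tests: a rotate by a splat constant lowers to a left shift and a right shift whose amounts sum to the element width, so the two partial results have no set bits in common and the combining or is ADD-like. A minimal standalone reproducer, sketched here for illustration (the function name and the choice of v8i16 are hypothetical, not taken from this patch):

; rotl by 7 on i16 elements: (x shl 7) only uses bits [7,15] and
; (x lshr 9) only uses bits [0,6], so or-ing them can never carry and
; the or may be selected as vpaddw on AVX512VL+AVX512BW targets.
define <8 x i16> @rotl7(<8 x i16> %a) nounwind {
  %r = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  ret <8 x i16> %r
}
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

Compiled with llc -mtriple=x86_64 -mattr=+avx512vl,+avx512bw, this should now produce the vpsrlw $9 / vpsllw $7 / vpaddw sequence checked under the AVX512VLBW prefix above.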
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 2cde988ed7762..b70f4ef3134fe 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -719,14 +719,14 @@ define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpsrlw $9, %zmm0, %zmm1
 ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_rotate_v32i16:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpsrlw $9, %zmm0, %zmm1
 ; AVX512VLBW-NEXT: vpsllw $7, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: retq
 ;
 ; AVX512VBMI2-LABEL: splatconstant_rotate_v32i16:
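All of these test deltas rest on one identity: when a & b == 0, a | b == a + b, since no bit position can generate a carry. The middle end records this on suitable ors with the disjoint flag, which is what lets instruction selection treat such an or as an add. A hand-written sketch of the pre-isel shape, under the assumption that the rotate has already been expanded to shifts (hypothetical function, for illustration only):

; The two halves of a 16-bit rotate-by-7 occupy disjoint bit ranges,
; so the or is tagged disjoint and is interchangeable with an add.
define <8 x i16> @rotl7_expanded(<8 x i16> %x) nounwind {
  %hi = shl <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %lo = lshr <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %r = or disjoint <8 x i16> %hi, %lo
  ret <8 x i16> %r
}

Selecting vpaddw here is behavior-preserving for exactly the same reason the vporq it replaces was: both compute the union of two carry-free bit ranges, and the word-granularity add needs no widening of the element type.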