Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -1177,6 +1177,13 @@ def or_disjoint : PatFrag<(ops node:$lhs, node:$rhs),
}];
}

// Pattern fragment that matches a two-operand `add` node, or an `or` node
// that the DAG reports as add-like.
//
// Alternatives (both take $lhs and $rhs):
//   (add $lhs, $rhs)
//   (or  $lhs, $rhs)
//
// The predicate accepts an ISD::ADD node unconditionally; any other matched
// node is accepted only when CurDAG->isADDLike(Op) returns true.
//
// NOTE(review): the neighbouring fragments are spelled `or_disjoint` and
// `xor_like`; `addlike` has no underscore -- confirm the intended name.
// NOTE(review): no GlobalISel predicate is attached here -- confirm whether
// one is needed for users of this fragment.
def addlike : PatFrags<(ops node:$lhs, node:$rhs),
[(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
// A real ADD always qualifies; no further check is needed.
if (Op.getOpcode() == ISD::ADD)
return true;
// Otherwise defer to the DAG's add-like query for this node.
return CurDAG->isADDLike(Op);
}]>;

// Pattern fragment that matches a two-operand `xor` node or an `or_disjoint`
// node on the same ($lhs, $rhs) operands. No extra predicate code is attached
// at this level; any check comes from the `or_disjoint` fragment itself.
def xor_like : PatFrags<(ops node:$lhs, node:$rhs),
[(xor node:$lhs, node:$rhs),
(or_disjoint node:$lhs, node:$rhs)]>;
Expand Down
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -4685,7 +4685,7 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
Expand All @@ -4704,7 +4704,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable = 0> :
avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
Expand All @@ -4719,7 +4719,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
AVX512VLVectorVTInfo VTInfo,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
Expand All @@ -4735,7 +4735,7 @@ multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
AVX512VLVectorVTInfo VTInfo,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
Expand All @@ -4751,30 +4751,30 @@ multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
sched, prd, IsCommutable>,
REX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
Expand All @@ -4783,7 +4783,7 @@ multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
SDPatternOperator OpNode, X86SchedWriteWidths sched,
Predicate prd, bit IsCommutable = 0> {
defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
IsCommutable>;
Expand All @@ -4793,7 +4793,7 @@ multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
SDPatternOperator OpNode, X86SchedWriteWidths sched,
Predicate prd, bit IsCommutable = 0> {
defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
IsCommutable>;
Expand All @@ -4804,7 +4804,7 @@ multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
bits<8> opc_d, bits<8> opc_q,
string OpcodeStr, SDNode OpNode,
string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
Expand Down Expand Up @@ -4847,7 +4847,7 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
}
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", addlike,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
Expand Down
55 changes: 24 additions & 31 deletions llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,55 +100,48 @@ define dso_local <16 x i32> @test_api(i16 signext %0, i16 signext %1) nounwind {
; O0-NEXT: movb $1, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %si, %cx
; O0-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; O0-NEXT: movw %di, %ax
; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; O0-NEXT: movw %di, %dx
; O0-NEXT: movw %dx, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; O0-NEXT: movl $buf, %esi
; O0-NEXT: movl $32, %edi
; O0-NEXT: movw $8, %dx
; O0-NEXT: # implicit-def: $al
; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
; O0-NEXT: movl $64, %edi
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; O0-NEXT: movw $8, %dx
; O0-NEXT: tilestored %tmm0, (%rsi,%rdi)
; O0-NEXT: movl $32, %esi
; O0-NEXT: movl $buf+1024, %edx
; O0-NEXT: movw $8, %ax
; O0-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; O0-NEXT: # implicit-def: $al
; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; O0-NEXT: # implicit-def: $dl
; O0-NEXT: movb %dl, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %ax, {{[0-9]+}}(%rsp)
; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
; O0-NEXT: movl $64, %esi
; O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; O0-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; O0-NEXT: tilestored %tmm0, (%r8,%rsi)
; O0-NEXT: movl $buf+1024, %edx
; O0-NEXT: tileloadd (%rdx,%rdi), %tmm0
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; O0-NEXT: movw $8, %ax
; O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; O0-NEXT: tilestored %tmm0, (%rdx,%rsi)
; O0-NEXT: vzeroupper
; O0-NEXT: callq foo
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %dx # 2-byte Reload
; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Reload
; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload
; O0-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
; O0-NEXT: movl $64, %edi
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; O0-NEXT: movw $8, %cx
; O0-NEXT: # implicit-def: $cl
; O0-NEXT: movb %cl, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; O0-NEXT: tileloadd (%rsi,%rdi), %tmm0
; O0-NEXT: movw $8, %cx
; O0-NEXT: tilemovrow $2, %tmm0, %zmm0
; O0-NEXT: movl $64, %esi
; O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; O0-NEXT: movw $8, %cx
; O0-NEXT: # implicit-def: $al
; O0-NEXT: movb %al, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; O0-NEXT: # implicit-def: $cl
; O0-NEXT: movb %cl, {{[0-9]+}}(%rsp)
; O0-NEXT: movw %di, {{[0-9]+}}(%rsp)
; O0-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; O0-NEXT: tileloadd (%r8,%rsi), %tmm0
; O0-NEXT: tilemovrow $2, %tmm0, %zmm0
; O0-NEXT: tileloadd (%rdx,%rsi), %tmm0
; O0-NEXT: movw $8, %cx
; O0-NEXT: tilemovrow $2, %tmm0, %zmm1
; O0-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; O0-NEXT: movq %rbp, %rsp
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & m64bcst)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; NODQ-NEXT: retq
Expand All @@ -380,13 +380,13 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
; NODQ-NEXT: vpaddq %zmm5, %zmm0, %zmm0
; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2)
; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
; NODQ-NEXT: vpaddq %zmm5, %zmm1, %zmm1
; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
; NODQ-NEXT: vaddpd %zmm1, %zmm3, %zmm1
; NODQ-NEXT: retq
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/combine-shl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ define <4 x i32> @combine_vec_add_shl_nonsplat(<4 x i32> %a0) {
; AVX512-LABEL: combine_vec_add_shl_nonsplat:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = shl <4 x i32> %a0, <i32 2, i32 3, i32 4, i32 5>
%2 = add <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
Expand Down Expand Up @@ -881,7 +881,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) {
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = and <4 x i32> %a0, <i32 4294901760, i32 4294901760, i32 4294901760, i32 4294901760>
%2 = shl <4 x i32> %1, <i32 2, i32 3, i32 4, i32 5>
Expand Down Expand Up @@ -922,7 +922,7 @@ define <4 x i32> @combine_vec_add_shuffle_shl(<4 x i32> %a0) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = shl <4 x i32> %a0, <i32 2, i32 3, i32 0, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
Expand Down
Loading
Loading