Skip to content

Commit

Permalink
AVX512: Store (MOVNTPD, MOVNTPS, MOVNTDQ) using non-temporal hint intrinsic implementation.
Browse files Browse the repository at this point in the history

Differential Revision: http://reviews.llvm.org/D16350

llvm-svn: 258309
  • Loading branch information
Igor Breger committed Jan 20, 2016
1 parent f7696f8 commit d3341f5
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 38 deletions.
12 changes: 12 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Expand Up @@ -2030,6 +2030,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrReadWriteArgMem]>;
}

// Store ops using non-temporal hint.
// Three 512-bit variants (8 x i64, 8 x f64, 16 x f32). Each intrinsic has no
// result and takes two operands: the destination pointer followed by the
// vector to store. All three carry the IntrReadWriteArgMem attribute.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Integer variant: <8 x i64> data, paired with __builtin_ia32_movntdq512.
def int_x86_avx512_storent_q_512 :
GCCBuiltin<"__builtin_ia32_movntdq512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty], [IntrReadWriteArgMem]>;
// Double-precision variant: <8 x f64> data, paired with
// __builtin_ia32_movntpd512.
def int_x86_avx512_storent_pd_512 :
GCCBuiltin<"__builtin_ia32_movntpd512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty], [IntrReadWriteArgMem]>;
// Single-precision variant: <16 x f32> data, paired with
// __builtin_ia32_movntps512.
def int_x86_avx512_storent_ps_512 :
GCCBuiltin<"__builtin_ia32_movntps512">,
Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty], [IntrReadWriteArgMem]>;
}
//===----------------------------------------------------------------------===//
// AVX2

Expand Down
17 changes: 16 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -4229,10 +4229,11 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
break;
}
case STOREA:
case STOREANT:
case STOREU: {
Info.ptrVal = I.getArgOperand(0);
Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
Info.align = (IntrData->Type == STOREA ? Info.memVT.getSizeInBits()/8 : 1);
Info.align = (IntrData->Type == STOREU ? 1 : Info.memVT.getSizeInBits()/8);
Info.writeMem = true;
break;
}
Expand Down Expand Up @@ -17739,6 +17740,20 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMaskedStore(Chain, dl, Data, Addr, VMask, VT,
MemIntr->getMemOperand(), false);
}
case STOREANT: {
// Lower the non-temporal store intrinsics (MOVNTPD, MOVNTPS, MOVNTDQ) to an
// ordinary store node whose memory operand is tagged as non-temporal.
// Operand layout used here: 0 = chain, 2 = address, 3 = value to store.
SDValue Data = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);

// The memory operand is taken from the intrinsic node itself.
// NOTE(review): dyn_cast<> followed by an assert is usually spelled cast<>,
// which performs the same check — consider switching.
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
MachineMemOperand *MMO = MemIntr->getMemOperand();

// Mark the access as non-temporal on the existing memory operand.
// NOTE(review): confirm that setFlags() keeps the flags already present on
// the operand (e.g. the store flag) rather than replacing them.
MMO->setFlags(MachineMemOperand::MONonTemporal);

// Emit the store with the updated memory operand.
return DAG.getStore(Chain, dl, Data, Addr, MMO);
}
}
}

Expand Down
51 changes: 16 additions & 35 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -3194,50 +3194,31 @@ let SchedRW = [WriteLoad] in {
}
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
ValueType OpVT, RegisterClass RC, X86MemOperand memop,
Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag = alignednontemporalstore,
InstrItinClass itin = IIC_SSE_MOVNT> {
let SchedRW = [WriteStore], mayStore = 1,
AddedComplexity = 400 in
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
[(st_frag (_.VT _.RC:$src), addr:$dst)],
_.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
string elty, string elsz, string vsz512,
string vsz256, string vsz128, Domain d,
Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
let Predicates = [prd] in
defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
!cast<ValueType>("v"##vsz512##elty##elsz), VR512,
!cast<X86MemOperand>(elty##"512mem"), d, itin>,
EVEX_V512;

let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
!cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
!cast<X86MemOperand>(elty##"256mem"), d, itin>,
EVEX_V256;
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;

defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
!cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
!cast<X86MemOperand>(elty##"128mem"), d, itin>,
EVEX_V128;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
}
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
"i", "64", "8", "4", "2", SSEPackedInt,
HasAVX512>, PD, EVEX_CD8<64, CD8VF>;

defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
"f", "64", "8", "4", "2", SSEPackedDouble,
HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
"f", "32", "16", "8", "4", SSEPackedSingle,
HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Expand Up @@ -29,7 +29,7 @@ enum IntrinsicType {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC,
EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, STOREANT, BLEND, INSERT_SUBVEC,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
Expand Down Expand Up @@ -260,7 +260,9 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scattersiv4_si, SCATTER, X86::VPSCATTERDDZ128mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, X86::VSCATTERDPSZ256mr, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, X86::VPSCATTERDDZ256mr, 0),

X86_INTRINSIC_DATA(avx512_storent_pd_512, STOREANT, ISD::DELETED_NODE, 0),
X86_INTRINSIC_DATA(avx512_storent_ps_512, STOREANT, ISD::DELETED_NODE, 0),
X86_INTRINSIC_DATA(avx512_storent_q_512, STOREANT, ISD::DELETED_NODE, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/X86/avx512-intrinsics.ll
Expand Up @@ -7176,3 +7176,35 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x
ret <2 x double> %res4
}

; Non-temporal store of an <8 x i64> vector through the
; llvm.x86.avx512.storent.q.512 intrinsic. The expected code is a single
; vmovntdq from %zmm0 to the address in %rdi, followed by the return.
declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)

define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovntdq %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
ret void
}

; Non-temporal store of an <8 x double> vector through the
; llvm.x86.avx512.storent.pd.512 intrinsic. The expected code is a single
; vmovntpd from %zmm0 to the address in %rdi, followed by the return.
declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)

define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovntpd %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
ret void
}

; Non-temporal store of a <16 x float> vector through the
; llvm.x86.avx512.storent.ps.512 intrinsic. The expected code is a single
; vmovntps from %zmm0 to the address in %rdi, followed by the return.
declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)

define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
; CHECK-LABEL: test_storent_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovntps %zmm0, (%rdi)
; CHECK-NEXT: retq
call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
ret void
}

0 comments on commit d3341f5

Please sign in to comment.