Skip to content

Commit

Permalink
[AArch64][SVE] Teach cost model masked gathers/scatters are cheap
Browse files Browse the repository at this point in the history
Tell the cost model to use the scalable calculation for non-neon fixed vector.
This results in a cheaper cost for fixed-length SVE masked gathers/scatters
allowing the vectorizor to emit them more frequently.
  • Loading branch information
MDevereau committed Aug 26, 2021
1 parent bd7ece4 commit 9b830c7
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 18 deletions.
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Expand Up @@ -1569,8 +1569,7 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {

if (!isa<ScalableVectorType>(DataTy))
if (useNeonVector(DataTy))
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
auto *VT = cast<VectorType>(DataTy);
Expand Down
79 changes: 63 additions & 16 deletions llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
Expand Up @@ -92,8 +92,8 @@ define <4 x i8> @gather_load_4xi8_constant_mask(<4 x i8*> %ptrs) {
; CHECK: gather_load_4xi8_constant_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
;
%lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
ret <4 x i8> %lv
Expand All @@ -103,8 +103,8 @@ define <4 x i8> @gather_load_4xi8_variable_mask(<4 x i8*> %ptrs, <4 x i1> %cond)
; CHECK: gather_load_4xi8_variable_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
;
%lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
ret <4 x i8> %lv
Expand All @@ -115,8 +115,8 @@ define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x i8*> %ptrs) {
; CHECK: scatter_store_4xi8_constant_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
;
call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
ret void
Expand All @@ -126,8 +126,8 @@ define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x i8*> %ptrs, <4
; CHECK: scatter_store_4xi8_variable_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
;
call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %cond)
ret void
Expand All @@ -138,8 +138,8 @@ define <4 x i32> @gather_load_4xi32_constant_mask(<4 x i32*> %ptrs) {
; CHECK: gather_load_4xi32_constant_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
;
%lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %lv
Expand All @@ -149,8 +149,8 @@ define <4 x i32> @gather_load_4xi32_variable_mask(<4 x i32*> %ptrs, <4 x i1> %co
; CHECK: gather_load_4xi32_variable_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
;
%lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
ret <4 x i32> %lv
Expand All @@ -161,8 +161,8 @@ define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x i32*> %ptrs)
; CHECK: scatter_store_4xi32_constant_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
;
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
ret void
Expand All @@ -172,9 +172,56 @@ define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x i32*> %ptrs,
; CHECK: scatter_store_4xi32_variable_mask
; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-256: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
; CHECK-SVE-512: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
;
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> %cond)
ret void
}

declare <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*>, i32, <256 x i1>, <256 x i16>)
define void @sve_gather_vls(<256 x i1> %v256i1mask) {
; CHECK-LABEL: 'sve_scatter_vls'
; CHECK-NEON: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
entry:
%res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0i16(<256 x i16*> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
ret void
}

declare <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*>, i32, <256 x i1>, <256 x float>)
define void @sve_gather_vls_float(<256 x i1> %v256i1mask) {
; CHECK-LABEL: 'sve_gather_vls_float'
; CHECK-NEON: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
entry:
%res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0f32(<256 x float*> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
ret void
}

declare void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8>, <256 x i8*>, i32, <256 x i1>)
define void @sve_scatter_vls(<256 x i1> %v256i1mask){
; CHECK-LABEL: 'sve_scatter_vls'
; CHECK-NEON: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
; CHECK-SVE-128: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
; CHECK-SVE-256: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
; CHECK-SVE-512: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
entry:
call void @llvm.masked.scatter.v256i8.v256p0i8(<256 x i8> undef, <256 x i8*> undef, i32 0, <256 x i1> %v256i1mask)
ret void
}

declare void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half>, <512 x half*>, i32, <512 x i1>)
define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){
; CHECK-LABEL: 'sve_scatter_vls_float'
; CHECK-NEON: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
; CHECK-SVE-128: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
; CHECK-SVE-256: Cost Model: Found an estimated cost of 512 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
; CHECK-SVE-512: Cost Model: Found an estimated cost of 512 for instruction: call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
call void @llvm.masked.scatter.v512f16.v512p0f16(<512 x half> undef, <512 x half*> undef, i32 0, <512 x i1> %v512i1mask)
ret void
}

0 comments on commit 9b830c7

Please sign in to comment.