diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 353e96856b8f3..bee9ec4c71323 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -212,6 +212,30 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return (CallerBits & CalleeBits) == CalleeBits; } +bool AArch64TTIImpl::areTypesABICompatible( + const Function *Caller, const Function *Callee, + const ArrayRef &Types) const { + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) + return false; + + // We need to ensure that argument promotion does not attempt to promote + // pointers to fixed-length vector types larger than 128 bits like + // <8 x float> (and pointers to aggregate types which have such fixed-length + // vector type members) into the values of the pointees. Such vector types + // are used for SVE VLS but there is no ABI for SVE VLS arguments and the + // backend cannot lower such value arguments. The 128-bit fixed-length SVE + // types can be safely treated as 128-bit NEON types and they cannot be + // distinguished in IR. + if (ST->useSVEForFixedLengthVectors() && llvm::any_of(Types, [](Type *Ty) { + auto FVTy = dyn_cast(Ty); + return FVTy && + FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128; + })) + return false; + + return true; +} + bool AArch64TTIImpl::shouldMaximizeVectorBandwidth( TargetTransformInfo::RegisterKind K) const { assert(K != TargetTransformInfo::RGK_Scalar); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 787cb3c5d34b5..d1977a62a76d3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -77,6 +77,9 @@ class AArch64TTIImpl : public BasicTTIImplBase { bool areInlineCompatible(const Function *Caller, const Function *Callee) const; + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const; + /// \name Scalar TTI Implementations /// @{ diff --git a/llvm/test/CodeGen/AArch64/arg_promotion.ll b/llvm/test/CodeGen/AArch64/arg_promotion.ll new file mode 100644 index 0000000000000..cc37d230c6cbe --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arg_promotion.ll @@ -0,0 +1,190 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py + +; RUN: opt -S -passes=argpromotion -mtriple=aarch64-unknwon-linux-gnu < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; Don't promote a vector pointer argument when the pointee type size is greater +; than 128 bits. + +define dso_local void @caller_8xi32(ptr noalias %src, ptr noalias %dst) #0 { +; CHECK-LABEL: define dso_local void @caller_8xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: call fastcc void @callee_8xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) +; CHECK-NEXT: ret void +; +entry: + call fastcc void @callee_8xi32(ptr noalias %src, ptr noalias %dst) + ret void +} + +define internal fastcc void @callee_8xi32(ptr noalias %src, ptr noalias %dst) #0 { +; CHECK-LABEL: define internal fastcc void @callee_8xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[DST:%.*]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = load <8 x i32>, ptr %src, align 16 + store <8 x i32> %0, ptr %dst, align 16 + ret void +} + +; Promote a vector pointer argument when the pointee type size is 128 bits or +; less. + +define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 { +; CHECK-LABEL: define dso_local void @caller_4xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]]) +; CHECK-NEXT: ret void +; +entry: + call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) + ret void +} + +define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 { +; CHECK-LABEL: define internal fastcc void @callee_4xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = load <4 x i32>, ptr %src, align 16 + store <4 x i32> %0, ptr %dst, align 16 + ret void +} + +; A scalar pointer argument is promoted even when the pointee type size is +; greater than 128 bits. + +define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 { +; CHECK-LABEL: define dso_local void @caller_i256( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]]) +; CHECK-NEXT: ret void +; +entry: + call fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) + ret void +} + +define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 { +; CHECK-LABEL: define internal fastcc void @callee_i256( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = load i256, ptr %src, align 16 + store i256 %0, ptr %dst, align 16 + ret void +} + +; A scalable vector pointer argument is not a target of ArgumentPromotionPass. + +define dso_local void @caller_nx4xi32(ptr noalias %src, ptr noalias %dst) #2 { +; CHECK-LABEL: define dso_local void @caller_nx4xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: call fastcc void @callee_nx4xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) +; CHECK-NEXT: ret void +; +entry: + call fastcc void @callee_nx4xi32(ptr noalias %src, ptr noalias %dst) + ret void +} + +define internal fastcc void @callee_nx4xi32(ptr noalias %src, ptr noalias %dst) #2 { +; CHECK-LABEL: define internal fastcc void @callee_nx4xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: store [[TMP0]], ptr [[DST:%.*]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = load , ptr %src, align 16 + store %0, ptr %dst, align 16 + ret void +} + +; Don't promote a structure pointer argument when the pointee vector member +; type size is greater than 128 bits. + +%struct_8xi32 = type { <8 x i32>, <8 x i32> } + +define dso_local void @caller_struct8xi32(ptr noalias %src, ptr noalias %dst) #0 { +; CHECK-LABEL: define dso_local void @caller_struct8xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: call fastcc void @callee_struct8xi32(ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) +; CHECK-NEXT: ret void +; +entry: + call fastcc void @callee_struct8xi32(ptr noalias %src, ptr noalias %dst) + ret void +} + +define internal fastcc void @callee_struct8xi32(ptr noalias %src, ptr noalias %dst) #0 { +; CHECK-LABEL: define internal fastcc void @callee_struct8xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[DST:%.*]], align 16 +; CHECK-NEXT: [[SRC2:%.*]] = getelementptr inbounds [[STRUCT_8XI32:%.*]], ptr [[SRC]], i64 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[SRC2]], align 16 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_8XI32]], ptr [[DST]], i64 0, i32 1 +; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[DST2]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = load <8 x i32>, ptr %src, align 16 + store <8 x i32> %0, ptr %dst, align 16 + %src2 = getelementptr inbounds %struct_8xi32, ptr %src, i64 0, i32 1 + %1 = load <8 x i32>, ptr %src2, align 16 + %dst2 = getelementptr inbounds %struct_8xi32, ptr %dst, i64 0, i32 1 + store <8 x i32> %1, ptr %dst2, align 16 + ret void +} + +; Promote a structure pointer argument when the pointee vector member type size +; is 128 bits or less. + +%struct_4xi32 = type { <4 x i32>, <4 x i32> } + +define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 { +; CHECK-LABEL: define dso_local void @caller_struct4xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16 +; CHECK-NEXT: [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16 +; CHECK-NEXT: call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]]) +; CHECK-NEXT: ret void +; +entry: + call fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) + ret void +} + +define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 { +; CHECK-LABEL: define internal fastcc void @callee_struct4xi32( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16 +; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1 +; CHECK-NEXT: store <4 x i32> [[SRC_16_VAL:%.*]], ptr [[DST2]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = load <4 x i32>, ptr %src, align 16 + store <4 x i32> %0, ptr %dst, align 16 + %src2 = getelementptr inbounds %struct_4xi32, ptr %src, i64 0, i32 1 + %1 = load <4 x i32>, ptr %src2, align 16 + %dst2 = getelementptr inbounds %struct_4xi32, ptr %dst, i64 0, i32 1 + store <4 x i32> %1, ptr %dst2, align 16 + ret void +} + +attributes #0 = { noinline vscale_range(2,2) "target-features"="+v8.2a,+neon,+sve" } +attributes #1 = { noinline vscale_range(1,1) "target-features"="+v8.2a,+neon,+sve" } +attributes #2 = { noinline "target-features"="+v8.2a,+neon,+sve" }