diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d12b802fe234f..f0d378b66883f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -102,6 +102,42 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
   // the callers'.
   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                      (CalleeBits & InlineFeaturesAllowed);
+
+  LLVM_DEBUG({
+    // Prints the index of every bit set in Bits as a comma-separated list.
+    // Indices map to features as found in
+    // llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc
+    auto PrintFeatureIndices = [](const char *Label,
+                                  const FeatureBitset &Bits) {
+      dbgs() << Label << " feature indices [";
+      bool First = true;
+      for (size_t I = 0, E = Bits.size(); I < E; ++I) {
+        if (!Bits.test(I))
+          continue;
+        if (!First)
+          dbgs() << ", ";
+        dbgs() << I;
+        First = false;
+      }
+      dbgs() << "]\n";
+    };
+
+    dbgs() << "=== Inline compatibility debug ===\n";
+    dbgs() << "Caller: " << Caller->getName() << "\n";
+    dbgs() << "Callee: " << Callee->getName() << "\n";
+
+    // Features that are set on exactly one side of the call.
+    const FeatureBitset MissingInCaller = CalleeBits & ~CallerBits;
+    const FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits;
+
+    dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
+    dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";
+    PrintFeatureIndices("Only-in-caller", ExtraInCaller);
+    PrintFeatureIndices("Only-in-callee", MissingInCaller);
+
+    dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
+           << " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";
+  });
   return MatchExact && MatchSubset;
 }
 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 919a6fc9fd0b0..b4b828b9ef79b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -40,13 +40,13 @@ class Type;
 class Value;
 
 namespace TailPredication {
-  enum Mode {
-    Disabled = 0,
-    EnabledNoReductions,
-    Enabled,
-    ForceEnabledNoReductions,
-    ForceEnabled
-  };
+enum Mode {
+  Disabled = 0,
+  EnabledNoReductions,
+  Enabled,
+  ForceEnabledNoReductions,
+  ForceEnabled
+};
 }
 
 // For controlling conversion of memcpy into Tail Predicated loop.
@@ -64,37 +64,109 @@ class ARMTTIImpl final : public BasicTTIImplBase {
   const ARMTargetLowering *TLI;
 
   // Currently the following features are excluded from InlineFeaturesAllowed.
-  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
+  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64.
   // Depending on whether they are set or unset, different
   // instructions/registers are available. For example, inlining a callee with
   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
   // fail if the callee uses ARM only instructions, e.g. in inline asm.
   const FeatureBitset InlineFeaturesAllowed = {
-      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
-      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
-      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
-      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
-      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
-      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
-      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
-      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
-      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
-      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
-      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
-      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
-      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
-      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
-      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
-      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
-      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
-      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
-      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
-      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
-      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
-      ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
-      ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
-      ARM::FeatureNoNegativeImmediates
-  };
+      ARM::FeatureD32,
+      ARM::FeatureFPRegs64,
+      ARM::FeatureFPRegs16,
+      ARM::FeatureFPRegs,
+      ARM::FeatureAES,
+      ARM::FeatureVFP2_SP,
+      ARM::FeatureSHA2,
+      ARM::HasV5TEOps,
+      ARM::HasV6Ops,
+      ARM::HasV6KOps,
+      ARM::HasV6T2Ops,
+      ARM::HasV7Ops,
+      ARM::HasV5TOps,
+      ARM::HasV6MOps,
+      ARM::HasV8MBaselineOps,
+      ARM::HasV8MMainlineOps,
+      ARM::HasV8_1aOps,
+      ARM::HasV8_2aOps,
+      ARM::HasV8_3aOps,
+      ARM::HasV8_4aOps,
+      ARM::HasV8_5aOps,
+      ARM::HasV8_6aOps,
+      ARM::HasV8_7aOps,
+      ARM::HasV8_8aOps,
+      ARM::HasV8_9aOps,
+      ARM::HasV9_0aOps,
+      ARM::HasV9_1aOps,
+      ARM::HasV9_2aOps,
+      ARM::HasV9_3aOps,
+      ARM::HasV9_4aOps,
+      ARM::HasV9_5aOps,
+      ARM::HasV9_6aOps,
+      ARM::HasV9_7aOps,
+      ARM::HasV8_1MMainlineOps,
+      ARM::FeatureDotProd,
+      ARM::HasV8Ops,
+      ARM::FeatureSB,
+      ARM::FeatureBF16,
+      ARM::FeatureVFP2,
+      ARM::FeatureVFP3,
+      ARM::FeatureNEON,
+      ARM::FeatureThumb2,
+      ARM::FeatureFP16,
+      ARM::FeatureVFP4,
+      ARM::FeatureFPARMv8,
+      ARM::FeatureFullFP16,
+      ARM::FeatureFP16FML,
+      ARM::FeatureHWDivThumb,
+      ARM::FeatureHWDivARM,
+      ARM::FeatureDB,
+      ARM::FeatureV7Clrex,
+      ARM::FeatureAcquireRelease,
+      ARM::FeatureSlowFPBrcc,
+      ARM::FeaturePerfMon,
+      ARM::FeatureTrustZone,
+      ARM::Feature8MSecExt,
+      ARM::FeatureCrypto,
+      ARM::FeatureCRC,
+      ARM::FeatureRAS,
+      ARM::FeatureFPAO,
+      ARM::FeatureFuseAES,
+      ARM::FeatureZCZeroing,
+      ARM::FeatureProfUnpredicate,
+      ARM::FeatureSlowVGETLNi32,
+      ARM::FeatureSlowVDUP32,
+      ARM::FeaturePreferVMOVSR,
+      ARM::FeaturePrefISHSTBarrier,
+      ARM::FeatureMuxedUnits,
+      ARM::FeatureSlowOddRegister,
+      ARM::FeatureSlowLoadDSubreg,
+      ARM::FeatureDontWidenVMOVS,
+      ARM::FeatureExpandMLx,
+      ARM::FeatureHasVMLxHazards,
+      ARM::FeatureNEONForFPMovs,
+      ARM::FeatureNEONForFP,
+      ARM::FeatureCheckVLDnAlign,
+      ARM::FeatureHasSlowFPVMLx,
+      ARM::FeatureHasSlowFPVFMx,
+      ARM::FeatureVMLxForwarding,
+      ARM::FeaturePref32BitThumb,
+      ARM::FeatureAvoidPartialCPSR,
+      ARM::FeatureCheapPredicableCPSR,
+      ARM::FeatureAvoidMOVsShOp,
+      ARM::FeatureHasRetAddrStack,
+      ARM::FeatureHasNoBranchPredictor,
+      ARM::FeatureDSP,
+      ARM::FeatureMP,
+      ARM::FeatureVirtualization,
+      ARM::FeatureMClass,
+      ARM::FeatureRClass,
+      ARM::FeatureAClass,
+      ARM::FeatureStrictAlign,
+      ARM::FeatureLongCalls,
+      ARM::FeatureExecuteOnly,
+      ARM::FeatureReserveR9,
+      ARM::FeatureNoMovt,
+      ARM::FeatureNoNegativeImmediates};
 
   const ARMSubtarget *getST() const { return ST; }
   const ARMTargetLowering *getTLI() const { return TLI; }
diff --git a/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
new file mode 100644
index 0000000000000..2f8dbb7f01822
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes=inline | FileCheck %s
+; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s
+
+; A callee without +dotprod is expected to be inlined into a +dotprod caller,
+; while a +dotprod callee is expected to stay a call in a caller that lacks
+; +dotprod.
+
+declare i32 @foo(...) #0
+
+define i32 @callee() #0 {
+entry:
+  %call = call i32 (...) @foo()
+  ret i32 %call
+}
+
+define i32 @dotcallee() #1 {
+entry:
+  %call = call i32 (...) @foo()
+  ret i32 %call
+}
+
+define i32 @dotcaller() #1 {
+entry:
+  %call = call i32 @callee()
+  ret i32 %call
+; CHECK-LABEL: define i32 @dotcaller(
+; CHECK-NOT: call i32 @callee()
+; CHECK: call i32 (...) @foo()
+}
+
+define i32 @caller() #0 {
+entry:
+  %call = call i32 @dotcallee()
+  ret i32 %call
+; CHECK-LABEL: define i32 @caller(
+; CHECK: call i32 @dotcallee()
+}
+
+attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
+attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+dotprod" }