diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td index 7137e5fbff4ff..38b0508885069 100644 --- a/llvm/lib/Target/Sparc/Sparc.td +++ b/llvm/lib/Target/Sparc/Sparc.td @@ -95,6 +95,9 @@ def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true", "rd %pc, %XX is slow", [FeatureV9]>; +def TuneNoPredictor : SubtargetFeature<"no-predictor", "HasNoPredictor", "true", + "Processor has no branch predictor, branches stall execution", []>; + //==== Features added predmoninantly for LEON subtarget support include "LeonFeatures.td" @@ -174,12 +177,15 @@ def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2], [TuneSlowRDPC]>; def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS, - FeatureVIS2, FeatureUA2005]>; + FeatureVIS2, FeatureUA2005], + [TuneNoPredictor]>; def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc, - FeatureVIS, FeatureVIS2, FeatureUA2005]>; + FeatureVIS, FeatureVIS2, FeatureUA2005], + [TuneNoPredictor]>; def : Proc<"niagara3", [FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2, FeatureVIS3, - FeatureUA2005, FeatureUA2007]>; + FeatureUA2005, FeatureUA2007], + [TuneNoPredictor]>; def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2, FeatureVIS3, FeatureUA2005, FeatureUA2007, FeatureOSA2011, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index cbb7db68f7e7c..ae3c32687c207 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -2000,6 +2000,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + // Some processors have no branch predictor and have pipelines longer than + // what can be covered by the delay slot. 
This results in a stall, so mark + // branches as expensive on those processors. + setJumpIsExpensive(Subtarget->hasNoPredictor()); + // The high cost of branching means that using conditional moves will + // still be profitable even if the condition is predictable. + PredictableSelectIsExpensive = !isJumpExpensive(); + setMinFunctionAlignment(Align(4)); computeRegisterProperties(Subtarget->getRegisterInfo()); diff --git a/llvm/test/CodeGen/SPARC/predictable-select.ll b/llvm/test/CodeGen/SPARC/predictable-select.ll new file mode 100644 index 0000000000000..cf200a121d0f1 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/predictable-select.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC-NO-PREDICTOR %s +; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC64-NO-PREDICTOR %s + +;; Normally, highly predictable selects should be turned into branches. +;; On the other hand, early Niagara processors should prefer conditional moves +;; over branches even when the select is predictable. + +define i32 @cdiv(i32 %cond, i32 %num) #0 { +; SPARC-LABEL: cdiv: +; SPARC: ! %bb.0: ! %entry +; SPARC-NEXT: cmp %o0, 0 +; SPARC-NEXT: be %icc, .LBB0_2 +; SPARC-NEXT: mov %o1, %o0 +; SPARC-NEXT: ! %bb.1: ! %select.end +; SPARC-NEXT: retl +; SPARC-NEXT: nop +; SPARC-NEXT: .LBB0_2: ! %select.true.sink +; SPARC-NEXT: sethi 1398101, %o1 +; SPARC-NEXT: or %o1, 342, %o1 +; SPARC-NEXT: smul %o0, %o1, %o0 +; SPARC-NEXT: rd %y, %o0 +; SPARC-NEXT: srl %o0, 31, %o1 +; SPARC-NEXT: retl +; SPARC-NEXT: add %o0, %o1, %o0 +; +; SPARC64-LABEL: cdiv: +; SPARC64: ! %bb.0: ! 
%entry +; SPARC64-NEXT: cmp %o0, 0 +; SPARC64-NEXT: be %icc, .LBB0_2 +; SPARC64-NEXT: mov %o1, %o0 +; SPARC64-NEXT: ! %bb.1: ! %select.end +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop +; SPARC64-NEXT: .LBB0_2: ! %select.true.sink +; SPARC64-NEXT: sra %o0, 0, %o0 +; SPARC64-NEXT: sethi 1398101, %o1 +; SPARC64-NEXT: or %o1, 342, %o1 +; SPARC64-NEXT: mulx %o0, %o1, %o0 +; SPARC64-NEXT: srlx %o0, 63, %o1 +; SPARC64-NEXT: srlx %o0, 32, %o0 +; SPARC64-NEXT: retl +; SPARC64-NEXT: add %o0, %o1, %o0 +; +; SPARC-NO-PREDICTOR-LABEL: cdiv: +; SPARC-NO-PREDICTOR: ! %bb.0: ! %entry +; SPARC-NO-PREDICTOR-NEXT: sethi 1398101, %o2 +; SPARC-NO-PREDICTOR-NEXT: or %o2, 342, %o2 +; SPARC-NO-PREDICTOR-NEXT: smul %o1, %o2, %o2 +; SPARC-NO-PREDICTOR-NEXT: rd %y, %o2 +; SPARC-NO-PREDICTOR-NEXT: srl %o2, 31, %o3 +; SPARC-NO-PREDICTOR-NEXT: add %o2, %o3, %o2 +; SPARC-NO-PREDICTOR-NEXT: cmp %o0, 0 +; SPARC-NO-PREDICTOR-NEXT: move %icc, %o2, %o1 +; SPARC-NO-PREDICTOR-NEXT: retl +; SPARC-NO-PREDICTOR-NEXT: mov %o1, %o0 +; +; SPARC64-NO-PREDICTOR-LABEL: cdiv: +; SPARC64-NO-PREDICTOR: ! %bb.0: ! %entry +; SPARC64-NO-PREDICTOR-NEXT: sra %o1, 0, %o2 +; SPARC64-NO-PREDICTOR-NEXT: sethi 1398101, %o3 +; SPARC64-NO-PREDICTOR-NEXT: or %o3, 342, %o3 +; SPARC64-NO-PREDICTOR-NEXT: mulx %o2, %o3, %o2 +; SPARC64-NO-PREDICTOR-NEXT: srlx %o2, 63, %o3 +; SPARC64-NO-PREDICTOR-NEXT: srlx %o2, 32, %o2 +; SPARC64-NO-PREDICTOR-NEXT: add %o2, %o3, %o2 +; SPARC64-NO-PREDICTOR-NEXT: cmp %o0, 0 +; SPARC64-NO-PREDICTOR-NEXT: move %icc, %o2, %o1 +; SPARC64-NO-PREDICTOR-NEXT: retl +; SPARC64-NO-PREDICTOR-NEXT: mov %o1, %o0 +entry: + %div = sdiv i32 %num, 3 + %cmp = icmp eq i32 %cond, 0 + %ret = select i1 %cmp, i32 %div, i32 %num + ret i32 %ret +} + +attributes #0 = { nounwind }