-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][SDAG] Improve codegen of select with constants if zicond is available #82456
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Yingwei Zheng (dtcxzyw) Changes: This patch uses `add + czero.eqz/nez` to lower select with constants if zicond is available.
The above code sequence is suggested by the RISC-V Optimization Guide. Full diff: https://github.com/llvm/llvm-project/pull/82456.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9ab6895aed521e..f0de990779f02a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7355,6 +7355,26 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (isNullConstant(TrueV))
return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
+ // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
+ // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
+ if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
+ const APInt &TrueVal = TrueV->getAsAPIntVal();
+ const APInt &FalseVal = FalseV->getAsAPIntVal();
+ const int TrueValCost = RISCVMatInt::getIntMatCost(
+ TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ const int FalseValCost = RISCVMatInt::getIntMatCost(
+ FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
+ SDValue LHSVal = DAG.getConstant(
+ IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
+ SDValue RHSVal =
+ DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
+ SDValue CMOV =
+ DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
+ DL, VT, LHSVal, CondV);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
+ }
+
// (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
if (TrueV.getOpcode() == ISD::AND &&
(TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index 7dd223df5e557e..d3cff41a4f247d 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1449,3 +1449,277 @@ entry:
%res = select i1 %cond, i32 %a, i32 %c
ret i32 %res
}
+
+define i32 @select_cst1(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst1:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, 10
+; RV32IM-NEXT: bnez a1, .LBB37_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: li a0, 20
+; RV32IM-NEXT: .LBB37_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst1:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, 10
+; RV64IM-NEXT: bnez a1, .LBB37_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: li a0, 20
+; RV64IM-NEXT: .LBB37_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst1:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: li a1, 10
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_cst1:
+; CHECKZICOND: # %bb.0:
+; CHECKZICOND-NEXT: li a1, 10
+; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 10
+; CHECKZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 10, i32 20
+ ret i32 %ret
+}
+
+define i32 @select_cst2(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst2:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, 10
+; RV32IM-NEXT: bnez a1, .LBB38_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: lui a0, 5
+; RV32IM-NEXT: addi a0, a0, -480
+; RV32IM-NEXT: .LBB38_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst2:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, 10
+; RV64IM-NEXT: bnez a1, .LBB38_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: lui a0, 5
+; RV64IM-NEXT: addiw a0, a0, -480
+; RV64IM-NEXT: .LBB38_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst2:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: lui a1, 5
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -490
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; RV32IMZICOND-LABEL: select_cst2:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: lui a1, 5
+; RV32IMZICOND-NEXT: addi a1, a1, -490
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 10
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst2:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: lui a1, 5
+; RV64IMZICOND-NEXT: addiw a1, a1, -490
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addi a0, a0, 10
+; RV64IMZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 10, i32 20000
+ ret i32 %ret
+}
+
+define i32 @select_cst3(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst3:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: bnez a0, .LBB39_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: lui a0, 5
+; RV32IM-NEXT: addi a0, a0, -480
+; RV32IM-NEXT: ret
+; RV32IM-NEXT: .LBB39_2:
+; RV32IM-NEXT: lui a0, 7
+; RV32IM-NEXT: addi a0, a0, 1328
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst3:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: bnez a0, .LBB39_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: lui a0, 5
+; RV64IM-NEXT: addiw a0, a0, -480
+; RV64IM-NEXT: ret
+; RV64IM-NEXT: .LBB39_2:
+; RV64IM-NEXT: lui a0, 7
+; RV64IM-NEXT: addiw a0, a0, 1328
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst3:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: lui a1, 1048574
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -1808
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: lui a1, 7
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, 1328
+; RV64IMXVTCONDOPS-NEXT: add a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; RV32IMZICOND-LABEL: select_cst3:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: lui a1, 1048574
+; RV32IMZICOND-NEXT: addi a1, a1, -1808
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: lui a1, 7
+; RV32IMZICOND-NEXT: addi a1, a1, 1328
+; RV32IMZICOND-NEXT: add a0, a0, a1
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst3:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: lui a1, 1048574
+; RV64IMZICOND-NEXT: addiw a1, a1, -1808
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: lui a1, 7
+; RV64IMZICOND-NEXT: addiw a1, a1, 1328
+; RV64IMZICOND-NEXT: add a0, a0, a1
+; RV64IMZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 30000, i32 20000
+ ret i32 %ret
+}
+
+define i32 @select_cst4(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst4:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, -2048
+; RV32IM-NEXT: bnez a1, .LBB40_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: li a0, 2047
+; RV32IM-NEXT: .LBB40_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst4:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, -2048
+; RV64IM-NEXT: bnez a1, .LBB40_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: li a0, 2047
+; RV64IM-NEXT: .LBB40_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst4:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: lui a1, 1
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -1
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, -2048
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; RV32IMZICOND-LABEL: select_cst4:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: lui a1, 1
+; RV32IMZICOND-NEXT: addi a1, a1, -1
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, -2048
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst4:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: lui a1, 1
+; RV64IMZICOND-NEXT: addiw a1, a1, -1
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addi a0, a0, -2048
+; RV64IMZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 -2048, i32 2047
+ ret i32 %ret
+}
+
+define i32 @select_cst5(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst5:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, 2047
+; RV32IM-NEXT: bnez a1, .LBB41_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: lui a0, 1
+; RV32IM-NEXT: addi a0, a0, -2047
+; RV32IM-NEXT: .LBB41_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst5:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, 2047
+; RV64IM-NEXT: bnez a1, .LBB41_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: lui a0, 1
+; RV64IM-NEXT: addiw a0, a0, -2047
+; RV64IM-NEXT: .LBB41_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst5:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: li a1, 2
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_cst5:
+; CHECKZICOND: # %bb.0:
+; CHECKZICOND-NEXT: li a1, 2
+; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 2047
+; CHECKZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 2047, i32 2049
+ ret i32 %ret
+}
+
+define i32 @select_cst6(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst6:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: bnez a0, .LBB42_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: li a0, 2047
+; RV32IM-NEXT: ret
+; RV32IM-NEXT: .LBB42_2:
+; RV32IM-NEXT: lui a0, 1
+; RV32IM-NEXT: addi a0, a0, -2047
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst6:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: bnez a0, .LBB42_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: li a0, 2047
+; RV64IM-NEXT: ret
+; RV64IM-NEXT: .LBB42_2:
+; RV64IM-NEXT: lui a0, 1
+; RV64IM-NEXT: addiw a0, a0, -2047
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst6:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: li a1, 2
+; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_cst6:
+; CHECKZICOND: # %bb.0:
+; CHECKZICOND-NEXT: li a1, 2
+; CHECKZICOND-NEXT: czero.eqz a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 2047
+; CHECKZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 2049, i32 2047
+ ret i32 %ret
+}
|
This patch lowers select of constants if `TrueV == ~FalseV`. Address the comment in #82456 (comment).
2cb0080
to
929308f
Compare
Rebased on the top of #82462. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch uses `add + czero.eqz/nez` to lower select with constants if zicond is available. The above code sequence is suggested by the RISC-V Optimization Guide.