diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 91ae7b0b28ba7..a6ba6e518899f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5442,6 +5442,24 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (SDValue DivRem = useDivRem(N)) return DivRem.getValue(1); + // fold urem(urem(A, BCst), Op1Cst) -> urem(A, Op1Cst) + // iff urem(BCst, Op1Cst) == 0 + SDValue A; + APInt Op1Cst, BCst; + if (sd_match(N, m_URem(m_URem(m_Value(A), m_ConstInt(BCst)), + m_ConstInt(Op1Cst))) && + BCst.urem(Op1Cst).isZero()) { + return DAG.getNode(ISD::UREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT)); + } + + // fold srem(srem(A, BCst), Op1Cst) -> srem(A, Op1Cst) + // iff srem(BCst, Op1Cst) == 0 && Op1Cst != -1 + if (sd_match(N, m_SRem(m_SRem(m_Value(A), m_ConstInt(BCst)), + m_ConstInt(Op1Cst))) && + BCst.srem(Op1Cst).isZero() && !Op1Cst.isAllOnes()) { + return DAG.getNode(ISD::SREM, DL, VT, A, DAG.getConstant(Op1Cst, DL, VT)); + } + return SDValue(); } diff --git a/llvm/test/CodeGen/RISCV/srem.ll b/llvm/test/CodeGen/RISCV/srem.ll new file mode 100644 index 0000000000000..55a1850f18291 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/srem.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s + +define i32 @fold_srem_constants(i32 %v0) nounwind { +; RV32I-LABEL: fold_srem_constants: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: tail __modsi3 +; +; RV64I-LABEL: fold_srem_constants: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded 
Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %v1 = srem i32 %v0, 25 + %v2 = srem i32 %v1, 5 + ret i32 %v2 +} + +define i32 @dont_fold_srem_constants(i32 %v0) nounwind { +; RV32I-LABEL: dont_fold_srem_constants: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 25 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: li a1, 3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail __modsi3 +; +; RV64I-LABEL: dont_fold_srem_constants: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: li a1, 25 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %v1 = srem i32 %v0, 25 + %v2 = srem i32 %v1, 3 + ret i32 %v2 +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/urem.ll b/llvm/test/CodeGen/RISCV/urem.ll new file mode 100644 index 0000000000000..faeeb396a698b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/urem.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s + +define i32 @fold_urem_constants(i32 %v0) nounwind { +; RV32I-LABEL: fold_urem_constants: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: tail __umodsi3 +; +; RV64I-LABEL: fold_urem_constants: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %v1 = urem i32 %v0, 25 + %v2 = urem i32 %v1, 5 + ret i32 %v2 +} + +define i32 @dont_fold_urem_constants(i32 %v0) nounwind { +; RV32I-LABEL: dont_fold_urem_constants: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 25 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: li a1, 3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail __umodsi3 +; +; RV64I-LABEL: dont_fold_urem_constants: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: li a1, 25 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %v1 = urem i32 %v0, 25 + %v2 = urem i32 
%v1, 3 + ret i32 %v2 +} + +define i32 @dont_fold_urem_srem_mixed_constants(i32 %v0) nounwind { +; RV32I-LABEL: dont_fold_urem_srem_mixed_constants: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 25 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: li a1, 3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail __umodsi3 +; +; RV64I-LABEL: dont_fold_urem_srem_mixed_constants: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: li a1, 25 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %v1 = urem i32 %v0, 25 + %v2 = srem i32 %v1, 3 + ret i32 %v2 +} + +define i32 @dont_fold_srem_urem_mixed_constants(i32 %v0) nounwind { +; RV32I-LABEL: dont_fold_srem_urem_mixed_constants: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 25 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: li a1, 3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail __umodsi3 +; +; RV64I-LABEL: dont_fold_srem_urem_mixed_constants: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: li a1, 25 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %v1 = srem i32 %v0, 25 + %v2 = urem i32 %v1, 3 + ret i32 %v2 +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}}