From 0d9f557b6c36da3aa92daff4c0d37ea821d7ae1e Mon Sep 17 00:00:00 2001
From: hev
Date: Thu, 23 Nov 2023 19:34:50 +0800
Subject: [PATCH] [LoongArch] Disable mulodi4 and muloti4 libcalls (#73199)

These library functions only exist in compiler-rt, not libgcc, so
emitting calls to them fails to link unless compiler-rt is used as the
runtime library. Clear the libcall names so that the overflow check is
expanded inline instead.

Fixes https://github.com/ClangBuiltLinux/linux/issues/1958
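For illustration, a minimal reproducer sketch (the file and function
names are hypothetical, not taken from the linked report): compiling
code like the following for 32-bit LoongArch previously emitted a call
to __mulodi4 (and __muloti4 for the 128-bit case), which libgcc does
not provide:

    /* mulo.c -- hypothetical reproducer. __builtin_mul_overflow on
       signed 64-bit operands lowers to llvm.smul.with.overflow.i64,
       which on LA32 was lowered via the RTLIB::MULO_I64 libcall. */
    #include <stdint.h>

    int mul_would_overflow(int64_t a, int64_t b, int64_t *res)
    {
        /* Previously compiled to "bl %plt(__mulodi4)" on LA32; with
           this change the check is expanded inline instead. */
        return __builtin_mul_overflow(a, b, res);
    }

With the libcall names cleared, SelectionDAG expands the multiply and
the overflow check inline, as the regenerated tests below show.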
---
 .../LoongArch/LoongArchISelLowering.cpp       |   5 +
 .../CodeGen/LoongArch/smul-with-overflow.ll   | 463 +++++++++++++++---
 2 files changed, 397 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f3f72e74ef085..ac78789c2c331 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -152,8 +152,13 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
     // Set libcalls.
     setLibcallName(RTLIB::MUL_I128, nullptr);
+    // The MULO libcall is not part of libgcc, only compiler-rt.
+    setLibcallName(RTLIB::MULO_I64, nullptr);
   }
 
+  // The MULO libcall is not part of libgcc, only compiler-rt.
+  setLibcallName(RTLIB::MULO_I128, nullptr);
+
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 
   static const ISD::CondCode FPCCToExpand[] = {
diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
index a53e77e5aa4b3..6cba4108d63c6 100644
--- a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll
@@ -5,23 +5,53 @@
 define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
 ; LA32-LABEL: smuloi64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    .cfi_def_cfa_offset 16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
-; LA32-NEXT:    .cfi_offset 1, -4
-; LA32-NEXT:    .cfi_offset 22, -8
-; LA32-NEXT:    move $fp, $a4
-; LA32-NEXT:    st.w $zero, $sp, 4
-; LA32-NEXT:    addi.w $a4, $sp, 4
-; LA32-NEXT:    bl %plt(__mulodi4)
-; LA32-NEXT:    st.w $a1, $fp, 4
-; LA32-NEXT:    st.w $a0, $fp, 0
-; LA32-NEXT:    ld.w $a0, $sp, 4
+; LA32-NEXT:    srai.w $a5, $a1, 31
+; LA32-NEXT:    mul.w $a6, $a2, $a5
+; LA32-NEXT:    mulh.wu $a7, $a2, $a5
+; LA32-NEXT:    add.w $a7, $a7, $a6
+; LA32-NEXT:    mul.w $a5, $a3, $a5
+; LA32-NEXT:    add.w $a5, $a7, $a5
+; LA32-NEXT:    srai.w $a7, $a3, 31
+; LA32-NEXT:    mul.w $t0, $a7, $a1
+; LA32-NEXT:    mulh.wu $t1, $a7, $a0
+; LA32-NEXT:    add.w $t0, $t1, $t0
+; LA32-NEXT:    mul.w $a7, $a7, $a0
+; LA32-NEXT:    add.w $t0, $t0, $a7
+; LA32-NEXT:    add.w $a5, $t0, $a5
+; LA32-NEXT:    mulh.wu $t0, $a0, $a2
+; LA32-NEXT:    mul.w $t1, $a1, $a2
+; LA32-NEXT:    add.w $t0, $t1, $t0
+; LA32-NEXT:    sltu $t1, $t0, $t1
+; LA32-NEXT:    mulh.wu $t2, $a1, $a2
+; LA32-NEXT:    add.w $t1, $t2, $t1
+; LA32-NEXT:    mul.w $t2, $a0, $a3
+; LA32-NEXT:    add.w $t0, $t2, $t0
+; LA32-NEXT:    sltu $t2, $t0, $t2
+; LA32-NEXT:    mulh.wu $t3, $a0, $a3
+; LA32-NEXT:    add.w $t2, $t3, $t2
+; LA32-NEXT:    add.w $a6, $a7, $a6
+; LA32-NEXT:    sltu $a7, $a6, $a7
+; LA32-NEXT:    add.w $a5, $a5, $a7
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    mul.w $a2, $a1, $a3
+; LA32-NEXT:    mulh.wu $a1, $a1, $a3
+; LA32-NEXT:    add.w $a3, $t1, $t2
+; LA32-NEXT:    sltu $a7, $a3, $t1
+; LA32-NEXT:    add.w $a1, $a1, $a7
+; LA32-NEXT:    st.w $a0, $a4, 0
+; LA32-NEXT:    add.w $a0, $a2, $a3
+; LA32-NEXT:    sltu $a2, $a0, $a2
+; LA32-NEXT:    add.w $a1, $a1, $a2
+; LA32-NEXT:    st.w $t0, $a4, 4
+; LA32-NEXT:    add.w $a1, $a1, $a5
+; LA32-NEXT:    add.w $a2, $a0, $a6
+; LA32-NEXT:    sltu $a0, $a2, $a0
+; LA32-NEXT:    add.w $a0, $a1, $a0
+; LA32-NEXT:    srai.w $a1, $t0, 31
+; LA32-NEXT:    xor $a0, $a0, $a1
+; LA32-NEXT:    xor $a1, $a2, $a1
+; LA32-NEXT:    or $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $zero, $a0
-; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: smuloi64:
@@ -43,69 +73,360 @@ define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) {
 define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
 ; LA32-LABEL: smuloi128:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -64
-; LA32-NEXT:    .cfi_def_cfa_offset 64
-; LA32-NEXT:    st.w $ra, $sp, 60 # 4-byte Folded Spill
-; LA32-NEXT:    st.w $fp, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -96
+; LA32-NEXT:    .cfi_def_cfa_offset 96
+; LA32-NEXT:    st.w $ra, $sp, 92 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 88 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s0, $sp, 84 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s1, $sp, 80 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s2, $sp, 76 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s3, $sp, 72 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s4, $sp, 68 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s5, $sp, 64 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s6, $sp, 60 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s7, $sp, 56 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s8, $sp, 52 # 4-byte Folded Spill
 ; LA32-NEXT:    .cfi_offset 1, -4
 ; LA32-NEXT:    .cfi_offset 22, -8
-; LA32-NEXT:    move $fp, $a2
-; LA32-NEXT:    st.w $zero, $sp, 52
-; LA32-NEXT:    ld.w $a2, $a1, 12
-; LA32-NEXT:    st.w $a2, $sp, 12
-; LA32-NEXT:    ld.w $a2, $a1, 8
-; LA32-NEXT:    st.w $a2, $sp, 8
-; LA32-NEXT:    ld.w $a2, $a1, 4
-; LA32-NEXT:    st.w $a2, $sp, 4
-; LA32-NEXT:    ld.w $a1, $a1, 0
-; LA32-NEXT:    st.w $a1, $sp, 0
-; LA32-NEXT:    ld.w $a1, $a0, 12
-; LA32-NEXT:    st.w $a1, $sp, 28
-; LA32-NEXT:    ld.w $a1, $a0, 8
-; LA32-NEXT:    st.w $a1, $sp, 24
-; LA32-NEXT:    ld.w $a1, $a0, 4
-; LA32-NEXT:    st.w $a1, $sp, 20
-; LA32-NEXT:    ld.w $a0, $a0, 0
-; LA32-NEXT:    st.w $a0, $sp, 16
-; LA32-NEXT:    addi.w $a0, $sp, 32
-; LA32-NEXT:    addi.w $a1, $sp, 16
-; LA32-NEXT:    addi.w $a2, $sp, 0
-; LA32-NEXT:    addi.w $a3, $sp, 52
-; LA32-NEXT:    bl %plt(__muloti4)
-; LA32-NEXT:    ld.w $a0, $sp, 44
-; LA32-NEXT:    st.w $a0, $fp, 12
-; LA32-NEXT:    ld.w $a0, $sp, 40
-; LA32-NEXT:    st.w $a0, $fp, 8
-; LA32-NEXT:    ld.w $a0, $sp, 36
-; LA32-NEXT:    st.w $a0, $fp, 4
-; LA32-NEXT:    ld.w $a0, $sp, 32
-; LA32-NEXT:    st.w $a0, $fp, 0
-; LA32-NEXT:    ld.w $a0, $sp, 52
+; LA32-NEXT:    .cfi_offset 23, -12
+; LA32-NEXT:    .cfi_offset 24, -16
+; LA32-NEXT:    .cfi_offset 25, -20
+; LA32-NEXT:    .cfi_offset 26, -24
+; LA32-NEXT:    .cfi_offset 27, -28
+; LA32-NEXT:    .cfi_offset 28, -32
+; LA32-NEXT:    .cfi_offset 29, -36
+; LA32-NEXT:    .cfi_offset 30, -40
+; LA32-NEXT:    .cfi_offset 31, -44
+; LA32-NEXT:    st.w $a2, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    ld.w $a6, $a1, 0
+; LA32-NEXT:    ld.w $a7, $a0, 0
+; LA32-NEXT:    mulh.wu $a3, $a7, $a6
+; LA32-NEXT:    ld.w $a5, $a0, 4
+; LA32-NEXT:    mul.w $a4, $a5, $a6
+; LA32-NEXT:    add.w $a3, $a4, $a3
+; LA32-NEXT:    sltu $a4, $a3, $a4
+; LA32-NEXT:    mulh.wu $t0, $a5, $a6
+; LA32-NEXT:    add.w $a4, $t0, $a4
+; LA32-NEXT:    ld.w $t0, $a1, 4
+; LA32-NEXT:    mul.w $t1, $a7, $t0
+; LA32-NEXT:    add.w $a3, $t1, $a3
+; LA32-NEXT:    st.w $a3, $sp, 44 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $t1, $a3, $t1
+; LA32-NEXT:    mulh.wu $t2, $a7, $t0
+; LA32-NEXT:    add.w $t1, $t2, $t1
+; LA32-NEXT:    ld.w $t4, $a0, 12
+; LA32-NEXT:    ld.w $t2, $a0, 8
+; LA32-NEXT:    ld.w $t3, $a1, 8
+; LA32-NEXT:    mulh.wu $a0, $t2, $t3
+; LA32-NEXT:    mul.w $t5, $t4, $t3
+; LA32-NEXT:    add.w $a0, $t5, $a0
+; LA32-NEXT:    sltu $t5, $a0, $t5
+; LA32-NEXT:    mulh.wu $t6, $t4, $t3
+; LA32-NEXT:    add.w $t5, $t6, $t5
+; LA32-NEXT:    ld.w $t7, $a1, 12
+; LA32-NEXT:    mul.w $a1, $t2, $t7
+; LA32-NEXT:    add.w $a0, $a1, $a0
+; LA32-NEXT:    st.w $a0, $sp, 48 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $a1, $a0, $a1
+; LA32-NEXT:    mulh.wu $t6, $t2, $t7
+; LA32-NEXT:    add.w $t6, $t6, $a1
+; LA32-NEXT:    srai.w $s7, $t4, 31
+; LA32-NEXT:    mul.w $a1, $s7, $t7
+; LA32-NEXT:    mulh.wu $t8, $s7, $t3
+; LA32-NEXT:    add.w $t8, $t8, $a1
+; LA32-NEXT:    mulh.wu $fp, $a6, $s7
+; LA32-NEXT:    mul.w $s6, $t0, $s7
+; LA32-NEXT:    add.w $s8, $s6, $fp
+; LA32-NEXT:    mul.w $a1, $a6, $s7
+; LA32-NEXT:    add.w $ra, $a1, $s8
+; LA32-NEXT:    sltu $s0, $ra, $a1
+; LA32-NEXT:    add.w $a0, $fp, $s0
+; LA32-NEXT:    add.w $a3, $a4, $t1
+; LA32-NEXT:    st.w $a3, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $a4, $a3, $a4
+; LA32-NEXT:    mulh.wu $t1, $a5, $t0
+; LA32-NEXT:    add.w $a3, $t1, $a4
+; LA32-NEXT:    st.w $a3, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    srai.w $s4, $t7, 31
+; LA32-NEXT:    mul.w $fp, $a7, $s4
+; LA32-NEXT:    mulh.wu $a4, $a7, $s4
+; LA32-NEXT:    add.w $s1, $a4, $fp
+; LA32-NEXT:    sltu $s0, $s1, $fp
+; LA32-NEXT:    add.w $s5, $a4, $s0
+; LA32-NEXT:    mul.w $a4, $s7, $t3
+; LA32-NEXT:    add.w $t8, $t8, $a4
+; LA32-NEXT:    add.w $s0, $ra, $t8
+; LA32-NEXT:    add.w $a3, $a1, $a4
+; LA32-NEXT:    st.w $a3, $sp, 32 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $a4, $a3, $a1
+; LA32-NEXT:    add.w $a3, $s0, $a4
+; LA32-NEXT:    st.w $a3, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    add.w $s3, $t5, $t6
+; LA32-NEXT:    sltu $a4, $s3, $t5
+; LA32-NEXT:    mulh.wu $t5, $t4, $t7
+; LA32-NEXT:    add.w $a3, $t5, $a4
+; LA32-NEXT:    st.w $a3, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT:    mul.w $a4, $a7, $a6
+; LA32-NEXT:    st.w $a4, $a2, 0
+; LA32-NEXT:    sltu $a4, $s8, $s6
+; LA32-NEXT:    mulh.wu $t5, $t0, $s7
+; LA32-NEXT:    add.w $a4, $t5, $a4
+; LA32-NEXT:    add.w $t1, $a4, $a0
+; LA32-NEXT:    sltu $a4, $t1, $a4
+; LA32-NEXT:    add.w $s2, $t5, $a4
+; LA32-NEXT:    mulh.wu $a4, $a7, $t3
+; LA32-NEXT:    mul.w $t5, $a5, $t3
+; LA32-NEXT:    add.w $a4, $t5, $a4
+; LA32-NEXT:    sltu $t5, $a4, $t5
+; LA32-NEXT:    mulh.wu $t6, $a5, $t3
+; LA32-NEXT:    add.w $a3, $t6, $t5
+; LA32-NEXT:    mul.w $t6, $a7, $t7
+; LA32-NEXT:    add.w $t5, $t6, $a4
+; LA32-NEXT:    sltu $a4, $t5, $t6
+; LA32-NEXT:    mulh.wu $t6, $a7, $t7
+; LA32-NEXT:    add.w $a4, $t6, $a4
+; LA32-NEXT:    mulh.wu $t6, $t2, $a6
+; LA32-NEXT:    mul.w $s7, $t4, $a6
+; LA32-NEXT:    add.w $t6, $s7, $t6
+; LA32-NEXT:    sltu $s7, $t6, $s7
+; LA32-NEXT:    mulh.wu $s8, $t4, $a6
+; LA32-NEXT:    add.w $a0, $s8, $s7
+; LA32-NEXT:    mul.w $s7, $t2, $t0
+; LA32-NEXT:    add.w $t6, $s7, $t6
+; LA32-NEXT:    sltu $s7, $t6, $s7
+; LA32-NEXT:    mulh.wu $s8, $t2, $t0
+; LA32-NEXT:    add.w $a2, $s8, $s7
+; LA32-NEXT:    mul.w $s8, $a5, $s4
+; LA32-NEXT:    add.w $s7, $s1, $s8
+; LA32-NEXT:    add.w $s1, $s7, $ra
+; LA32-NEXT:    add.w $a1, $fp, $a1
+; LA32-NEXT:    st.w $a1, $sp, 40 # 4-byte Folded Spill
+; LA32-NEXT:    sltu $ra, $a1, $fp
+; LA32-NEXT:    add.w $a1, $s1, $ra
+; LA32-NEXT:    st.w $a1, $sp, 36 # 4-byte Folded Spill
+; LA32-NEXT:    xor $s0, $a1, $s7
+; LA32-NEXT:    sltui $s0, $s0, 1
+; LA32-NEXT:    sltu $a1, $a1, $s7
+; LA32-NEXT:    masknez $s1, $a1, $s0
+; LA32-NEXT:    maskeqz $s0, $ra, $s0
+; LA32-NEXT:    add.w $t1, $s6, $t1
+; LA32-NEXT:    sltu $s6, $t1, $s6
+; LA32-NEXT:    add.w $s2, $s2, $s6
+; LA32-NEXT:    add.w $a2, $a0, $a2
+; LA32-NEXT:    sltu $a0, $a2, $a0
+; LA32-NEXT:    mulh.wu $s6, $t4, $t0
+; LA32-NEXT:    add.w $t8, $s6, $a0
+; LA32-NEXT:    add.w $a4, $a3, $a4
+; LA32-NEXT:    sltu $a3, $a4, $a3
+; LA32-NEXT:    mulh.wu $s6, $a5, $t7
+; LA32-NEXT:    add.w $a3, $s6, $a3
+; LA32-NEXT:    mul.w $s6, $t4, $t7
+; LA32-NEXT:    mul.w $t7, $a5, $t7
+; LA32-NEXT:    mul.w $ra, $t4, $t0
+; LA32-NEXT:    mul.w $t0, $a5, $t0
+; LA32-NEXT:    mul.w $t4, $t4, $s4
+; LA32-NEXT:    mul.w $a7, $a7, $t3
+; LA32-NEXT:    mul.w $a6, $t2, $a6
+; LA32-NEXT:    mul.w $t3, $t2, $t3
+; LA32-NEXT:    mul.w $a0, $t2, $s4
+; LA32-NEXT:    mulh.wu $t2, $t2, $s4
+; LA32-NEXT:    mulh.wu $a5, $s4, $a5
+; LA32-NEXT:    sltu $s4, $s7, $s8
+; LA32-NEXT:    add.w $s4, $a5, $s4
+; LA32-NEXT:    add.w $s4, $s5, $s4
+; LA32-NEXT:    sltu $s5, $s4, $s5
+; LA32-NEXT:    add.w $s5, $a5, $s5
+; LA32-NEXT:    ld.w $a1, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $a1, $t0, $a1
+; LA32-NEXT:    sltu $a5, $a1, $t0
+; LA32-NEXT:    ld.w $t0, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $t0, $t0, $a5
+; LA32-NEXT:    or $s0, $s0, $s1
+; LA32-NEXT:    add.w $a4, $t7, $a4
+; LA32-NEXT:    sltu $a5, $a4, $t7
+; LA32-NEXT:    add.w $t7, $a3, $a5
+; LA32-NEXT:    add.w $s1, $ra, $a2
+; LA32-NEXT:    sltu $a2, $s1, $ra
+; LA32-NEXT:    add.w $t8, $t8, $a2
+; LA32-NEXT:    add.w $a5, $s6, $s3
+; LA32-NEXT:    sltu $a2, $a5, $s6
+; LA32-NEXT:    ld.w $a3, $sp, 16 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $a2, $a3, $a2
+; LA32-NEXT:    ld.w $s6, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $a3, $sp, 44 # 4-byte Folded Reload
+; LA32-NEXT:    st.w $a3, $s6, 4
+; LA32-NEXT:    ld.w $a3, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $a3, $s2, $a3
+; LA32-NEXT:    ld.w $s2, $sp, 32 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $s2, $t1, $s2
+; LA32-NEXT:    sltu $t1, $s2, $t1
+; LA32-NEXT:    add.w $a3, $a3, $t1
+; LA32-NEXT:    add.w $t1, $s8, $s4
+; LA32-NEXT:    sltu $s3, $t1, $s8
+; LA32-NEXT:    add.w $s3, $s5, $s3
+; LA32-NEXT:    add.w $t2, $t2, $a0
+; LA32-NEXT:    add.w $t2, $t2, $t4
+; LA32-NEXT:    add.w $t2, $t2, $s7
+; LA32-NEXT:    add.w $t4, $a0, $fp
+; LA32-NEXT:    sltu $a0, $t4, $a0
+; LA32-NEXT:    add.w $a0, $t2, $a0
+; LA32-NEXT:    add.w $a0, $s3, $a0
+; LA32-NEXT:    add.w $t2, $t1, $t4
+; LA32-NEXT:    sltu $t1, $t2, $t1
+; LA32-NEXT:    add.w $a0, $a0, $t1
+; LA32-NEXT:    add.w $a0, $a0, $a3
+; LA32-NEXT:    add.w $t1, $t2, $s2
+; LA32-NEXT:    sltu $a3, $t1, $t2
+; LA32-NEXT:    add.w $a0, $a0, $a3
+; LA32-NEXT:    add.w $a3, $t6, $t0
+; LA32-NEXT:    add.w $a1, $a6, $a1
+; LA32-NEXT:    sltu $a6, $a1, $a6
+; LA32-NEXT:    add.w $t0, $a3, $a6
+; LA32-NEXT:    add.w $a1, $a7, $a1
+; LA32-NEXT:    sltu $a7, $a1, $a7
+; LA32-NEXT:    add.w $a3, $t5, $t0
+; LA32-NEXT:    add.w $a3, $a3, $a7
+; LA32-NEXT:    sltu $t2, $a3, $t5
+; LA32-NEXT:    xor $t4, $a3, $t5
+; LA32-NEXT:    sltui $t4, $t4, 1
+; LA32-NEXT:    masknez $t2, $t2, $t4
+; LA32-NEXT:    maskeqz $a7, $a7, $t4
+; LA32-NEXT:    st.w $a1, $s6, 8
+; LA32-NEXT:    or $a1, $a7, $t2
+; LA32-NEXT:    sltu $a7, $t0, $t6
+; LA32-NEXT:    xor $t0, $t0, $t6
+; LA32-NEXT:    sltui $t0, $t0, 1
+; LA32-NEXT:    masknez $a7, $a7, $t0
+; LA32-NEXT:    maskeqz $a6, $a6, $t0
+; LA32-NEXT:    or $a6, $a6, $a7
+; LA32-NEXT:    add.w $a6, $s1, $a6
+; LA32-NEXT:    sltu $a7, $a6, $s1
+; LA32-NEXT:    add.w $a7, $t8, $a7
+; LA32-NEXT:    add.w $a1, $a4, $a1
+; LA32-NEXT:    sltu $a4, $a1, $a4
+; LA32-NEXT:    add.w $a4, $t7, $a4
+; LA32-NEXT:    add.w $t0, $t1, $s0
+; LA32-NEXT:    sltu $t1, $t0, $t1
+; LA32-NEXT:    add.w $a0, $a0, $t1
+; LA32-NEXT:    st.w $a3, $s6, 12
+; LA32-NEXT:    add.w $a1, $a6, $a1
+; LA32-NEXT:    sltu $a6, $a1, $a6
+; LA32-NEXT:    add.w $a4, $a7, $a4
+; LA32-NEXT:    add.w $a4, $a4, $a6
+; LA32-NEXT:    sltu $t1, $a4, $a7
+; LA32-NEXT:    xor $a7, $a4, $a7
+; LA32-NEXT:    sltui $a7, $a7, 1
+; LA32-NEXT:    masknez $t1, $t1, $a7
+; LA32-NEXT:    maskeqz $a6, $a6, $a7
+; LA32-NEXT:    or $a6, $a6, $t1
+; LA32-NEXT:    add.w $a6, $a5, $a6
+; LA32-NEXT:    sltu $a5, $a6, $a5
+; LA32-NEXT:    add.w $a2, $a2, $a5
+; LA32-NEXT:    ld.w $t1, $sp, 48 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $a4, $t1, $a4
+; LA32-NEXT:    add.w $a1, $t3, $a1
+; LA32-NEXT:    sltu $a5, $a1, $t3
+; LA32-NEXT:    add.w $a4, $a4, $a5
+; LA32-NEXT:    sltu $a7, $a4, $t1
+; LA32-NEXT:    xor $t1, $a4, $t1
+; LA32-NEXT:    sltui $t1, $t1, 1
+; LA32-NEXT:    masknez $a7, $a7, $t1
+; LA32-NEXT:    maskeqz $a5, $a5, $t1
+; LA32-NEXT:    or $a5, $a5, $a7
+; LA32-NEXT:    add.w $a5, $a6, $a5
+; LA32-NEXT:    sltu $a6, $a5, $a6
+; LA32-NEXT:    add.w $a2, $a2, $a6
+; LA32-NEXT:    add.w $a0, $a2, $a0
+; LA32-NEXT:    add.w $a2, $a5, $t0
+; LA32-NEXT:    sltu $a5, $a2, $a5
+; LA32-NEXT:    add.w $a0, $a0, $a5
+; LA32-NEXT:    ld.w $a5, $sp, 40 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $a5, $a1, $a5
+; LA32-NEXT:    sltu $a1, $a5, $a1
+; LA32-NEXT:    ld.w $a6, $sp, 36 # 4-byte Folded Reload
+; LA32-NEXT:    add.w $a6, $a4, $a6
+; LA32-NEXT:    add.w $a6, $a6, $a1
+; LA32-NEXT:    sltu $a7, $a6, $a4
+; LA32-NEXT:    xor $a4, $a6, $a4
+; LA32-NEXT:    sltui $a4, $a4, 1
+; LA32-NEXT:    masknez $a7, $a7, $a4
+; LA32-NEXT:    maskeqz $a1, $a1, $a4
+; LA32-NEXT:    or $a1, $a1, $a7
+; LA32-NEXT:    add.w $a1, $a2, $a1
+; LA32-NEXT:    sltu $a2, $a1, $a2
+; LA32-NEXT:    add.w $a0, $a0, $a2
+; LA32-NEXT:    srai.w $a2, $a3, 31
+; LA32-NEXT:    xor $a3, $a6, $a2
+; LA32-NEXT:    xor $a0, $a0, $a2
+; LA32-NEXT:    or $a0, $a3, $a0
+; LA32-NEXT:    xor $a3, $a5, $a2
+; LA32-NEXT:    xor $a1, $a1, $a2
+; LA32-NEXT:    or $a1, $a3, $a1
+; LA32-NEXT:    or $a0, $a1, $a0
 ; LA32-NEXT:    sltu $a0, $zero, $a0
-; LA32-NEXT:    ld.w $fp, $sp, 56 # 4-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 60 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 64
+; LA32-NEXT:    ld.w $s8, $sp, 52 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s7, $sp, 56 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s6, $sp, 60 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s5, $sp, 64 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s4, $sp, 68 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s3, $sp, 72 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s2, $sp, 76 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s1, $sp, 80 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $s0, $sp, 84 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 88 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 92 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 96
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: smuloi128:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -32
-; LA64-NEXT:    .cfi_def_cfa_offset 32
-; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:    .cfi_offset 1, -8
-; LA64-NEXT:    .cfi_offset 22, -16
-; LA64-NEXT:    move $fp, $a4
-; LA64-NEXT:    st.d $zero, $sp, 8
-; LA64-NEXT:    addi.d $a4, $sp, 8
-; LA64-NEXT:    bl %plt(__muloti4)
-; LA64-NEXT:    st.d $a1, $fp, 8
-; LA64-NEXT:    st.d $a0, $fp, 0
-; LA64-NEXT:    ld.d $a0, $sp, 8
+; LA64-NEXT:    srai.d $a5, $a1, 63
+; LA64-NEXT:    mul.d $a6, $a2, $a5
+; LA64-NEXT:    mulh.du $a7, $a2, $a5
+; LA64-NEXT:    add.d $a7, $a7, $a6
+; LA64-NEXT:    mul.d $a5, $a3, $a5
+; LA64-NEXT:    add.d $a5, $a7, $a5
+; LA64-NEXT:    srai.d $a7, $a3, 63
+; LA64-NEXT:    mul.d $t0, $a7, $a1
+; LA64-NEXT:    mulh.du $t1, $a7, $a0
+; LA64-NEXT:    add.d $t0, $t1, $t0
+; LA64-NEXT:    mul.d $a7, $a7, $a0
+; LA64-NEXT:    add.d $t0, $t0, $a7
+; LA64-NEXT:    add.d $a5, $t0, $a5
+; LA64-NEXT:    mulh.du $t0, $a0, $a2
+; LA64-NEXT:    mul.d $t1, $a1, $a2
+; LA64-NEXT:    add.d $t0, $t1, $t0
+; LA64-NEXT:    sltu $t1, $t0, $t1
+; LA64-NEXT:    mulh.du $t2, $a1, $a2
+; LA64-NEXT:    add.d $t1, $t2, $t1
+; LA64-NEXT:    mul.d $t2, $a0, $a3
+; LA64-NEXT:    add.d $t0, $t2, $t0
+; LA64-NEXT:    sltu $t2, $t0, $t2
+; LA64-NEXT:    mulh.du $t3, $a0, $a3
+; LA64-NEXT:    add.d $t2, $t3, $t2
+; LA64-NEXT:    add.d $a6, $a7, $a6
+; LA64-NEXT:    sltu $a7, $a6, $a7
+; LA64-NEXT:    add.d $a5, $a5, $a7
+; LA64-NEXT:    mul.d $a0, $a0, $a2
+; LA64-NEXT:    mul.d $a2, $a1, $a3
+; LA64-NEXT:    mulh.du $a1, $a1, $a3
+; LA64-NEXT:    add.d $a3, $t1, $t2
+; LA64-NEXT:    sltu $a7, $a3, $t1
+; LA64-NEXT:    add.d $a1, $a1, $a7
+; LA64-NEXT:    st.d $a0, $a4, 0
+; LA64-NEXT:    add.d $a0, $a2, $a3
+; LA64-NEXT:    sltu $a2, $a0, $a2
+; LA64-NEXT:    add.d $a1, $a1, $a2
+; LA64-NEXT:    st.d $t0, $a4, 8
+; LA64-NEXT:    add.d $a1, $a1, $a5
+; LA64-NEXT:    add.d $a2, $a0, $a6
+; LA64-NEXT:    sltu $a0, $a2, $a0
+; LA64-NEXT:    add.d $a0, $a1, $a0
+; LA64-NEXT:    srai.d $a1, $t0, 63
+; LA64-NEXT:    xor $a0, $a0, $a1
+; LA64-NEXT:    xor $a1, $a2, $a1
+; LA64-NEXT:    or $a0, $a1, $a0
 ; LA64-NEXT:    sltu $a0, $zero, $a0
-; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
-; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    ret
   %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2)
   %val = extractvalue {i128, i1} %t, 0