Skip to content

Commit

Permalink
[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)
Browse files Browse the repository at this point in the history
The description of CopyFromReg in ISDOpcodes.h says that the input
value is defined outside the scope of the current SelectionDAG. I
think that means that we basically can treat it as a FREEZE in the
sense that it can be seen as neither being undef nor poison.

Being able to fold freeze(CopyFromReg) into CopyFromReg seems
useful to avoid regressions if we start to introduce freeze
instructions in DAGCombiner/foldBoolSelectToLogic, e.g. to solve
#84653

Things _not_ dealt with in this patch:
- Depending on calling convention an input argument can be passed
  also on the stack and not in a register. If it is allowed to treat
  an argument received in a register as not being poison, then I think
  we want to treat arguments received on the stack the same way. But
  then we need to attribute load instructions, or add explicit FREEZE
  when lowering formal arguments.
- A common pattern is that there is an AssertZext or AssertSext just
  after CopyFromReg. I think that if we treat CopyFromReg as never
  being poison, then it should be allowed to fold
     freeze(AssertZext(CopyFromReg)) -> AssertZext(CopyFromReg)
  • Loading branch information
bjope committed Apr 26, 2024
1 parent 431be86 commit 73472c5
Show file tree
Hide file tree
Showing 45 changed files with 1,856 additions and 1,919 deletions.
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ enum NodeType {
/// CopyFromReg - This node indicates that the input value is a virtual or
/// physical register that is defined outside of the scope of this
/// SelectionDAG. The register is available from the RegisterSDNode object.
/// Note that CopyFromReg is considered as also freezing the value.
CopyFromReg,

/// UNDEF - An undefined node.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5063,6 +5063,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::VALUETYPE:
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
case ISD::CopyFromReg:
return true;

case ISD::UNDEF:
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AArch64/combine-mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
define i64 @combine_mul_self_demandedbits(i64 %x) {
; CHECK-LABEL: combine_mul_self_demandedbits:
; CHECK: // %bb.0:
; CHECK-NEXT: mul x8, x0, x0
; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
; CHECK-NEXT: mul x0, x0, x0
; CHECK-NEXT: ret
%1 = mul i64 %x, %x
%2 = and i64 %1, -3
Expand Down Expand Up @@ -77,7 +76,7 @@ define i8 @one_demanded_bit(i8 %x) {
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
; CHECK-LABEL: one_demanded_bit_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32
; CHECK-NEXT: mov w8, #32 // =0x20
; CHECK-NEXT: shl v0.2d, v0.2d, #5
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
Expand Down Expand Up @@ -131,7 +130,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-2
; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down
42 changes: 18 additions & 24 deletions llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,9 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 0
; CHECK-NEXT: st.b $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
; CHECK-NEXT: st.b $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
Expand All @@ -150,10 +149,9 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 1
; CHECK-NEXT: st.h $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
; CHECK-NEXT: st.h $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
Expand All @@ -177,10 +175,9 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 2
; CHECK-NEXT: st.w $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
; CHECK-NEXT: st.w $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
Expand All @@ -204,10 +201,9 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 4, 3
; CHECK-NEXT: st.d $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
; CHECK-NEXT: st.d $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
Expand All @@ -231,10 +227,9 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: addi.d $a2, $sp, 0
; CHECK-NEXT: bstrins.d $a2, $a0, 4, 2
; CHECK-NEXT: fst.s $fa0, $a2, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
; CHECK-NEXT: fst.s $fa0, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
Expand All @@ -258,10 +253,9 @@ define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounw
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: addi.d $a2, $sp, 0
; CHECK-NEXT: bstrins.d $a2, $a0, 4, 3
; CHECK-NEXT: fst.d $fa0, $a2, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
; CHECK-NEXT: fst.d $fa0, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
Expand Down
42 changes: 18 additions & 24 deletions llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,9 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 0
; CHECK-NEXT: st.b $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
; CHECK-NEXT: st.b $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand All @@ -107,10 +106,9 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 1
; CHECK-NEXT: st.h $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
; CHECK-NEXT: st.h $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand All @@ -127,10 +125,9 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 2
; CHECK-NEXT: st.w $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
; CHECK-NEXT: st.w $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand All @@ -147,10 +144,9 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
; CHECK-NEXT: addi.d $a3, $sp, 0
; CHECK-NEXT: bstrins.d $a3, $a0, 3, 3
; CHECK-NEXT: st.d $a2, $a3, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
; CHECK-NEXT: st.d $a2, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand All @@ -167,10 +163,9 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: vst $vr1, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: addi.d $a2, $sp, 0
; CHECK-NEXT: bstrins.d $a2, $a0, 3, 2
; CHECK-NEXT: fst.s $fa0, $a2, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
; CHECK-NEXT: fst.s $fa0, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand All @@ -187,10 +182,9 @@ define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) noun
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: vld $vr1, $a0, 0
; CHECK-NEXT: vst $vr1, $sp, 0
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
; CHECK-NEXT: addi.d $a2, $sp, 0
; CHECK-NEXT: bstrins.d $a2, $a0, 3, 3
; CHECK-NEXT: fst.d $fa0, $a2, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
; CHECK-NEXT: fst.d $fa0, $a0, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/alu64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ define i64 @sltiu(i64 %a) nounwind {
;
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sltiu a0, a0, 3
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: addi a4, a2, 1
; RV32IA-NEXT: sltu a2, a2, a1
; RV32IA-NEXT: neg a2, a2
; RV32IA-NEXT: and a4, a2, a4
; RV32IA-NEXT: addi a2, a2, 1
; RV32IA-NEXT: sltu a4, a3, a1
; RV32IA-NEXT: neg a4, a4
; RV32IA-NEXT: and a4, a4, a2
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down Expand Up @@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: addi a4, a2, 1
; RV64IA-NEXT: sltu a2, a2, a1
; RV64IA-NEXT: neg a2, a2
; RV64IA-NEXT: and a4, a2, a4
; RV64IA-NEXT: addi a2, a2, 1
; RV64IA-NEXT: sltu a4, a3, a1
; RV64IA-NEXT: neg a4, a4
; RV64IA-NEXT: and a4, a4, a2
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
Expand Down
108 changes: 48 additions & 60 deletions llvm/test/CodeGen/RISCV/bfloat-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -456,92 +456,80 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
; RV32IZFBFMIN-NEXT: and a0, s3, a0
; RV32IZFBFMIN-NEXT: or a0, s1, a0
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
; RV32IZFBFMIN-NEXT: lui a3, 524288
; RV32IZFBFMIN-NEXT: beqz s2, .LBB10_2
; RV32IZFBFMIN-NEXT: lui a2, 524288
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: mv a2, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
; RV32IZFBFMIN-NEXT: and a0, a2, a0
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
; RV32IZFBFMIN-NEXT: and a1, a2, a3
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a4, a1
; RV32IZFBFMIN-NEXT: and a1, a4, a2
; RV32IZFBFMIN-NEXT: neg a2, a3
; RV32IZFBFMIN-NEXT: neg a3, s0
; RV32IZFBFMIN-NEXT: and a0, a3, a0
; RV32IZFBFMIN-NEXT: or a0, a2, a0
; RV32IZFBFMIN-NEXT: and a0, a4, a0
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
; R32IDZFBFMIN-NEXT: and a0, s3, a0
; R32IDZFBFMIN-NEXT: or a0, s1, a0
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
; R32IDZFBFMIN-NEXT: lui a3, 524288
; R32IDZFBFMIN-NEXT: beqz s2, .LBB10_2
; R32IDZFBFMIN-NEXT: lui a2, 524288
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: mv a2, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
; R32IDZFBFMIN-NEXT: and a0, a2, a0
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
; R32IDZFBFMIN-NEXT: and a1, a2, a3
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a4, a1
; R32IDZFBFMIN-NEXT: and a1, a4, a2
; R32IDZFBFMIN-NEXT: neg a2, a3
; R32IDZFBFMIN-NEXT: neg a3, s0
; R32IDZFBFMIN-NEXT: and a0, a3, a0
; R32IDZFBFMIN-NEXT: or a0, a2, a0
; R32IDZFBFMIN-NEXT: and a0, a4, a0
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
Expand Down
Loading

0 comments on commit 73472c5

Please sign in to comment.