From b35a842581f089daa57dd7e6b78ccb08d92709b2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Thu, 10 Jun 2021 16:42:29 -0700
Subject: [PATCH] [RISCV] Add test cases that show failure to use some W
 instructions if they are preceded by a load. NFC

The loads end up becoming sextload/zextload which prevent our isel
patterns from finding the sign_extend_inreg or AND instruction we need.
The easiest way to fix this is to use computeKnownBits or
ComputeNumSignBits in our isel matching to catch this.
---
 llvm/test/CodeGen/RISCV/double-convert.ll | 170 ++++++++++++++--------
 llvm/test/CodeGen/RISCV/float-convert.ll  | 162 +++++++++++++--------
 llvm/test/CodeGen/RISCV/half-convert.ll   |  58 ++++++++
 llvm/test/CodeGen/RISCV/rv64zbb.ll        |  94 ++++++++++--
 4 files changed, 344 insertions(+), 140 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index f793a7fb8c5f9..0e016bac8e672 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -191,6 +191,29 @@ define double @fcvt_d_w(i32 %a) nounwind {
 ret double %1
 }
 
+define double @fcvt_d_w_load(i32* %p) nounwind {
+; RV32IFD-LABEL: fcvt_d_w_load:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: lw a0, 0(a0)
+; RV32IFD-NEXT: fcvt.d.w ft0, a0
+; RV32IFD-NEXT: fsd ft0, 8(sp)
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fcvt_d_w_load:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: lw a0, 0(a0)
+; RV64IFD-NEXT: fcvt.d.l ft0, a0
+; RV64IFD-NEXT: fmv.x.d a0, ft0
+; RV64IFD-NEXT: ret
+ %a = load i32, i32* %p
+ %1 = sitofp i32 %a to double
+ ret double %1
+}
+
 define double @fcvt_d_wu(i32 %a) nounwind {
 ; RV32IFD-LABEL: fcvt_d_wu:
 ; RV32IFD: # %bb.0:
@@ -211,6 +234,29 @@
 ret double %1
 }
 
+define double @fcvt_d_wu_load(i32* %p) nounwind {
+; RV32IFD-LABEL: fcvt_d_wu_load:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: lw a0, 0(a0)
+; RV32IFD-NEXT: fcvt.d.wu ft0, a0
+; RV32IFD-NEXT: fsd ft0, 8(sp)
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fcvt_d_wu_load:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: lwu a0, 0(a0)
+; RV64IFD-NEXT: fcvt.d.lu ft0, a0
+; RV64IFD-NEXT: fmv.x.d a0, ft0
+; RV64IFD-NEXT: ret
+ %a = load i32, i32* %p
+ %1 = uitofp i32 %a to double
+ ret double %1
+}
+
 define i64 @fcvt_l_d(double %a) nounwind {
 ; RV32IFD-LABEL: fcvt_l_d:
 ; RV32IFD: # %bb.0:
@@ -241,79 +287,79 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32IFD-NEXT: fsd ft0, 8(sp) # 8-byte Folded Spill
 ; RV32IFD-NEXT: call __fixdfdi@plt
 ; RV32IFD-NEXT: fld ft1, 8(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: lui a2, %hi(.LCPI9_0)
-; RV32IFD-NEXT: fld ft0, %lo(.LCPI9_0)(a2)
+; RV32IFD-NEXT: lui a2, %hi(.LCPI11_0)
+; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_0)(a2)
 ; RV32IFD-NEXT: fle.d a3, ft0, ft1
 ; RV32IFD-NEXT: mv a2, a0
-; RV32IFD-NEXT: bnez a3, .LBB9_2
+; RV32IFD-NEXT: bnez a3, .LBB11_2
 ; RV32IFD-NEXT: # %bb.1: # %start
 ; RV32IFD-NEXT: mv a2, zero
-; RV32IFD-NEXT: .LBB9_2: # %start
-; RV32IFD-NEXT: lui a0, %hi(.LCPI9_1)
-; RV32IFD-NEXT: fld ft0, %lo(.LCPI9_1)(a0)
+; RV32IFD-NEXT: .LBB11_2: # %start
+; RV32IFD-NEXT: lui a0, %hi(.LCPI11_1)
+; RV32IFD-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
 ; RV32IFD-NEXT: flt.d a4, ft0, ft1
 ; RV32IFD-NEXT: addi a0, zero, -1
-; RV32IFD-NEXT: beqz a4, .LBB9_9
+; RV32IFD-NEXT: beqz a4, .LBB11_9
 ;
RV32IFD-NEXT: # %bb.3: # %start ; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: beqz a2, .LBB9_10 -; RV32IFD-NEXT: .LBB9_4: # %start +; RV32IFD-NEXT: beqz a2, .LBB11_10 +; RV32IFD-NEXT: .LBB11_4: # %start ; RV32IFD-NEXT: lui a5, 524288 -; RV32IFD-NEXT: beqz a3, .LBB9_11 -; RV32IFD-NEXT: .LBB9_5: # %start -; RV32IFD-NEXT: bnez a4, .LBB9_12 -; RV32IFD-NEXT: .LBB9_6: # %start -; RV32IFD-NEXT: bnez a2, .LBB9_8 -; RV32IFD-NEXT: .LBB9_7: # %start +; RV32IFD-NEXT: beqz a3, .LBB11_11 +; RV32IFD-NEXT: .LBB11_5: # %start +; RV32IFD-NEXT: bnez a4, .LBB11_12 +; RV32IFD-NEXT: .LBB11_6: # %start +; RV32IFD-NEXT: bnez a2, .LBB11_8 +; RV32IFD-NEXT: .LBB11_7: # %start ; RV32IFD-NEXT: mv a1, zero -; RV32IFD-NEXT: .LBB9_8: # %start +; RV32IFD-NEXT: .LBB11_8: # %start ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret -; RV32IFD-NEXT: .LBB9_9: # %start +; RV32IFD-NEXT: .LBB11_9: # %start ; RV32IFD-NEXT: mv a0, a2 ; RV32IFD-NEXT: feq.d a2, ft1, ft1 -; RV32IFD-NEXT: bnez a2, .LBB9_4 -; RV32IFD-NEXT: .LBB9_10: # %start +; RV32IFD-NEXT: bnez a2, .LBB11_4 +; RV32IFD-NEXT: .LBB11_10: # %start ; RV32IFD-NEXT: mv a0, zero ; RV32IFD-NEXT: lui a5, 524288 -; RV32IFD-NEXT: bnez a3, .LBB9_5 -; RV32IFD-NEXT: .LBB9_11: # %start +; RV32IFD-NEXT: bnez a3, .LBB11_5 +; RV32IFD-NEXT: .LBB11_11: # %start ; RV32IFD-NEXT: lui a1, 524288 -; RV32IFD-NEXT: beqz a4, .LBB9_6 -; RV32IFD-NEXT: .LBB9_12: +; RV32IFD-NEXT: beqz a4, .LBB11_6 +; RV32IFD-NEXT: .LBB11_12: ; RV32IFD-NEXT: addi a1, a5, -1 -; RV32IFD-NEXT: beqz a2, .LBB9_7 -; RV32IFD-NEXT: j .LBB9_8 +; RV32IFD-NEXT: beqz a2, .LBB11_7 +; RV32IFD-NEXT: j .LBB11_8 ; ; RV64IFD-LABEL: fcvt_l_d_sat: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a1, %hi(.LCPI9_0) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI9_0)(a1) +; RV64IFD-NEXT: lui a1, %hi(.LCPI11_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI11_0)(a1) ; RV64IFD-NEXT: fmv.d.x ft0, a0 ; RV64IFD-NEXT: fle.d a0, ft1, ft0 ; RV64IFD-NEXT: addi a1, zero, -1 -; RV64IFD-NEXT: bnez a0, .LBB9_2 +; RV64IFD-NEXT: bnez a0, .LBB11_2 ; RV64IFD-NEXT: # %bb.1: # %start ; RV64IFD-NEXT: slli a0, a1, 63 -; RV64IFD-NEXT: j .LBB9_3 -; RV64IFD-NEXT: .LBB9_2: +; RV64IFD-NEXT: j .LBB11_3 +; RV64IFD-NEXT: .LBB11_2: ; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz -; RV64IFD-NEXT: .LBB9_3: # %start -; RV64IFD-NEXT: lui a2, %hi(.LCPI9_1) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI9_1)(a2) +; RV64IFD-NEXT: .LBB11_3: # %start +; RV64IFD-NEXT: lui a2, %hi(.LCPI11_1) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI11_1)(a2) ; RV64IFD-NEXT: flt.d a2, ft1, ft0 -; RV64IFD-NEXT: bnez a2, .LBB9_6 +; RV64IFD-NEXT: bnez a2, .LBB11_6 ; RV64IFD-NEXT: # %bb.4: # %start ; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: beqz a1, .LBB9_7 -; RV64IFD-NEXT: .LBB9_5: # %start +; RV64IFD-NEXT: beqz a1, .LBB11_7 +; RV64IFD-NEXT: .LBB11_5: # %start ; RV64IFD-NEXT: ret -; RV64IFD-NEXT: .LBB9_6: +; RV64IFD-NEXT: .LBB11_6: ; RV64IFD-NEXT: srli a0, a1, 1 ; RV64IFD-NEXT: feq.d a1, ft0, ft0 -; RV64IFD-NEXT: bnez a1, .LBB9_5 -; RV64IFD-NEXT: .LBB9_7: # %start +; RV64IFD-NEXT: bnez a1, .LBB11_5 +; RV64IFD-NEXT: .LBB11_7: # %start ; RV64IFD-NEXT: mv a0, zero ; RV64IFD-NEXT: ret start: @@ -355,55 +401,55 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IFD-NEXT: fcvt.d.w ft0, zero ; RV32IFD-NEXT: fle.d a4, ft0, ft1 ; RV32IFD-NEXT: mv a3, a0 -; RV32IFD-NEXT: bnez a4, .LBB11_2 +; RV32IFD-NEXT: bnez a4, .LBB13_2 ; RV32IFD-NEXT: # %bb.1: # %start ; RV32IFD-NEXT: mv a3, zero -; RV32IFD-NEXT: .LBB11_2: # %start -; RV32IFD-NEXT: lui a0, %hi(.LCPI11_0) -; RV32IFD-NEXT: fld ft0, 
%lo(.LCPI11_0)(a0) +; RV32IFD-NEXT: .LBB13_2: # %start +; RV32IFD-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IFD-NEXT: fld ft0, %lo(.LCPI13_0)(a0) ; RV32IFD-NEXT: flt.d a5, ft0, ft1 ; RV32IFD-NEXT: addi a2, zero, -1 ; RV32IFD-NEXT: addi a0, zero, -1 -; RV32IFD-NEXT: beqz a5, .LBB11_7 +; RV32IFD-NEXT: beqz a5, .LBB13_7 ; RV32IFD-NEXT: # %bb.3: # %start -; RV32IFD-NEXT: beqz a4, .LBB11_8 -; RV32IFD-NEXT: .LBB11_4: # %start -; RV32IFD-NEXT: bnez a5, .LBB11_6 -; RV32IFD-NEXT: .LBB11_5: # %start +; RV32IFD-NEXT: beqz a4, .LBB13_8 +; RV32IFD-NEXT: .LBB13_4: # %start +; RV32IFD-NEXT: bnez a5, .LBB13_6 +; RV32IFD-NEXT: .LBB13_5: # %start ; RV32IFD-NEXT: mv a2, a1 -; RV32IFD-NEXT: .LBB11_6: # %start +; RV32IFD-NEXT: .LBB13_6: # %start ; RV32IFD-NEXT: mv a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 32 ; RV32IFD-NEXT: ret -; RV32IFD-NEXT: .LBB11_7: # %start +; RV32IFD-NEXT: .LBB13_7: # %start ; RV32IFD-NEXT: mv a0, a3 -; RV32IFD-NEXT: bnez a4, .LBB11_4 -; RV32IFD-NEXT: .LBB11_8: # %start +; RV32IFD-NEXT: bnez a4, .LBB13_4 +; RV32IFD-NEXT: .LBB13_8: # %start ; RV32IFD-NEXT: mv a1, zero -; RV32IFD-NEXT: beqz a5, .LBB11_5 -; RV32IFD-NEXT: j .LBB11_6 +; RV32IFD-NEXT: beqz a5, .LBB13_5 +; RV32IFD-NEXT: j .LBB13_6 ; ; RV64IFD-LABEL: fcvt_lu_d_sat: ; RV64IFD: # %bb.0: # %start ; RV64IFD-NEXT: fmv.d.x ft0, a0 ; RV64IFD-NEXT: fmv.d.x ft1, zero ; RV64IFD-NEXT: fle.d a0, ft1, ft0 -; RV64IFD-NEXT: bnez a0, .LBB11_2 +; RV64IFD-NEXT: bnez a0, .LBB13_2 ; RV64IFD-NEXT: # %bb.1: # %start ; RV64IFD-NEXT: mv a1, zero -; RV64IFD-NEXT: j .LBB11_3 -; RV64IFD-NEXT: .LBB11_2: +; RV64IFD-NEXT: j .LBB13_3 +; RV64IFD-NEXT: .LBB13_2: ; RV64IFD-NEXT: fcvt.lu.d a1, ft0, rtz -; RV64IFD-NEXT: .LBB11_3: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI11_0) -; RV64IFD-NEXT: fld ft1, %lo(.LCPI11_0)(a0) +; RV64IFD-NEXT: .LBB13_3: # %start +; RV64IFD-NEXT: lui a0, %hi(.LCPI13_0) +; RV64IFD-NEXT: fld ft1, %lo(.LCPI13_0)(a0) ; RV64IFD-NEXT: flt.d a2, ft1, ft0 ; RV64IFD-NEXT: addi a0, zero, -1 -; RV64IFD-NEXT: bnez a2, .LBB11_5 +; RV64IFD-NEXT: bnez a2, .LBB13_5 ; RV64IFD-NEXT: # %bb.4: # %start ; RV64IFD-NEXT: mv a0, a1 -; RV64IFD-NEXT: .LBB11_5: # %start +; RV64IFD-NEXT: .LBB13_5: # %start ; RV64IFD-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.f64(double %a) diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index c362093f4904c..a395743e27f15 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -196,6 +196,25 @@ define float @fcvt_s_w(i32 %a) nounwind { ret float %1 } +define float @fcvt_s_w_load(i32* %p) nounwind { +; RV32IF-LABEL: fcvt_s_w_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lw a0, 0(a0) +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %a = load i32, i32* %p + %1 = sitofp i32 %a to float + ret float %1 +} + define float @fcvt_s_wu(i32 %a) nounwind { ; RV32IF-LABEL: fcvt_s_wu: ; RV32IF: # %bb.0: @@ -212,6 +231,25 @@ define float @fcvt_s_wu(i32 %a) nounwind { ret float %1 } +define float @fcvt_s_wu_load(i32* %p) nounwind { +; RV32IF-LABEL: fcvt_s_wu_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lwu a0, 0(a0) +; RV64IF-NEXT: 
fcvt.s.lu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %a = load i32, i32* %p + %1 = uitofp i32 %a to float + ret float %1 +} + define float @fmv_w_x(i32 %a, i32 %b) nounwind { ; RV32IF-LABEL: fmv_w_x: ; RV32IF: # %bb.0: @@ -260,85 +298,85 @@ define i64 @fcvt_l_s_sat(float %a) nounwind { ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: lui a1, %hi(.LCPI9_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI9_0)(a1) +; RV32IF-NEXT: lui a1, %hi(.LCPI11_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a1) ; RV32IF-NEXT: fmv.w.x ft1, a0 ; RV32IF-NEXT: fsw ft1, 4(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fle.s s0, ft0, ft1 ; RV32IF-NEXT: call __fixsfdi@plt ; RV32IF-NEXT: mv a2, a0 -; RV32IF-NEXT: bnez s0, .LBB9_2 +; RV32IF-NEXT: bnez s0, .LBB11_2 ; RV32IF-NEXT: # %bb.1: # %start ; RV32IF-NEXT: mv a2, zero -; RV32IF-NEXT: .LBB9_2: # %start -; RV32IF-NEXT: lui a0, %hi(.LCPI9_1) -; RV32IF-NEXT: flw ft0, %lo(.LCPI9_1)(a0) +; RV32IF-NEXT: .LBB11_2: # %start +; RV32IF-NEXT: lui a0, %hi(.LCPI11_1) +; RV32IF-NEXT: flw ft0, %lo(.LCPI11_1)(a0) ; RV32IF-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flt.s a3, ft0, ft1 ; RV32IF-NEXT: fmv.s ft0, ft1 ; RV32IF-NEXT: addi a0, zero, -1 -; RV32IF-NEXT: beqz a3, .LBB9_9 +; RV32IF-NEXT: beqz a3, .LBB11_9 ; RV32IF-NEXT: # %bb.3: # %start ; RV32IF-NEXT: feq.s a2, ft0, ft0 -; RV32IF-NEXT: beqz a2, .LBB9_10 -; RV32IF-NEXT: .LBB9_4: # %start +; RV32IF-NEXT: beqz a2, .LBB11_10 +; RV32IF-NEXT: .LBB11_4: # %start ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: beqz s0, .LBB9_11 -; RV32IF-NEXT: .LBB9_5: # %start -; RV32IF-NEXT: bnez a3, .LBB9_12 -; RV32IF-NEXT: .LBB9_6: # %start -; RV32IF-NEXT: bnez a2, .LBB9_8 -; RV32IF-NEXT: .LBB9_7: # %start +; RV32IF-NEXT: beqz s0, .LBB11_11 +; RV32IF-NEXT: .LBB11_5: # %start +; RV32IF-NEXT: bnez a3, .LBB11_12 +; RV32IF-NEXT: .LBB11_6: # %start +; RV32IF-NEXT: bnez a2, .LBB11_8 +; RV32IF-NEXT: .LBB11_7: # %start ; RV32IF-NEXT: mv a1, zero -; RV32IF-NEXT: .LBB9_8: # %start +; RV32IF-NEXT: .LBB11_8: # %start ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB9_9: # %start +; RV32IF-NEXT: .LBB11_9: # %start ; RV32IF-NEXT: mv a0, a2 ; RV32IF-NEXT: feq.s a2, ft0, ft0 -; RV32IF-NEXT: bnez a2, .LBB9_4 -; RV32IF-NEXT: .LBB9_10: # %start +; RV32IF-NEXT: bnez a2, .LBB11_4 +; RV32IF-NEXT: .LBB11_10: # %start ; RV32IF-NEXT: mv a0, zero ; RV32IF-NEXT: lui a4, 524288 -; RV32IF-NEXT: bnez s0, .LBB9_5 -; RV32IF-NEXT: .LBB9_11: # %start +; RV32IF-NEXT: bnez s0, .LBB11_5 +; RV32IF-NEXT: .LBB11_11: # %start ; RV32IF-NEXT: lui a1, 524288 -; RV32IF-NEXT: beqz a3, .LBB9_6 -; RV32IF-NEXT: .LBB9_12: +; RV32IF-NEXT: beqz a3, .LBB11_6 +; RV32IF-NEXT: .LBB11_12: ; RV32IF-NEXT: addi a1, a4, -1 -; RV32IF-NEXT: beqz a2, .LBB9_7 -; RV32IF-NEXT: j .LBB9_8 +; RV32IF-NEXT: beqz a2, .LBB11_7 +; RV32IF-NEXT: j .LBB11_8 ; ; RV64IF-LABEL: fcvt_l_s_sat: ; RV64IF: # %bb.0: # %start -; RV64IF-NEXT: lui a1, %hi(.LCPI9_0) -; RV64IF-NEXT: flw ft1, %lo(.LCPI9_0)(a1) +; RV64IF-NEXT: lui a1, %hi(.LCPI11_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI11_0)(a1) ; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fle.s a0, ft1, ft0 ; RV64IF-NEXT: addi a1, zero, -1 -; RV64IF-NEXT: bnez a0, .LBB9_2 +; RV64IF-NEXT: bnez a0, .LBB11_2 ; RV64IF-NEXT: # %bb.1: # %start ; RV64IF-NEXT: slli a0, a1, 63 -; RV64IF-NEXT: j .LBB9_3 -; RV64IF-NEXT: .LBB9_2: +; RV64IF-NEXT: j .LBB11_3 +; 
RV64IF-NEXT: .LBB11_2: ; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz -; RV64IF-NEXT: .LBB9_3: # %start -; RV64IF-NEXT: lui a2, %hi(.LCPI9_1) -; RV64IF-NEXT: flw ft1, %lo(.LCPI9_1)(a2) +; RV64IF-NEXT: .LBB11_3: # %start +; RV64IF-NEXT: lui a2, %hi(.LCPI11_1) +; RV64IF-NEXT: flw ft1, %lo(.LCPI11_1)(a2) ; RV64IF-NEXT: flt.s a2, ft1, ft0 -; RV64IF-NEXT: bnez a2, .LBB9_6 +; RV64IF-NEXT: bnez a2, .LBB11_6 ; RV64IF-NEXT: # %bb.4: # %start ; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: beqz a1, .LBB9_7 -; RV64IF-NEXT: .LBB9_5: # %start +; RV64IF-NEXT: beqz a1, .LBB11_7 +; RV64IF-NEXT: .LBB11_5: # %start ; RV64IF-NEXT: ret -; RV64IF-NEXT: .LBB9_6: +; RV64IF-NEXT: .LBB11_6: ; RV64IF-NEXT: srli a0, a1, 1 ; RV64IF-NEXT: feq.s a1, ft0, ft0 -; RV64IF-NEXT: bnez a1, .LBB9_5 -; RV64IF-NEXT: .LBB9_7: # %start +; RV64IF-NEXT: bnez a1, .LBB11_5 +; RV64IF-NEXT: .LBB11_7: # %start ; RV64IF-NEXT: mv a0, zero ; RV64IF-NEXT: ret start: @@ -378,57 +416,57 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind { ; RV32IF-NEXT: fle.s s0, ft0, ft1 ; RV32IF-NEXT: call __fixunssfdi@plt ; RV32IF-NEXT: mv a3, a0 -; RV32IF-NEXT: bnez s0, .LBB11_2 +; RV32IF-NEXT: bnez s0, .LBB13_2 ; RV32IF-NEXT: # %bb.1: # %start ; RV32IF-NEXT: mv a3, zero -; RV32IF-NEXT: .LBB11_2: # %start -; RV32IF-NEXT: lui a0, %hi(.LCPI11_0) -; RV32IF-NEXT: flw ft0, %lo(.LCPI11_0)(a0) +; RV32IF-NEXT: .LBB13_2: # %start +; RV32IF-NEXT: lui a0, %hi(.LCPI13_0) +; RV32IF-NEXT: flw ft0, %lo(.LCPI13_0)(a0) ; RV32IF-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload ; RV32IF-NEXT: flt.s a4, ft0, ft1 ; RV32IF-NEXT: addi a2, zero, -1 ; RV32IF-NEXT: addi a0, zero, -1 -; RV32IF-NEXT: beqz a4, .LBB11_7 +; RV32IF-NEXT: beqz a4, .LBB13_7 ; RV32IF-NEXT: # %bb.3: # %start -; RV32IF-NEXT: beqz s0, .LBB11_8 -; RV32IF-NEXT: .LBB11_4: # %start -; RV32IF-NEXT: bnez a4, .LBB11_6 -; RV32IF-NEXT: .LBB11_5: # %start +; RV32IF-NEXT: beqz s0, .LBB13_8 +; RV32IF-NEXT: .LBB13_4: # %start +; RV32IF-NEXT: bnez a4, .LBB13_6 +; RV32IF-NEXT: .LBB13_5: # %start ; RV32IF-NEXT: mv a2, a1 -; RV32IF-NEXT: .LBB11_6: # %start +; RV32IF-NEXT: .LBB13_6: # %start ; RV32IF-NEXT: mv a1, a2 ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret -; RV32IF-NEXT: .LBB11_7: # %start +; RV32IF-NEXT: .LBB13_7: # %start ; RV32IF-NEXT: mv a0, a3 -; RV32IF-NEXT: bnez s0, .LBB11_4 -; RV32IF-NEXT: .LBB11_8: # %start +; RV32IF-NEXT: bnez s0, .LBB13_4 +; RV32IF-NEXT: .LBB13_8: # %start ; RV32IF-NEXT: mv a1, zero -; RV32IF-NEXT: beqz a4, .LBB11_5 -; RV32IF-NEXT: j .LBB11_6 +; RV32IF-NEXT: beqz a4, .LBB13_5 +; RV32IF-NEXT: j .LBB13_6 ; ; RV64IF-LABEL: fcvt_lu_s_sat: ; RV64IF: # %bb.0: # %start ; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fmv.w.x ft1, zero ; RV64IF-NEXT: fle.s a0, ft1, ft0 -; RV64IF-NEXT: bnez a0, .LBB11_2 +; RV64IF-NEXT: bnez a0, .LBB13_2 ; RV64IF-NEXT: # %bb.1: # %start ; RV64IF-NEXT: mv a1, zero -; RV64IF-NEXT: j .LBB11_3 -; RV64IF-NEXT: .LBB11_2: +; RV64IF-NEXT: j .LBB13_3 +; RV64IF-NEXT: .LBB13_2: ; RV64IF-NEXT: fcvt.lu.s a1, ft0, rtz -; RV64IF-NEXT: .LBB11_3: # %start -; RV64IF-NEXT: lui a0, %hi(.LCPI11_0) -; RV64IF-NEXT: flw ft1, %lo(.LCPI11_0)(a0) +; RV64IF-NEXT: .LBB13_3: # %start +; RV64IF-NEXT: lui a0, %hi(.LCPI13_0) +; RV64IF-NEXT: flw ft1, %lo(.LCPI13_0)(a0) ; RV64IF-NEXT: flt.s a2, ft1, ft0 ; RV64IF-NEXT: addi a0, zero, -1 -; RV64IF-NEXT: bnez a2, .LBB11_5 +; RV64IF-NEXT: bnez a2, .LBB13_5 ; RV64IF-NEXT: # %bb.4: # %start ; RV64IF-NEXT: mv a0, a1 -; RV64IF-NEXT: .LBB11_5: # %start +; RV64IF-NEXT: .LBB13_5: 
# %start ; RV64IF-NEXT: ret start: %0 = tail call i64 @llvm.fptoui.sat.i64.f32(float %a) diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index 87b811727c395..d7d29674cb653 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -976,6 +976,35 @@ define half @fcvt_h_w(i32 %a) nounwind { ret half %1 } +define half @fcvt_h_w_load(i32* %p) nounwind { +; RV32IZFH-LABEL: fcvt_h_w_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lw a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lw a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = sitofp i32 %a to half + ret half %1 +} + define half @fcvt_h_wu(i32 %a) nounwind { ; RV32IZFH-LABEL: fcvt_h_wu: ; RV32IZFH: # %bb.0: @@ -1000,6 +1029,35 @@ define half @fcvt_h_wu(i32 %a) nounwind { ret half %1 } +define half @fcvt_h_wu_load(i32* %p) nounwind { +; RV32IZFH-LABEL: fcvt_h_wu_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lwu a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lwu a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = uitofp i32 %a to half + ret half %1 +} + define half @fcvt_h_l(i64 %a) nounwind { ; RV32IZFH-LABEL: fcvt_h_l: ; RV32IZFH: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 3aea386ec530b..3b3f4f9ee1d2d 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -1011,6 +1011,68 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind { ret i32 %1 } +define signext i32 @ctpop_i32_load(i32* %p) nounwind { +; RV64I-LABEL: ctpop_i32_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lwu a0, 0(a0) +; RV64I-NEXT: srli a1, a0, 1 +; RV64I-NEXT: lui a2, 349525 +; RV64I-NEXT: addiw a2, a2, 1365 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 2 +; RV64I-NEXT: lui a2, 13107 +; RV64I-NEXT: addiw a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: slli a2, a2, 12 +; RV64I-NEXT: addi a2, a2, 819 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: srli a1, a0, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lui a1, 3855 +; RV64I-NEXT: addiw a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, 241 +; RV64I-NEXT: slli a1, a1, 12 +; RV64I-NEXT: addi a1, a1, -241 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw a1, a1, 257 +; 
RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: addi a1, a1, 257 +; RV64I-NEXT: call __muldi3@plt +; RV64I-NEXT: srli a0, a0, 56 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IB-LABEL: ctpop_i32_load: +; RV64IB: # %bb.0: +; RV64IB-NEXT: lwu a0, 0(a0) +; RV64IB-NEXT: cpop a0, a0 +; RV64IB-NEXT: ret +; +; RV64IBB-LABEL: ctpop_i32_load: +; RV64IBB: # %bb.0: +; RV64IBB-NEXT: lwu a0, 0(a0) +; RV64IBB-NEXT: cpop a0, a0 +; RV64IBB-NEXT: ret + %a = load i32, i32* %p + %1 = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %1 +} + declare i64 @llvm.ctpop.i64(i64) define i64 @ctpop_i64(i64 %a) nounwind { @@ -1164,10 +1226,10 @@ define i64 @sexth_i64(i64 %a) nounwind { define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: min_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a0, a1, .LBB17_2 +; RV64I-NEXT: blt a0, a1, .LBB18_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB17_2: +; RV64I-NEXT: .LBB18_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: min_i32: @@ -1187,10 +1249,10 @@ define signext i32 @min_i32(i32 signext %a, i32 signext %b) nounwind { define i64 @min_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: min_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a0, a1, .LBB18_2 +; RV64I-NEXT: blt a0, a1, .LBB19_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB18_2: +; RV64I-NEXT: .LBB19_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: min_i64: @@ -1210,10 +1272,10 @@ define i64 @min_i64(i64 %a, i64 %b) nounwind { define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: max_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a1, a0, .LBB19_2 +; RV64I-NEXT: blt a1, a0, .LBB20_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB19_2: +; RV64I-NEXT: .LBB20_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: max_i32: @@ -1233,10 +1295,10 @@ define signext i32 @max_i32(i32 signext %a, i32 signext %b) nounwind { define i64 @max_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: max_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a1, a0, .LBB20_2 +; RV64I-NEXT: blt a1, a0, .LBB21_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB20_2: +; RV64I-NEXT: .LBB21_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: max_i64: @@ -1256,10 +1318,10 @@ define i64 @max_i64(i64 %a, i64 %b) nounwind { define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: minu_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a0, a1, .LBB21_2 +; RV64I-NEXT: bltu a0, a1, .LBB22_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB21_2: +; RV64I-NEXT: .LBB22_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: minu_i32: @@ -1279,10 +1341,10 @@ define signext i32 @minu_i32(i32 signext %a, i32 signext %b) nounwind { define i64 @minu_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: minu_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a0, a1, .LBB22_2 +; RV64I-NEXT: bltu a0, a1, .LBB23_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB22_2: +; RV64I-NEXT: .LBB23_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: minu_i64: @@ -1302,10 +1364,10 @@ define i64 @minu_i64(i64 %a, i64 %b) nounwind { define signext i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind { ; RV64I-LABEL: maxu_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a1, a0, .LBB23_2 +; RV64I-NEXT: bltu a1, a0, .LBB24_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB23_2: +; RV64I-NEXT: .LBB24_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: maxu_i32: @@ -1325,10 +1387,10 @@ define signext 
i32 @maxu_i32(i32 signext %a, i32 signext %b) nounwind { define i64 @maxu_i64(i64 %a, i64 %b) nounwind { ; RV64I-LABEL: maxu_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a1, a0, .LBB24_2 +; RV64I-NEXT: bltu a1, a0, .LBB25_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: .LBB24_2: +; RV64I-NEXT: .LBB25_2: ; RV64I-NEXT: ret ; ; RV64IB-LABEL: maxu_i64: