108 changes: 48 additions & 60 deletions llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -456,92 +456,80 @@ define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
; RV32IZFBFMIN-NEXT: addi sp, sp, -32
; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: addi sp, sp, -16
; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
; RV32IZFBFMIN-NEXT: and a0, s3, a0
; RV32IZFBFMIN-NEXT: or a0, s1, a0
; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
; RV32IZFBFMIN-NEXT: lui a3, 524288
; RV32IZFBFMIN-NEXT: beqz s2, .LBB10_2
; RV32IZFBFMIN-NEXT: lui a2, 524288
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: mv a2, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
; RV32IZFBFMIN-NEXT: and a0, a2, a0
; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: addi a2, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
; RV32IZFBFMIN-NEXT: and a1, a2, a3
; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
; RV32IZFBFMIN-NEXT: neg a4, a1
; RV32IZFBFMIN-NEXT: and a1, a4, a2
; RV32IZFBFMIN-NEXT: neg a2, a3
; RV32IZFBFMIN-NEXT: neg a3, s0
; RV32IZFBFMIN-NEXT: and a0, a3, a0
; RV32IZFBFMIN-NEXT: or a0, a2, a0
; RV32IZFBFMIN-NEXT: and a0, a4, a0
; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFBFMIN-NEXT: addi sp, sp, 16
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
; R32IDZFBFMIN-NEXT: addi sp, sp, -32
; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: addi sp, sp, -16
; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
; R32IDZFBFMIN-NEXT: and a0, s3, a0
; R32IDZFBFMIN-NEXT: or a0, s1, a0
; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
; R32IDZFBFMIN-NEXT: lui a3, 524288
; R32IDZFBFMIN-NEXT: beqz s2, .LBB10_2
; R32IDZFBFMIN-NEXT: lui a2, 524288
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: mv a2, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
; R32IDZFBFMIN-NEXT: and a0, a2, a0
; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: addi a2, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
; R32IDZFBFMIN-NEXT: and a1, a2, a3
; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
; R32IDZFBFMIN-NEXT: neg a4, a1
; R32IDZFBFMIN-NEXT: and a1, a4, a2
; R32IDZFBFMIN-NEXT: neg a2, a3
; R32IDZFBFMIN-NEXT: neg a3, s0
; R32IDZFBFMIN-NEXT: and a0, a3, a0
; R32IDZFBFMIN-NEXT: or a0, a2, a0
; R32IDZFBFMIN-NEXT: and a0, a4, a0
; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: addi sp, sp, 16
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
111 changes: 53 additions & 58 deletions llvm/test/CodeGen/RISCV/double-convert.ll
@@ -692,28 +692,27 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB12_2
; RV32IFD-NEXT: beqz s0, .LBB12_2
; RV32IFD-NEXT: # %bb.1: # %start
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB12_2: # %start
; RV32IFD-NEXT: lui a1, %hi(.LCPI12_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB12_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB12_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB12_4: # %start
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
@@ -790,33 +789,32 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a3, 278016
; RV32I-NEXT: addi a3, a3, -1
; RV32I-NEXT: li a2, -1
; RV32I-NEXT: call __gtdf2
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: lui a3, 802304
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: call __gedf2
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __fixdfdi
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: mv s4, a1
; RV32I-NEXT: lui s6, 524288
; RV32I-NEXT: bgez s2, .LBB12_2
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv s5, a1
; RV32I-NEXT: lui a0, 524288
; RV32I-NEXT: bgez s3, .LBB12_2
; RV32I-NEXT: # %bb.1: # %start
; RV32I-NEXT: lui s4, 524288
; RV32I-NEXT: lui s5, 524288
; RV32I-NEXT: .LBB12_2: # %start
; RV32I-NEXT: lui a3, 278016
; RV32I-NEXT: addi a3, a3, -1
; RV32I-NEXT: li a2, -1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
; RV32I-NEXT: call __gtdf2
; RV32I-NEXT: mv s5, a0
; RV32I-NEXT: blez a0, .LBB12_4
; RV32I-NEXT: blez s2, .LBB12_4
; RV32I-NEXT: # %bb.3: # %start
; RV32I-NEXT: addi s4, s6, -1
; RV32I-NEXT: addi s5, a0, -1
; RV32I-NEXT: .LBB12_4: # %start
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: mv a1, s0
@@ -825,11 +823,11 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32I-NEXT: call __unorddf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a1, a0, s4
; RV32I-NEXT: slti a2, s2, 0
; RV32I-NEXT: and a1, a0, s5
; RV32I-NEXT: slti a2, s3, 0
; RV32I-NEXT: addi a2, a2, -1
; RV32I-NEXT: and a2, a2, s3
; RV32I-NEXT: sgtz a3, s5
; RV32I-NEXT: and a2, a2, s4
; RV32I-NEXT: sgtz a3, s2
; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: or a2, a3, a2
; RV32I-NEXT: and a0, a0, a2
@@ -840,7 +838,6 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -949,22 +946,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: lui a0, %hi(.LCPI14_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
; RV32IFD-NEXT: flt.d a0, fa5, fa0
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
; RV32IFD-NEXT: neg s1, a0
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
; RV32IFD-NEXT: and a1, s1, a1
; RV32IFD-NEXT: or a1, s0, a1
; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a2)
; RV32IFD-NEXT: and a0, s0, a0
; RV32IFD-NEXT: flt.d a2, fa5, fs0
; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a1, s0, a1
; RV32IFD-NEXT: or a1, a2, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -983,27 +981,24 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: mv s1, a1
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: mv s0, a0
; RV32IZFINXZDINX-NEXT: fle.d a0, a2, s0
; RV32IZFINXZDINX-NEXT: neg s2, a0
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI14_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI14_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI14_0)(a2)
; RV32IZFINXZDINX-NEXT: and a0, s2, a0
; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0)
; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4)
; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4)
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
; RV32IZFINXZDINX-NEXT: neg a2, a2
; RV32IZFINXZDINX-NEXT: or a0, a2, a0
; RV32IZFINXZDINX-NEXT: and a1, s2, a1
; RV32IZFINXZDINX-NEXT: or a1, a2, a1
; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
; RV32IZFINXZDINX-NEXT: neg a3, a3
; RV32IZFINXZDINX-NEXT: or a0, a3, a0
; RV32IZFINXZDINX-NEXT: and a1, a2, a1
; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 16
; RV32IZFINXZDINX-NEXT: ret
;
138 changes: 66 additions & 72 deletions llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -53,28 +53,27 @@ define i64 @test_floor_si64(double %x) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB1_2
; RV32IFD-NEXT: beqz s0, .LBB1_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB1_2:
; RV32IFD-NEXT: lui a1, %hi(.LCPI1_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB1_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB1_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB1_4:
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
@@ -299,28 +298,27 @@ define i64 @test_ceil_si64(double %x) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB5_2
; RV32IFD-NEXT: beqz s0, .LBB5_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB5_2:
; RV32IFD-NEXT: lui a1, %hi(.LCPI5_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB5_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB5_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB5_4:
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
@@ -545,28 +543,27 @@ define i64 @test_trunc_si64(double %x) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB9_2
; RV32IFD-NEXT: beqz s0, .LBB9_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB9_2:
; RV32IFD-NEXT: lui a1, %hi(.LCPI9_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB9_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB9_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB9_4:
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
@@ -791,28 +788,27 @@ define i64 @test_round_si64(double %x) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB13_2
; RV32IFD-NEXT: beqz s0, .LBB13_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB13_2:
; RV32IFD-NEXT: lui a1, %hi(.LCPI13_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB13_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB13_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB13_4:
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
@@ -1037,28 +1033,27 @@ define i64 @test_roundeven_si64(double %x) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB17_2
; RV32IFD-NEXT: beqz s0, .LBB17_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB17_2:
; RV32IFD-NEXT: lui a1, %hi(.LCPI17_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB17_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB17_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB17_4:
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
@@ -1283,28 +1278,27 @@ define i64 @test_rint_si64(double %x) nounwind {
; RV32IFD-NEXT: fmv.d fs0, fa0
; RV32IFD-NEXT: fle.d s0, fa5, fa0
; RV32IFD-NEXT: call __fixdfdi
; RV32IFD-NEXT: lui a3, 524288
; RV32IFD-NEXT: li a4, 1
; RV32IFD-NEXT: lui a4, 524288
; RV32IFD-NEXT: lui a2, 524288
; RV32IFD-NEXT: bne s0, a4, .LBB21_2
; RV32IFD-NEXT: beqz s0, .LBB21_2
; RV32IFD-NEXT: # %bb.1:
; RV32IFD-NEXT: mv a2, a1
; RV32IFD-NEXT: .LBB21_2:
; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1)
; RV32IFD-NEXT: flt.d a4, fa5, fs0
; RV32IFD-NEXT: beqz a4, .LBB21_4
; RV32IFD-NEXT: flt.d a3, fa5, fs0
; RV32IFD-NEXT: beqz a3, .LBB21_4
; RV32IFD-NEXT: # %bb.3:
; RV32IFD-NEXT: addi a2, a3, -1
; RV32IFD-NEXT: addi a2, a4, -1
; RV32IFD-NEXT: .LBB21_4:
; RV32IFD-NEXT: feq.d a1, fs0, fs0
; RV32IFD-NEXT: neg a3, a1
; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: neg a2, a4
; RV32IFD-NEXT: neg a4, s0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: neg a4, a1
; RV32IFD-NEXT: and a1, a4, a2
; RV32IFD-NEXT: neg a2, a3
; RV32IFD-NEXT: neg a3, s0
; RV32IFD-NEXT: and a0, a3, a0
; RV32IFD-NEXT: or a0, a2, a0
; RV32IFD-NEXT: and a0, a4, a0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
158 changes: 79 additions & 79 deletions llvm/test/CodeGen/RISCV/float-convert.ll
@@ -275,26 +275,24 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a1, 325632
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: call __gtsf2
; RV32I-NEXT: sgtz a0, a0
; RV32I-NEXT: neg s1, a0
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: addi s2, a0, -1
; RV32I-NEXT: addi s1, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfsi
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: or a0, s1, a0
; RV32I-NEXT: and s1, s1, a0
; RV32I-NEXT: lui a1, 325632
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __gtsf2
; RV32I-NEXT: sgtz a0, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: or a0, a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@@ -618,38 +616,36 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fa0
; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: call __fixsfdi
; RV32IF-NEXT: lui a2, %hi(.LCPI12_0)
; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a2)
; RV32IF-NEXT: and a0, s1, a0
; RV32IF-NEXT: flt.s a3, fa5, fs0
; RV32IF-NEXT: neg a2, a3
; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: feq.s a2, fs0, fs0
; RV32IF-NEXT: neg a2, a2
; RV32IF-NEXT: lui a5, 524288
; RV32IF-NEXT: lui a4, 524288
; RV32IF-NEXT: lui a2, 524288
; RV32IF-NEXT: beqz s0, .LBB12_2
; RV32IF-NEXT: # %bb.1: # %start
; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: mv a2, a1
; RV32IF-NEXT: .LBB12_2: # %start
; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: lui a1, %hi(.LCPI12_0)
; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a1)
; RV32IF-NEXT: flt.s a3, fa5, fs0
; RV32IF-NEXT: beqz a3, .LBB12_4
; RV32IF-NEXT: # %bb.3:
; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: addi a2, a4, -1
; RV32IF-NEXT: .LBB12_4: # %start
; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: feq.s a1, fs0, fs0
; RV32IF-NEXT: neg a4, a1
; RV32IF-NEXT: and a1, a4, a2
; RV32IF-NEXT: neg a2, s0
; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: neg a2, a3
; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a0, a4, a0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -867,22 +863,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IF-NEXT: lui a0, %hi(.LCPI14_0)
; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0)
; RV32IF-NEXT: flt.s a0, fa5, fa0
; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: fmv.w.x fa5, zero
; RV32IF-NEXT: fle.s a0, fa5, fa0
; RV32IF-NEXT: neg s1, a0
; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: call __fixunssfdi
; RV32IF-NEXT: and a0, s1, a0
; RV32IF-NEXT: or a0, s0, a0
; RV32IF-NEXT: and a1, s1, a1
; RV32IF-NEXT: or a1, s0, a1
; RV32IF-NEXT: lui a2, %hi(.LCPI14_0)
; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2)
; RV32IF-NEXT: and a0, s0, a0
; RV32IF-NEXT: flt.s a2, fa5, fs0
; RV32IF-NEXT: neg a2, a2
; RV32IF-NEXT: or a0, a2, a0
; RV32IF-NEXT: and a1, s0, a1
; RV32IF-NEXT: or a1, a2, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -901,17 +898,19 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0)
; RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1)
; RV32IZFINX-NEXT: flt.s a1, a1, a0
; RV32IZFINX-NEXT: neg s0, a1
; RV32IZFINX-NEXT: fle.s a1, zero, a0
; RV32IZFINX-NEXT: neg s1, a1
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: fle.s a0, zero, a0
; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2)
; RV32IZFINX-NEXT: and a0, s1, a0
; RV32IZFINX-NEXT: or a0, s0, a0
; RV32IZFINX-NEXT: flt.s a2, a2, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: and a1, s1, a1
; RV32IZFINX-NEXT: or a1, s0, a1
; RV32IZFINX-NEXT: or a1, a2, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -929,33 +928,36 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
;
; RV32I-LABEL: fcvt_lu_s_sat:
; RV32I: # %bb.0: # %start
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a1, 391168
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: call __gtsf2
; RV32I-NEXT: sgtz a0, a0
; RV32I-NEXT: neg s1, a0
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: addi s2, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfdi
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: or a0, s1, a0
; RV32I-NEXT: and a1, s2, a1
; RV32I-NEXT: or a1, s1, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: and s3, s2, a0
; RV32I-NEXT: lui a1, 391168
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __gtsf2
; RV32I-NEXT: sgtz a0, a0
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: or a0, a1, s3
; RV32I-NEXT: and a2, s2, s1
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV64I-LABEL: fcvt_lu_s_sat:
@@ -2089,26 +2091,24 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a1, 325632
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: call __gtsf2
; RV32I-NEXT: sgtz a0, a0
; RV32I-NEXT: neg s1, a0
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: addi s2, a0, -1
; RV32I-NEXT: addi s1, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfsi
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: or a0, s1, a0
; RV32I-NEXT: and s1, s1, a0
; RV32I-NEXT: lui a1, 325632
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __gtsf2
; RV32I-NEXT: sgtz a0, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: or a0, a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
288 changes: 138 additions & 150 deletions llvm/test/CodeGen/RISCV/float-round-conv-sat.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -3567,8 +3567,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: neg a3, a0
; RV32-NEXT: and a3, a3, a1
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a4, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
@@ -3659,8 +3659,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
; RV32-NEXT: neg a3, a0
; RV32-NEXT: and a3, a3, a1
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a4, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -114,8 +114,8 @@ define i32 @utest_f64i32(double %x) {
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: .cfi_offset ra, -4
; RV32IF-NEXT: call __fixunsdfdi
; RV32IF-NEXT: seqz a1, a1
; RV32IF-NEXT: sltiu a2, a0, -1
; RV32IF-NEXT: seqz a1, a1
; RV32IF-NEXT: and a1, a1, a2
; RV32IF-NEXT: addi a1, a1, -1
; RV32IF-NEXT: or a0, a1, a0
@@ -429,8 +429,8 @@ define i32 @utesth_f16i32(half %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: call __fixunssfdi
; RV32-NEXT: seqz a1, a1
; RV32-NEXT: sltiu a2, a0, -1
; RV32-NEXT: seqz a1, a1
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: or a0, a1, a0
268 changes: 119 additions & 149 deletions llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2145,47 +2145,41 @@ define i64 @fcvt_l_h(half %a) nounwind {
define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_l_h_sat:
; RV32IZFH: # %bb.0: # %start
; RV32IZFH-NEXT: addi sp, sp, -32
; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: flt.s s0, fa5, fs0
; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s2, fa5, fs0
; RV32IZFH-NEXT: neg s3, s2
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
; RV32IZFH-NEXT: and a0, s3, a0
; RV32IZFH-NEXT: or a0, s1, a0
; RV32IZFH-NEXT: feq.s a2, fs0, fs0
; RV32IZFH-NEXT: neg a2, a2
; RV32IZFH-NEXT: lui a4, 524288
; RV32IZFH-NEXT: lui a3, 524288
; RV32IZFH-NEXT: beqz s2, .LBB10_2
; RV32IZFH-NEXT: lui a2, 524288
; RV32IZFH-NEXT: beqz s0, .LBB10_2
; RV32IZFH-NEXT: # %bb.1: # %start
; RV32IZFH-NEXT: mv a3, a1
; RV32IZFH-NEXT: mv a2, a1
; RV32IZFH-NEXT: .LBB10_2: # %start
; RV32IZFH-NEXT: and a0, a2, a0
; RV32IZFH-NEXT: beqz s0, .LBB10_4
; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IZFH-NEXT: flt.s a3, fa5, fs0
; RV32IZFH-NEXT: beqz a3, .LBB10_4
; RV32IZFH-NEXT: # %bb.3:
; RV32IZFH-NEXT: addi a3, a4, -1
; RV32IZFH-NEXT: addi a2, a4, -1
; RV32IZFH-NEXT: .LBB10_4: # %start
; RV32IZFH-NEXT: and a1, a2, a3
; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 32
; RV32IZFH-NEXT: feq.s a1, fs0, fs0
; RV32IZFH-NEXT: neg a4, a1
; RV32IZFH-NEXT: and a1, a4, a2
; RV32IZFH-NEXT: neg a2, a3
; RV32IZFH-NEXT: neg a3, s0
; RV32IZFH-NEXT: and a0, a3, a0
; RV32IZFH-NEXT: or a0, a2, a0
; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_l_h_sat:
@@ -2199,47 +2193,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32IDZFH-LABEL: fcvt_l_h_sat:
; RV32IDZFH: # %bb.0: # %start
; RV32IDZFH-NEXT: addi sp, sp, -32
; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: addi sp, sp, -16
; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IDZFH-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IDZFH-NEXT: flt.s s0, fa5, fs0
; RV32IDZFH-NEXT: neg s1, s0
; RV32IDZFH-NEXT: lui a0, 913408
; RV32IDZFH-NEXT: fmv.w.x fa5, a0
; RV32IDZFH-NEXT: fle.s s2, fa5, fs0
; RV32IDZFH-NEXT: neg s3, s2
; RV32IDZFH-NEXT: fle.s s0, fa5, fs0
; RV32IDZFH-NEXT: fmv.s fa0, fs0
; RV32IDZFH-NEXT: call __fixsfdi
; RV32IDZFH-NEXT: and a0, s3, a0
; RV32IDZFH-NEXT: or a0, s1, a0
; RV32IDZFH-NEXT: feq.s a2, fs0, fs0
; RV32IDZFH-NEXT: neg a2, a2
; RV32IDZFH-NEXT: lui a4, 524288
; RV32IDZFH-NEXT: lui a3, 524288
; RV32IDZFH-NEXT: beqz s2, .LBB10_2
; RV32IDZFH-NEXT: lui a2, 524288
; RV32IDZFH-NEXT: beqz s0, .LBB10_2
; RV32IDZFH-NEXT: # %bb.1: # %start
; RV32IDZFH-NEXT: mv a3, a1
; RV32IDZFH-NEXT: mv a2, a1
; RV32IDZFH-NEXT: .LBB10_2: # %start
; RV32IDZFH-NEXT: and a0, a2, a0
; RV32IDZFH-NEXT: beqz s0, .LBB10_4
; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IDZFH-NEXT: flt.s a3, fa5, fs0
; RV32IDZFH-NEXT: beqz a3, .LBB10_4
; RV32IDZFH-NEXT: # %bb.3:
; RV32IDZFH-NEXT: addi a3, a4, -1
; RV32IDZFH-NEXT: addi a2, a4, -1
; RV32IDZFH-NEXT: .LBB10_4: # %start
; RV32IDZFH-NEXT: and a1, a2, a3
; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: feq.s a1, fs0, fs0
; RV32IDZFH-NEXT: neg a4, a1
; RV32IDZFH-NEXT: and a1, a4, a2
; RV32IDZFH-NEXT: neg a2, a3
; RV32IDZFH-NEXT: neg a3, s0
; RV32IDZFH-NEXT: and a0, a3, a0
; RV32IDZFH-NEXT: or a0, a2, a0
; RV32IDZFH-NEXT: and a0, a4, a0
; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32IDZFH-NEXT: addi sp, sp, 32
; RV32IDZFH-NEXT: addi sp, sp, 16
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_l_h_sat:
@@ -2515,47 +2503,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32ID-LABEL: fcvt_l_h_sat:
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -32
; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32ID-NEXT: call __extendhfsf2
; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.s fs0, fa0
; RV32ID-NEXT: flt.s s0, fa5, fa0
; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
; RV32ID-NEXT: fle.s s2, fa5, fa0
; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: fle.s s0, fa5, fa0
; RV32ID-NEXT: call __fixsfdi
; RV32ID-NEXT: and a0, s3, a0
; RV32ID-NEXT: or a0, s1, a0
; RV32ID-NEXT: feq.s a2, fs0, fs0
; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
; RV32ID-NEXT: lui a3, 524288
; RV32ID-NEXT: beqz s2, .LBB10_2
; RV32ID-NEXT: lui a2, 524288
; RV32ID-NEXT: beqz s0, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: mv a2, a1
; RV32ID-NEXT: .LBB10_2: # %start
; RV32ID-NEXT: and a0, a2, a0
; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32ID-NEXT: flt.s a3, fa5, fs0
; RV32ID-NEXT: beqz a3, .LBB10_4
; RV32ID-NEXT: # %bb.3:
; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: addi a2, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
; RV32ID-NEXT: and a1, a2, a3
; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: feq.s a1, fs0, fs0
; RV32ID-NEXT: neg a4, a1
; RV32ID-NEXT: and a1, a4, a2
; RV32ID-NEXT: neg a2, s0
; RV32ID-NEXT: and a0, a2, a0
; RV32ID-NEXT: neg a2, a3
; RV32ID-NEXT: or a0, a2, a0
; RV32ID-NEXT: and a0, a4, a0
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: addi sp, sp, 16
; RV32ID-NEXT: ret
;
; RV64ID-LABEL: fcvt_l_h_sat:
@@ -2574,47 +2556,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32IFZFHMIN-LABEL: fcvt_l_h_sat:
; RV32IFZFHMIN: # %bb.0: # %start
; RV32IFZFHMIN-NEXT: addi sp, sp, -32
; RV32IFZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IFZFHMIN-NEXT: addi sp, sp, -16
; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0
; RV32IFZFHMIN-NEXT: flt.s s0, fa5, fs0
; RV32IFZFHMIN-NEXT: neg s1, s0
; RV32IFZFHMIN-NEXT: lui a0, 913408
; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IFZFHMIN-NEXT: fle.s s2, fa5, fs0
; RV32IFZFHMIN-NEXT: neg s3, s2
; RV32IFZFHMIN-NEXT: fle.s s0, fa5, fs0
; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IFZFHMIN-NEXT: call __fixsfdi
; RV32IFZFHMIN-NEXT: and a0, s3, a0
; RV32IFZFHMIN-NEXT: or a0, s1, a0
; RV32IFZFHMIN-NEXT: feq.s a2, fs0, fs0
; RV32IFZFHMIN-NEXT: neg a2, a2
; RV32IFZFHMIN-NEXT: lui a4, 524288
; RV32IFZFHMIN-NEXT: lui a3, 524288
; RV32IFZFHMIN-NEXT: beqz s2, .LBB10_2
; RV32IFZFHMIN-NEXT: lui a2, 524288
; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_2
; RV32IFZFHMIN-NEXT: # %bb.1: # %start
; RV32IFZFHMIN-NEXT: mv a3, a1
; RV32IFZFHMIN-NEXT: mv a2, a1
; RV32IFZFHMIN-NEXT: .LBB10_2: # %start
; RV32IFZFHMIN-NEXT: and a0, a2, a0
; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_4
; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IFZFHMIN-NEXT: flt.s a3, fa5, fs0
; RV32IFZFHMIN-NEXT: beqz a3, .LBB10_4
; RV32IFZFHMIN-NEXT: # %bb.3:
; RV32IFZFHMIN-NEXT: addi a3, a4, -1
; RV32IFZFHMIN-NEXT: addi a2, a4, -1
; RV32IFZFHMIN-NEXT: .LBB10_4: # %start
; RV32IFZFHMIN-NEXT: and a1, a2, a3
; RV32IFZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: addi sp, sp, 32
; RV32IFZFHMIN-NEXT: feq.s a1, fs0, fs0
; RV32IFZFHMIN-NEXT: neg a4, a1
; RV32IFZFHMIN-NEXT: and a1, a4, a2
; RV32IFZFHMIN-NEXT: neg a2, a3
; RV32IFZFHMIN-NEXT: neg a3, s0
; RV32IFZFHMIN-NEXT: and a0, a3, a0
; RV32IFZFHMIN-NEXT: or a0, a2, a0
; RV32IFZFHMIN-NEXT: and a0, a4, a0
; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; RV32IFZFHMIN-NEXT: addi sp, sp, 16
; RV32IFZFHMIN-NEXT: ret
;
; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat:
@@ -2629,47 +2605,41 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32IDZFHMIN-LABEL: fcvt_l_h_sat:
; RV32IDZFHMIN: # %bb.0: # %start
; RV32IDZFHMIN-NEXT: addi sp, sp, -32
; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: addi sp, sp, -16
; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0)
; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0
; RV32IDZFHMIN-NEXT: flt.s s0, fa5, fs0
; RV32IDZFHMIN-NEXT: neg s1, s0
; RV32IDZFHMIN-NEXT: lui a0, 913408
; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IDZFHMIN-NEXT: fle.s s2, fa5, fs0
; RV32IDZFHMIN-NEXT: neg s3, s2
; RV32IDZFHMIN-NEXT: fle.s s0, fa5, fs0
; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IDZFHMIN-NEXT: call __fixsfdi
; RV32IDZFHMIN-NEXT: and a0, s3, a0
; RV32IDZFHMIN-NEXT: or a0, s1, a0
; RV32IDZFHMIN-NEXT: feq.s a2, fs0, fs0
; RV32IDZFHMIN-NEXT: neg a2, a2
; RV32IDZFHMIN-NEXT: lui a4, 524288
; RV32IDZFHMIN-NEXT: lui a3, 524288
; RV32IDZFHMIN-NEXT: beqz s2, .LBB10_2
; RV32IDZFHMIN-NEXT: lui a2, 524288
; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_2
; RV32IDZFHMIN-NEXT: # %bb.1: # %start
; RV32IDZFHMIN-NEXT: mv a3, a1
; RV32IDZFHMIN-NEXT: mv a2, a1
; RV32IDZFHMIN-NEXT: .LBB10_2: # %start
; RV32IDZFHMIN-NEXT: and a0, a2, a0
; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_4
; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0)
; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32IDZFHMIN-NEXT: flt.s a3, fa5, fs0
; RV32IDZFHMIN-NEXT: beqz a3, .LBB10_4
; RV32IDZFHMIN-NEXT: # %bb.3:
; RV32IDZFHMIN-NEXT: addi a3, a4, -1
; RV32IDZFHMIN-NEXT: addi a2, a4, -1
; RV32IDZFHMIN-NEXT: .LBB10_4: # %start
; RV32IDZFHMIN-NEXT: and a1, a2, a3
; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: feq.s a1, fs0, fs0
; RV32IDZFHMIN-NEXT: neg a4, a1
; RV32IDZFHMIN-NEXT: and a1, a4, a2
; RV32IDZFHMIN-NEXT: neg a2, a3
; RV32IDZFHMIN-NEXT: neg a3, s0
; RV32IDZFHMIN-NEXT: and a0, a3, a0
; RV32IDZFHMIN-NEXT: or a0, a2, a0
; RV32IDZFHMIN-NEXT: and a0, a4, a0
; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
; RV32IDZFHMIN-NEXT: addi sp, sp, 32
; RV32IDZFHMIN-NEXT: addi sp, sp, 16
; RV32IDZFHMIN-NEXT: ret
;
; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat:
624 changes: 300 additions & 324 deletions llvm/test/CodeGen/RISCV/half-round-conv-sat.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/iabs.ll
@@ -630,8 +630,8 @@ define void @zext16_abs8(i8 %x, ptr %p) {
; RV32I-LABEL: zext16_abs8:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: srai a2, a0, 31
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: sh a0, 0(a1)
@@ -648,8 +648,8 @@ define void @zext16_abs8(i8 %x, ptr %p) {
; RV64I-LABEL: zext16_abs8:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 56
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: srai a2, a0, 63
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: xor a0, a0, a2
; RV64I-NEXT: subw a0, a0, a2
; RV64I-NEXT: sh a0, 0(a1)
521 changes: 263 additions & 258 deletions llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll

Large diffs are not rendered by default.

54 changes: 26 additions & 28 deletions llvm/test/CodeGen/X86/abdu-vector-128.ll
@@ -715,43 +715,41 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin
;
; SSE42-LABEL: abd_cmp_v2i64_multiuse_cmp:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm0, %xmm2
; SSE42-NEXT: psubq %xmm1, %xmm2
; SSE42-NEXT: movdqa %xmm1, %xmm3
; SSE42-NEXT: pxor %xmm2, %xmm3
; SSE42-NEXT: pxor %xmm0, %xmm2
; SSE42-NEXT: pcmpgtq %xmm3, %xmm2
; SSE42-NEXT: movdqa %xmm0, %xmm3
; SSE42-NEXT: psubq %xmm1, %xmm3
; SSE42-NEXT: psubq %xmm0, %xmm1
; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE42-NEXT: paddq %xmm1, %xmm2
; SSE42-NEXT: movdqa %xmm2, %xmm0
; SSE42-NEXT: psubq %xmm0, %xmm3
; SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: pxor %xmm4, %xmm1
; SSE42-NEXT: pxor %xmm4, %xmm0
; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; SSE42-NEXT: paddq %xmm3, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpsubq %xmm0, %xmm1, %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: abd_cmp_v2i64_multiuse_cmp:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vblendvpd %xmm2, %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpsubq %xmm0, %xmm1, %xmm3
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm1
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: abd_cmp_v2i64_multiuse_cmp:
10 changes: 2 additions & 8 deletions llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
@@ -52,10 +52,7 @@
define i32 @kmovrk_1(<4 x ptr> %arg) {
; AVX512-LABEL: kmovrk_1:
; AVX512: # %bb.0: # %bb
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; AVX512-NEXT: kmovw %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x93,0xc0]
; AVX512-NEXT: testb $15, %al # encoding: [0xa8,0x0f]
; AVX512-NEXT: vptest %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc0]
; AVX512-NEXT: jne .LBB2_1 # encoding: [0x75,A]
; AVX512-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
; AVX512-NEXT: # %bb.2: # %bb3
@@ -66,10 +63,7 @@ define i32 @kmovrk_1(<4 x ptr> %arg) {
;
; AVX512BW-LABEL: kmovrk_1:
; AVX512BW: # %bb.0: # %bb
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc0]
; AVX512BW-NEXT: kmovd %k0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x93,0xc0]
; AVX512BW-NEXT: testb $15, %al # encoding: [0xa8,0x0f]
; AVX512BW-NEXT: vptest %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc0]
; AVX512BW-NEXT: jne .LBB2_1 # encoding: [0x75,A]
; AVX512BW-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1
; AVX512BW-NEXT: # %bb.2: # %bb3
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/avx512-broadcast-arith.ll
@@ -30,13 +30,13 @@ define <64 x i8> @add_v64i8_broadcasts(<64 x i8> %a0, i64 %a1, i8 %a2) {
; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-NEXT: vpternlogq $216, %zmm2, %zmm1, %zmm0
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm3
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm4
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm4
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm3
; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpaddb %ymm1, %ymm3, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
; AVX512F-NEXT: vpternlogq $226, %zmm4, %zmm2, %zmm0
; AVX512F-NEXT: retq
;
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/X86/combine-mul.ll
@@ -80,13 +80,13 @@ define <4 x i32> @combine_vec_mul_pow2b(<4 x i32> %x) {
define <4 x i64> @combine_vec_mul_pow2c(<4 x i64> %x) {
; SSE-LABEL: combine_vec_mul_pow2c:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: paddq %xmm0, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psllq $4, %xmm2
; SSE-NEXT: psllq $2, %xmm1
; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: paddq %xmm0, %xmm2
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_mul_pow2c:
@@ -399,14 +399,12 @@ define i64 @combine_mul_self_demandedbits(i64 %x) {
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: imulq %rdi, %rax
; SSE-NEXT: andq $-3, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: combine_mul_self_demandedbits:
; AVX: # %bb.0:
; AVX-NEXT: movq %rdi, %rax
; AVX-NEXT: imulq %rdi, %rax
; AVX-NEXT: andq $-3, %rax
; AVX-NEXT: retq
%1 = mul i64 %x, %x
%2 = and i64 %1, -3
Expand Down
337 changes: 170 additions & 167 deletions llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions llvm/test/CodeGen/X86/fold-masked-merge.ll
@@ -56,9 +56,7 @@ define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
; NOBMI-LABEL: masked_merge2:
; NOBMI: # %bb.0:
; NOBMI-NEXT: movl %esi, %eax
; NOBMI-NEXT: xorb %sil, %al
; NOBMI-NEXT: andb %dil, %al
; NOBMI-NEXT: xorb %sil, %al
; NOBMI-NEXT: # kill: def $al killed $al killed $eax
; NOBMI-NEXT: retq
;
; BMI-LABEL: masked_merge2:
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/freeze-combine.ll
@@ -3,21 +3,21 @@
define i32 @const() {
; CHECK-LABEL: name: const
; CHECK: bb.0 (%ir-block.0):
; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1
; CHECK: $eax = COPY [[MOV32ri]]
; CHECK: RET 0, $eax
; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 1
; CHECK-NEXT: $eax = COPY [[MOV32ri]]
; CHECK-NEXT: RET 0, $eax
%y = freeze i32 1
ret i32 %y
}

define i32 @fold(i32 %x) {
; CHECK-LABEL: name: fold
; CHECK: bb.0 (%ir-block.0):
; CHECK: liveins: $edi
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi
; CHECK: [[COPY1:%[0-9]+]]:gr32 = COPY [[COPY]]
; CHECK: $eax = COPY [[COPY1]]
; CHECK: RET 0, $eax
; CHECK-NEXT: liveins: $edi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi
; CHECK-NEXT: $eax = COPY [[COPY]]
; CHECK-NEXT: RET 0, $eax
%y = freeze i32 %x
%z = freeze i32 %y
ret i32 %z
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/gfni-funnel-shifts.ll
@@ -522,17 +522,17 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
define <16 x i8> @splatconstant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; GFNISSE-LABEL: splatconstant_fshr_v16i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: paddb %xmm0, %xmm0
; GFNISSE-NEXT: psrlw $7, %xmm1
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; GFNISSE-NEXT: paddb %xmm0, %xmm0
; GFNISSE-NEXT: por %xmm1, %xmm0
; GFNISSE-NEXT: retq
;
; GFNIAVX1OR2-LABEL: splatconstant_fshr_v16i8:
; GFNIAVX1OR2: # %bb.0:
; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm1, %xmm1
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: retq
;
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/X86/gfni-rotates.ll
@@ -421,18 +421,18 @@ define <16 x i8> @splatconstant_rotr_v16i8(<16 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_rotr_v16i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: movdqa %xmm0, %xmm1
; GFNISSE-NEXT: psrlw $7, %xmm1
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; GFNISSE-NEXT: paddb %xmm0, %xmm0
; GFNISSE-NEXT: paddb %xmm0, %xmm1
; GFNISSE-NEXT: psrlw $7, %xmm0
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; GFNISSE-NEXT: por %xmm1, %xmm0
; GFNISSE-NEXT: retq
;
; GFNIAVX1OR2-LABEL: splatconstant_rotr_v16i8:
; GFNIAVX1OR2: # %bb.0:
; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm1
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpaddb %xmm0, %xmm0, %xmm1
; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpor %xmm0, %xmm1, %xmm0
; GFNIAVX1OR2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_rotr_v16i8:
30 changes: 18 additions & 12 deletions llvm/test/CodeGen/X86/known-never-zero.ll
@@ -676,12 +676,13 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rorl %cl, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB22_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB22_1:
; X64-NEXT: movl $32, %eax
Expand Down Expand Up @@ -713,12 +714,13 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rorl %cl, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB23_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB23_1:
; X64-NEXT: movl $32, %eax
Expand Down Expand Up @@ -773,12 +775,13 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %edi
; X64-NEXT: rorl %cl, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB25_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB25_1:
; X64-NEXT: movl $32, %eax
Expand Down Expand Up @@ -808,12 +811,13 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: roll %cl, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB26_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB26_1:
; X64-NEXT: movl $32, %eax
Expand Down Expand Up @@ -845,12 +849,13 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: roll %cl, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB27_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB27_1:
; X64-NEXT: movl $32, %eax
Expand Down Expand Up @@ -905,12 +910,13 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %edi
; X64-NEXT: roll %cl, %eax
; X64-NEXT: testl %edi, %edi
; X64-NEXT: je .LBB29_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB29_1:
; X64-NEXT: movl $32, %eax
94 changes: 46 additions & 48 deletions llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -893,27 +893,26 @@ define <2 x i64> @vec128_i64_signed_reg_reg(<2 x i64> %a1, <2 x i64> %a2) nounwi
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm6
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
; SSE41-NEXT: pand %xmm5, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: psubq %xmm1, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: pmovsxbq {{.*#+}} xmm3 = [1,1]
; SSE41-NEXT: por %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: psubq %xmm1, %xmm4
; SSE41-NEXT: psubq %xmm2, %xmm1
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: psrlq $1, %xmm0
; SSE41-NEXT: psrlq $33, %xmm1
; SSE41-NEXT: pmuludq %xmm6, %xmm1
; SSE41-NEXT: movdqa %xmm6, %xmm3
; SSE41-NEXT: psrlq $32, %xmm3
; SSE41-NEXT: pmuludq %xmm0, %xmm3
; SSE41-NEXT: paddq %xmm1, %xmm3
; SSE41-NEXT: psllq $32, %xmm3
; SSE41-NEXT: pmuludq %xmm6, %xmm0
; SSE41-NEXT: pmuludq %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: psrlq $32, %xmm4
; SSE41-NEXT: pmuludq %xmm0, %xmm4
; SSE41-NEXT: paddq %xmm1, %xmm4
; SSE41-NEXT: psllq $32, %xmm4
; SSE41-NEXT: pmuludq %xmm3, %xmm0
; SSE41-NEXT: paddq %xmm2, %xmm0
; SSE41-NEXT: paddq %xmm3, %xmm0
; SSE41-NEXT: paddq %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vec128_i64_signed_reg_reg:
Expand Down Expand Up @@ -1077,27 +1076,26 @@ define <2 x i64> @vec128_i64_unsigned_reg_reg(<2 x i64> %a1, <2 x i64> %a2) noun
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm6
; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
; SSE41-NEXT: pand %xmm5, %xmm0
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: psubq %xmm1, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: pmovsxbq {{.*#+}} xmm3 = [1,1]
; SSE41-NEXT: por %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: psubq %xmm1, %xmm4
; SSE41-NEXT: psubq %xmm2, %xmm1
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: psrlq $1, %xmm0
; SSE41-NEXT: psrlq $33, %xmm1
; SSE41-NEXT: pmuludq %xmm6, %xmm1
; SSE41-NEXT: movdqa %xmm6, %xmm3
; SSE41-NEXT: psrlq $32, %xmm3
; SSE41-NEXT: pmuludq %xmm0, %xmm3
; SSE41-NEXT: paddq %xmm1, %xmm3
; SSE41-NEXT: psllq $32, %xmm3
; SSE41-NEXT: pmuludq %xmm6, %xmm0
; SSE41-NEXT: pmuludq %xmm3, %xmm1
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: psrlq $32, %xmm4
; SSE41-NEXT: pmuludq %xmm0, %xmm4
; SSE41-NEXT: paddq %xmm1, %xmm4
; SSE41-NEXT: psllq $32, %xmm4
; SSE41-NEXT: pmuludq %xmm3, %xmm0
; SSE41-NEXT: paddq %xmm2, %xmm0
; SSE41-NEXT: paddq %xmm3, %xmm0
; SSE41-NEXT: paddq %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: vec128_i64_unsigned_reg_reg:
Expand Down Expand Up @@ -1993,14 +1991,14 @@ define <8 x i16> @vec128_i16_unsigned_reg_reg(<8 x i16> %a1, <8 x i16> %a2) noun
;
; AVX512VL-FALLBACK-LABEL: vec128_i16_unsigned_reg_reg:
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminuw %xmm1, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubw %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm2, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubw %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpminuw %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubw %xmm1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpxor %xmm1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
;
Expand Down Expand Up @@ -2786,14 +2784,14 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
;
; AVX512VL-FALLBACK-LABEL: vec128_i8_unsigned_reg_reg:
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminub %xmm1, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm2, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpmaxub %xmm1, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpminub %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm2
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
;
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
@@ -1445,14 +1445,14 @@ define <16 x i16> @vec256_i16_unsigned_reg_reg(<16 x i16> %a1, <16 x i16> %a2) n
;
; AVX512VL-FALLBACK-LABEL: vec256_i16_unsigned_reg_reg:
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm2, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpxor %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpxor %ymm1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
;
Expand Down Expand Up @@ -2210,14 +2210,14 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw
;
; AVX512VL-FALLBACK-LABEL: vec256_i8_unsigned_reg_reg:
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm2, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm1, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpminub %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
;
202 changes: 101 additions & 101 deletions llvm/test/CodeGen/X86/pr38539.ll
@@ -22,7 +22,7 @@ define void @f() nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $176, %esp
; X86-NEXT: subl $160, %esp
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
@@ -47,55 +47,54 @@ define void @f() nounwind {
; X86-NEXT: testl %edi, %edi
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %BB_udiv-special-cases
; X86-NEXT: bsrl %esi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: bsrl %esi, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: addl $32, %eax
; X86-NEXT: jmp .LBB0_3
; X86-NEXT: .LBB0_1:
; X86-NEXT: bsrl %edi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: bsrl %edi, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: .LBB0_3: # %BB_udiv-special-cases
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: testl %edx, %edx
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB0_4
; X86-NEXT: # %bb.5: # %BB_udiv-special-cases
; X86-NEXT: addl $64, %ecx
; X86-NEXT: addl $64, %eax
; X86-NEXT: jmp .LBB0_6
; X86-NEXT: .LBB0_4:
; X86-NEXT: bsrl %edx, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: addl $32, %ecx
; X86-NEXT: bsrl %edx, %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: addl $32, %eax
; X86-NEXT: .LBB0_6: # %BB_udiv-special-cases
; X86-NEXT: subl $62, %ecx
; X86-NEXT: subl $62, %eax
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: sbbl %eax, %eax
; X86-NEXT: addl $-66, %ecx
; X86-NEXT: sbbl %ecx, %ecx
; X86-NEXT: addl $-66, %eax
; X86-NEXT: adcl $-1, %ebx
; X86-NEXT: adcl $3, %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movb $1, %al
; X86-NEXT: testb %al, %al
; X86-NEXT: adcl $3, %ecx
; X86-NEXT: movl %ecx, %esi
; X86-NEXT: movb $1, %cl
; X86-NEXT: testb %cl, %cl
; X86-NEXT: jne .LBB0_11
; X86-NEXT: # %bb.7: # %BB_udiv-special-cases
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: xorl $65, %eax
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %edi, %eax
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $65, %ecx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: je .LBB0_11
; X86-NEXT: # %bb.8: # %udiv-bb1
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: addl $1, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: adcl $0, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: adcl $0, %ebx
; X86-NEXT: andl $3, %ebx
; X86-NEXT: movb $65, %cl
; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, %ch
@@ -112,29 +111,31 @@ define void @f() nounwind {
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 136(%esp,%eax), %edx
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 120(%esp,%eax), %edi
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shll %cl, %edx
; X86-NEXT: shll %cl, %edi
; X86-NEXT: notb %cl
; X86-NEXT: movl 128(%esp,%eax), %edi
; X86-NEXT: movl 132(%esp,%eax), %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl 112(%esp,%eax), %esi
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 116(%esp,%eax), %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: shrl %cl, %eax
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll %cl, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ebx, %ecx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: je .LBB0_11
; X86-NEXT: # %bb.9: # %udiv-preheader
; X86-NEXT: orl %eax, %edx
; X86-NEXT: andl $3, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %eax, %edi
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
@@ -149,20 +150,20 @@ define void @f() nounwind {
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $15, %al
; X86-NEXT: movzbl %al, %esi
; X86-NEXT: movl 80(%esp,%esi), %edx
; X86-NEXT: movl 84(%esp,%esi), %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 64(%esp,%eax), %edi
; X86-NEXT: movl 68(%esp,%eax), %edx
; X86-NEXT: movl %edx, %esi
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shrl %cl, %edi
; X86-NEXT: shrl %cl, %esi
; X86-NEXT: notb %cl
; X86-NEXT: movl 88(%esp,%esi), %esi
; X86-NEXT: addl %esi, %esi
; X86-NEXT: shll %cl, %esi
; X86-NEXT: orl %edi, %esi
; X86-NEXT: movl 72(%esp,%eax), %ebx
; X86-NEXT: addl %ebx, %ebx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: movb %ch, %cl
; X86-NEXT: shrdl %cl, %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shrdl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
@@ -179,63 +180,62 @@ define void @f() nounwind {
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB0_10: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $1, %esi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $1, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl $1, %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: andl $2, %eax
; X86-NEXT: shrl %eax
; X86-NEXT: leal (%eax,%edx,2), %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: shldl $1, %edi, %ebx
; X86-NEXT: shldl $1, %ebx, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: orl %esi, %ebx
; X86-NEXT: movl %esi, %edx
; X86-NEXT: andl $2, %edx
; X86-NEXT: shrl %edx
; X86-NEXT: leal (%edx,%ebx,2), %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shldl $1, %edx, %esi
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shldl $1, %eax, %edi
; X86-NEXT: orl %esi, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shldl $1, %eax, %edx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %eax, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $3, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: sbbl %esi, %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %ecx, %ebx
; X86-NEXT: shll $30, %ebx
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: sarl $30, %eax
; X86-NEXT: sarl $31, %ebx
; X86-NEXT: shrdl $1, %ebx, %eax
; X86-NEXT: movl %eax, %edi
; X86-NEXT: andl $1, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: sbbl %edi, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: sbbl %ecx, %esi
; X86-NEXT: shll $30, %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: sarl $30, %edx
; X86-NEXT: sarl $31, %esi
; X86-NEXT: shrdl $1, %esi, %edx
; X86-NEXT: movl %edx, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %esi, %eax
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: subl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %ebx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; X86-NEXT: subl %edx, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %esi, %edi
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: andl $3, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
; X86-NEXT: adcl $-1, %ebx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: adcl $3, %edi
; X86-NEXT: andl $3, %edi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: adcl $-1, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: adcl $3, %esi
; X86-NEXT: andl $3, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %edi, %eax
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %esi, %eax
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %edx, %eax
; X86-NEXT: jne .LBB0_10
; X86-NEXT: .LBB0_11: # %udiv-end
; X86-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
31 changes: 15 additions & 16 deletions llvm/test/CodeGen/X86/pr62286.ll
@@ -8,21 +8,20 @@ define i64 @PR62286(i32 %a) {
; SSE-LABEL: PR62286:
; SSE: # %bb.0:
; SSE-NEXT: movd %edi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,0]
; SSE-NEXT: paddd %xmm1, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,1,0]
; SSE-NEXT: paddd %xmm0, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: pcmpgtd %xmm1, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: pcmpgtd %xmm0, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: paddq %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-NEXT: pxor %xmm3, %xmm3
; SSE-NEXT: pcmpgtd %xmm0, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: pcmpgtd %xmm1, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: paddq %xmm0, %xmm1
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; SSE-NEXT: paddq %xmm1, %xmm0
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: PR62286:
@@ -47,10 +46,10 @@ define i64 @PR62286(i32 %a) {
; AVX2-LABEL: PR62286:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm1
; AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -14,7 +14,6 @@ define i256 @test1(i256 %a) nounwind {
; ILP: # %bb.0:
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: leal (%rsi,%rsi), %ecx
; ILP-NEXT: addb $3, %cl
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
@@ -23,6 +22,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; ILP-NEXT: addb $3, %cl
; ILP-NEXT: movl %ecx, %edx
; ILP-NEXT: shrb $3, %dl
; ILP-NEXT: andb $7, %cl
250 changes: 125 additions & 125 deletions llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -563,227 +563,227 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: subq $120, %rsp
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
; X64-NEXT: psllq $32, %xmm3
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
; X64-NEXT: psrad $31, %xmm2
; X64-NEXT: psrlq $31, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pcmpgtd %xmm1, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm1, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rdx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: cmpq %rdx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r14
; X64-NEXT: cmovgeq %rdx, %rbp
; X64-NEXT: cmovgeq %rdx, %r13
; X64-NEXT: cmovgeq %rcx, %r12
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: cmpq %r13, %rcx
; X64-NEXT: movq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: sbbq %r12, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: cmovgeq %rax, %r12
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: cmpq %r13, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: sbbq %r12, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: psrlq $1, %xmm1
; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $212, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[0,1,1,3]
; X64-NEXT: psllq $32, %xmm0
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X64-NEXT: psrad $31, %xmm1
; X64-NEXT: psrlq $31, %xmm0
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pcmpgtd %xmm0, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: cmovgeq %rax, %r12
; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rcx
; X64-NEXT: cmpq %r13, %rcx
; X64-NEXT: movq $-1, %rax
; X64-NEXT: sbbq %r14, %rax
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: movq %rbp, %xmm0
; X64-NEXT: sbbq %r12, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movq %r13, %xmm0
; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rbx
; X64-NEXT: movq %rbx, %r13
; X64-NEXT: sarq $63, %r13
; X64-NEXT: shldq $31, %rbx, %r13
; X64-NEXT: movq %xmm0, %rbp
; X64-NEXT: movq %rbp, %r14
; X64-NEXT: sarq $63, %r14
; X64-NEXT: shldq $31, %rbp, %r14
; X64-NEXT: movq %rbp, %r15
; X64-NEXT: shlq $31, %r15
; X64-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; X64-NEXT: # xmm0 = mem[2,3,2,3]
; X64-NEXT: movq %xmm0, %rdx
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r15
; X64-NEXT: sarq $63, %r15
; X64-NEXT: movq %rbx, %r12
; X64-NEXT: shlq $31, %r12
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rdx, %rbx
; X64-NEXT: sarq $63, %rbx
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __divti3@PLT
; X64-NEXT: movq %rax, %rbp
; X64-NEXT: movq %rax, %r13
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: movq %rdx, %r14
; X64-NEXT: movq %rdx, %r12
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: subq $1, %rbp
; X64-NEXT: sbbq $0, %r14
; X64-NEXT: shrq $63, %rbx
; X64-NEXT: xorl %r15d, %ebx
; X64-NEXT: movq %r12, %rdi
; X64-NEXT: movq %r13, %rsi
; X64-NEXT: subq $1, %r13
; X64-NEXT: sbbq $0, %r12
; X64-NEXT: movq %r15, %rdi
; X64-NEXT: movq %r14, %rsi
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; X64-NEXT: movq %r15, %rcx
; X64-NEXT: movq %rbx, %rcx
; X64-NEXT: callq __modti3@PLT
; X64-NEXT: orq %rax, %rdx
; X64-NEXT: setne %al
; X64-NEXT: shrq $63, %rbp
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: testb %bl, %al
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
; X64-NEXT: cmpq %rcx, %rbp
; X64-NEXT: movq %r14, %rax
; X64-NEXT: cmpq %rcx, %r13
; X64-NEXT: movq %r12, %rax
; X64-NEXT: sbbq $0, %rax
; X64-NEXT: cmovgeq %rcx, %r13
; X64-NEXT: movl $0, %eax
; X64-NEXT: cmovgeq %rax, %r14
; X64-NEXT: cmovgeq %rcx, %rbp
; X64-NEXT: cmovgeq %rax, %r12
; X64-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; X64-NEXT: cmpq %rbp, %rax
; X64-NEXT: sbbq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: cmovgeq %rax, %rbp
; X64-NEXT: movq %rbp, %xmm1
; X64-NEXT: cmpq %r13, %rax
; X64-NEXT: sbbq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; X64-NEXT: cmovgeq %rax, %r13
; X64-NEXT: movq %r13, %xmm1
; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: psrlq $1, %xmm0