226 changes: 113 additions & 113 deletions llvm/test/CodeGen/AMDGPU/bypass-div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ define i64 @sdiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10
; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11
; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GFX9-NEXT: v_rcp_f32_e32 v2, v2
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT: v_trunc_f32_e32 v3, v3
; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3
; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6
Expand Down Expand Up @@ -171,12 +171,12 @@ define i64 @udiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
Expand Down Expand Up @@ -312,12 +312,12 @@ define i64 @srem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v9
; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v10
; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GFX9-NEXT: v_rcp_f32_e32 v2, v2
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT: v_trunc_f32_e32 v3, v3
; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v3
; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6
Expand Down Expand Up @@ -454,12 +454,12 @@ define i64 @urem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
Expand Down Expand Up @@ -709,118 +709,118 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB8_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v2, v11
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v3, v11, vcc
; GFX9-NEXT: v_xor_b32_e32 v2, v2, v11
; GFX9-NEXT: v_xor_b32_e32 v3, v4, v11
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v3
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v2
; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, 0, v3
; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v5
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v8, 0
; GFX9-NEXT: v_mul_lo_u32 v7, v9, v12
; GFX9-NEXT: v_mul_hi_u32 v13, v8, v4
; GFX9-NEXT: v_add3_u32 v7, v5, v7, v6
; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v7, 0
; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v5
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v4, 0
; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v6, vcc
; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v7, 0
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v14, v5, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v8, v4
; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v5, vcc
; GFX9-NEXT: v_mul_lo_u32 v6, v9, v12
; GFX9-NEXT: v_mul_lo_u32 v7, v10, v13
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v13, 0
; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0
; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v13, v7, 0
; GFX9-NEXT: v_mul_hi_u32 v14, v13, v4
; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v4, 0
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v14, v7
; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5
; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc
; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9
; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9
; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10
; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11
; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GFX9-NEXT: v_rcp_f32_e32 v2, v2
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT: v_trunc_f32_e32 v3, v3
; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3
; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
; GFX9-NEXT: v_mul_lo_u32 v5, v7, v12
; GFX9-NEXT: v_mul_hi_u32 v13, v6, v2
; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0
; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v4, vcc
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v14, v3, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v6, v2
; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v3, vcc
; GFX9-NEXT: v_mul_lo_u32 v4, v7, v12
; GFX9-NEXT: v_mul_lo_u32 v5, v8, v13
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0
; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5
; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0
; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0
; GFX9-NEXT: v_mul_hi_u32 v14, v13, v2
; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v5
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v12, v5, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v12, v3, vcc
; GFX9-NEXT: v_ashrrev_i32_e32 v7, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v7
; GFX9-NEXT: v_xor_b32_e32 v8, v0, v7
; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v7, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v5, 0
; GFX9-NEXT: v_mul_hi_u32 v9, v8, v4
; GFX9-NEXT: v_xor_b32_e32 v6, v6, v7
; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v9, v0
; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, 0, v1, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v4, 0
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v5, 0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v9, v0
; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v10, v1, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v5, vcc
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v0, v4
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
; GFX9-NEXT: v_mul_lo_u32 v9, v2, v4
; GFX9-NEXT: v_mul_lo_u32 v10, v3, v5
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v4, 0
; GFX9-NEXT: v_add3_u32 v1, v1, v10, v9
; GFX9-NEXT: v_sub_u32_e32 v9, v6, v1
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v8, v0
; GFX9-NEXT: v_subb_co_u32_e64 v8, s[4:5], v9, v2, vcc
; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v0, v3
; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5]
; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v2
; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7]
; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3
; GFX9-NEXT: v_xor_b32_e32 v5, v0, v7
; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v7, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0
; GFX9-NEXT: v_mul_hi_u32 v6, v5, v2
; GFX9-NEXT: v_xor_b32_e32 v4, v4, v7
; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0
; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v2
; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0
; GFX9-NEXT: v_add3_u32 v1, v1, v8, v6
; GFX9-NEXT: v_sub_u32_e32 v6, v4, v1
; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v5, v0
; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc
; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v0, v11
; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5]
; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v10
; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7]
; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v11
; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v10, v2
; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[6:7]
; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v4
; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v5, s[6:7]
; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v4
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc
; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v5, s[6:7]
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v12
; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v14, s[6:7]
; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v10
; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v2
; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7]
; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v2
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc
; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7]
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5
; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11
; GFX9-NEXT: v_cndmask_b32_e64 v5, v16, v14, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v8, v2, s[4:5]
; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v9, v3
; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_cndmask_b32_e64 v4, v15, v13, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT: v_xor_b32_e32 v5, v7, v9
; GFX9-NEXT: v_xor_b32_e32 v2, v2, v5
; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5
; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v2, v5
; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5]
; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9]
; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v8, v11
; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5]
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
; GFX9-NEXT: v_cndmask_b32_e64 v6, v15, v13, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
; GFX9-NEXT: v_xor_b32_e32 v6, v7, v11
; GFX9-NEXT: v_cndmask_b32_e64 v2, v12, v2, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc
; GFX9-NEXT: v_xor_b32_e32 v4, v4, v6
; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v3, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT: v_xor_b32_e32 v5, v5, v6
; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v6
; GFX9-NEXT: v_xor_b32_e32 v0, v0, v7
; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v5, v6, s[8:9]
; GFX9-NEXT: v_xor_b32_e32 v1, v1, v7
; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v7
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
Expand Down Expand Up @@ -884,12 +884,12 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/carryout-selection.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1804,12 +1804,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; CISI-NEXT: v_cvt_f32_u32_e32 v1, s3
; CISI-NEXT: s_sub_u32 s0, 0, s2
; CISI-NEXT: s_subb_u32 s1, 0, s3
; CISI-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; CISI-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; CISI-NEXT: v_rcp_f32_e32 v0, v0
; CISI-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CISI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CISI-NEXT: v_trunc_f32_e32 v1, v1
; CISI-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; CISI-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; CISI-NEXT: v_cvt_u32_f32_e32 v1, v1
; CISI-NEXT: v_cvt_u32_f32_e32 v0, v0
; CISI-NEXT: v_mul_lo_u32 v2, s0, v1
Expand Down Expand Up @@ -1954,12 +1954,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; VI-NEXT: v_cvt_f32_u32_e32 v1, s3
; VI-NEXT: s_sub_u32 s8, 0, s2
; VI-NEXT: s_subb_u32 s9, 0, s3
; VI-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; VI-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; VI-NEXT: v_rcp_f32_e32 v0, v0
; VI-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; VI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; VI-NEXT: v_trunc_f32_e32 v1, v1
; VI-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; VI-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; VI-NEXT: v_cvt_u32_f32_e32 v4, v1
; VI-NEXT: v_cvt_u32_f32_e32 v5, v0
; VI-NEXT: v_mul_lo_u32 v2, s8, v4
Expand Down Expand Up @@ -2111,12 +2111,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX9-NEXT: s_sub_u32 s0, 0, s2
; GFX9-NEXT: s_subb_u32 s1, 0, s3
; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX9-NEXT: v_rcp_f32_e32 v0, v0
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_readfirstlane_b32 s10, v1
Expand Down Expand Up @@ -2279,12 +2279,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1010-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX1010-NEXT: s_sub_u32 s9, 0, s2
; GFX1010-NEXT: s_subb_u32 s10, 0, s3
; GFX1010-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX1010-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX1010-NEXT: v_rcp_f32_e32 v0, v0
; GFX1010-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1010-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1010-NEXT: v_trunc_f32_e32 v1, v1
; GFX1010-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX1010-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX1010-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1010-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1010-NEXT: v_readfirstlane_b32 s0, v1
Expand Down Expand Up @@ -2441,12 +2441,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1030W32-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX1030W32-NEXT: s_sub_u32 s9, 0, s2
; GFX1030W32-NEXT: s_subb_u32 s10, 0, s3
; GFX1030W32-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1
; GFX1030W32-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0
; GFX1030W32-NEXT: v_rcp_f32_e32 v0, v0
; GFX1030W32-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1030W32-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1030W32-NEXT: v_trunc_f32_e32 v1, v1
; GFX1030W32-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1
; GFX1030W32-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
; GFX1030W32-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1030W32-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1030W32-NEXT: v_readfirstlane_b32 s0, v1
Expand Down Expand Up @@ -2603,12 +2603,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1030W64-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX1030W64-NEXT: s_sub_u32 s9, 0, s2
; GFX1030W64-NEXT: s_subb_u32 s10, 0, s3
; GFX1030W64-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1
; GFX1030W64-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0
; GFX1030W64-NEXT: v_rcp_f32_e32 v0, v0
; GFX1030W64-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1030W64-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1030W64-NEXT: v_trunc_f32_e32 v1, v1
; GFX1030W64-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1
; GFX1030W64-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
; GFX1030W64-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1030W64-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1030W64-NEXT: v_readfirstlane_b32 s8, v1
Expand Down Expand Up @@ -2766,15 +2766,15 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX11-NEXT: s_sub_u32 s9, 0, s2
; GFX11-NEXT: s_subb_u32 s10, 0, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1
; GFX11-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0
; GFX11-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX11-NEXT: v_trunc_f32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1
; GFX11-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
Expand Down
79 changes: 38 additions & 41 deletions llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_mov_b32_e32 v7, 0x3ca3d70a
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
Expand Down Expand Up @@ -36,42 +37,41 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX10-NEXT: v_fma_f32 v1, v1, v5, s28
; GFX10-NEXT: v_max_f32_e64 v6, s0, s0 clamp
; GFX10-NEXT: v_add_f32_e64 v5, s29, -1.0
; GFX10-NEXT: v_sub_f32_e32 v8, s0, v1
; GFX10-NEXT: v_fma_f32 v7, -s2, v6, s6
; GFX10-NEXT: v_sub_f32_e32 v9, s0, v1
; GFX10-NEXT: v_fma_f32 v8, -s2, v6, s6
; GFX10-NEXT: v_fma_f32 v5, v6, v5, 1.0
; GFX10-NEXT: v_mad_f32 v10, s2, v6, v2
; GFX10-NEXT: s_mov_b32 s0, 0x3c23d70a
; GFX10-NEXT: v_fmac_f32_e32 v1, v6, v8
; GFX10-NEXT: v_fmac_f32_e32 v10, v7, v6
; GFX10-NEXT: v_mad_f32 v11, s2, v6, v2
; GFX10-NEXT: v_fmac_f32_e32 v1, v6, v9
; GFX10-NEXT: v_fmac_f32_e32 v11, v8, v6
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mul_f32_e32 v9, s10, v0
; GFX10-NEXT: v_mul_f32_e32 v10, s10, v0
; GFX10-NEXT: v_fma_f32 v0, -v0, s10, s14
; GFX10-NEXT: v_mul_f32_e32 v8, s18, v2
; GFX10-NEXT: v_mul_f32_e32 v9, s18, v2
; GFX10-NEXT: v_mul_f32_e32 v3, s22, v3
; GFX10-NEXT: v_fmac_f32_e32 v9, v0, v6
; GFX10-NEXT: v_fmac_f32_e32 v10, v0, v6
; GFX10-NEXT: v_sub_f32_e32 v0, v1, v5
; GFX10-NEXT: v_mul_f32_e32 v1, v8, v6
; GFX10-NEXT: v_mul_f32_e32 v7, v6, v3
; GFX10-NEXT: v_fma_f32 v3, -v6, v3, v9
; GFX10-NEXT: v_mul_f32_e32 v1, v9, v6
; GFX10-NEXT: v_mul_f32_e32 v8, v6, v3
; GFX10-NEXT: v_fma_f32 v3, -v6, v3, v10
; GFX10-NEXT: v_fmac_f32_e32 v5, v0, v6
; GFX10-NEXT: v_fma_f32 v0, v2, s26, -v1
; GFX10-NEXT: v_fmac_f32_e32 v7, v3, v6
; GFX10-NEXT: v_fmac_f32_e32 v8, v3, v6
; GFX10-NEXT: v_fmac_f32_e32 v1, v0, v6
; GFX10-NEXT: v_mul_f32_e32 v0, v2, v6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_add_f32_e32 v4, v4, v10
; GFX10-NEXT: v_add_f32_e32 v4, v4, v11
; GFX10-NEXT: v_mul_f32_e32 v3, v4, v6
; GFX10-NEXT: v_fmaak_f32 v4, s0, v5, 0x3ca3d70a
; GFX10-NEXT: v_fmamk_f32 v4, v5, 0x3c23d70a, v7
; GFX10-NEXT: v_mul_f32_e32 v1, v3, v1
; GFX10-NEXT: v_mul_f32_e32 v2, v7, v4
; GFX10-NEXT: v_mul_f32_e32 v2, v8, v4
; GFX10-NEXT: v_fmac_f32_e32 v1, v2, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: _amdgpu_ps_main:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 0x3ca3d70a
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
Expand All @@ -96,43 +96,40 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX11-NEXT: s_buffer_load_b128 s[20:23], s[0:3], 0x70
; GFX11-NEXT: v_fma_f32 v1, v1, v5, s28
; GFX11-NEXT: v_max_f32_e64 v6, s0, s0 clamp
; GFX11-NEXT: s_buffer_load_b128 s[24:27], s[0:3], 0x10
; GFX11-NEXT: v_add_f32_e64 v5, s29, -1.0
; GFX11-NEXT: s_buffer_load_b128 s[24:27], s[0:3], 0x10
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_sub_f32_e32 v8, s0, v1
; GFX11-NEXT: v_fma_f32 v7, -s2, v6, s6
; GFX11-NEXT: v_fma_f32 v10, s2, v6, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_sub_f32_e32 v9, s0, v1
; GFX11-NEXT: v_fma_f32 v8, -s2, v6, s6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_fma_f32 v5, v6, v5, 1.0
; GFX11-NEXT: s_mov_b32 s0, 0x3c23d70a
; GFX11-NEXT: v_fma_f32 v11, s2, v6, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v9, s10, v0
; GFX11-NEXT: v_mul_f32_e32 v10, s10, v0
; GFX11-NEXT: v_fma_f32 v0, -v0, s10, s14
; GFX11-NEXT: v_mul_f32_e32 v3, s22, v3
; GFX11-NEXT: v_dual_fmac_f32 v1, v6, v8 :: v_dual_mul_f32 v8, s18, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_fmac_f32_e32 v9, v0, v6
; GFX11-NEXT: v_dual_fmac_f32 v10, v7, v6 :: v_dual_mul_f32 v7, v6, v3
; GFX11-NEXT: v_fmac_f32_e32 v1, v6, v9
; GFX11-NEXT: v_mul_f32_e32 v9, s18, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_fmac_f32_e32 v10, v0, v6
; GFX11-NEXT: v_sub_f32_e32 v0, v1, v5
; GFX11-NEXT: v_fma_f32 v3, -v6, v3, v9
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_fmac_f32_e32 v7, v3, v6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v5, v0, v6
; GFX11-NEXT: v_mul_f32_e32 v1, v8, v6
; GFX11-NEXT: v_mul_f32_e32 v3, s22, v3
; GFX11-NEXT: v_dual_fmac_f32 v11, v8, v6 :: v_dual_mul_f32 v8, v6, v3
; GFX11-NEXT: v_mul_f32_e32 v1, v9, v6
; GFX11-NEXT: v_fma_f32 v3, -v6, v3, v10
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_add_f32_e32 v4, v4, v10
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_dual_mul_f32 v3, v4, v6 :: v_dual_fmaak_f32 v4, s0, v5, 0x3ca3d70a
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_add_f32_e32 v4, v4, v11
; GFX11-NEXT: v_fma_f32 v0, v2, s26, -v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v1, v0, v6
; GFX11-NEXT: v_mul_f32_e32 v0, v2, v6
; GFX11-NEXT: v_mul_f32_e32 v2, v7, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v1, v3, v1
; GFX11-NEXT: v_fmac_f32_e32 v8, v3, v6
; GFX11-NEXT: v_dual_mul_f32 v3, v4, v6 :: v_dual_fmamk_f32 v4, v5, 0x3c23d70a, v7
; GFX11-NEXT: v_dual_mul_f32 v1, v3, v1 :: v_dual_mul_f32 v2, v8, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v1, v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e32 v0, 0, v1
; GFX11-NEXT: ; return to shader part epilog
.entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ define amdgpu_kernel void @mad_f16_imm_a(
}

; GCN-LABEL: {{^}}mad_f16_imm_b:
; GCN: v_mac_f16_e32 {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
define amdgpu_kernel void @mad_f16_imm_b(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_mac_f32_e32 v1, 0x40400000, v0
; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-NEXT: v_madmk_f32 v0, v0, 0x40400000, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
Expand All @@ -280,8 +280,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; VI-FLUSH-NEXT: s_waitcnt vmcnt(0)
; VI-FLUSH-NEXT: s_mov_b32 s0, s4
; VI-FLUSH-NEXT: s_mov_b32 s1, s5
; VI-FLUSH-NEXT: v_mac_f16_e32 v1, 0x4200, v0
; VI-FLUSH-NEXT: buffer_store_short v1, off, s[0:3], 0
; VI-FLUSH-NEXT: v_madmk_f16 v0, v0, 0x4200, v1
; VI-FLUSH-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-FLUSH-NEXT: s_endpgm
;
; VI-DENORM-LABEL: fmuladd_f16_imm_a:
Expand Down Expand Up @@ -353,8 +353,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
; GFX10-DENORM-NEXT: s_mov_b32 s0, s4
; GFX10-DENORM-NEXT: s_mov_b32 s1, s5
; GFX10-DENORM-NEXT: v_fmac_f16_e32 v1, 0x4200, v0
; GFX10-DENORM-NEXT: buffer_store_short v1, off, s[0:3], 0
; GFX10-DENORM-NEXT: v_fmamk_f16 v0, v0, 0x4200, v1
; GFX10-DENORM-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-DENORM-NEXT: s_endpgm
;
; GFX11-FLUSH-LABEL: fmuladd_f16_imm_a:
Expand Down Expand Up @@ -442,8 +442,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_mac_f32_e32 v1, 0x40400000, v0
; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-NEXT: v_madmk_f32 v0, v0, 0x40400000, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
Expand All @@ -466,8 +466,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; VI-FLUSH-NEXT: s_waitcnt vmcnt(0)
; VI-FLUSH-NEXT: s_mov_b32 s0, s4
; VI-FLUSH-NEXT: s_mov_b32 s1, s5
; VI-FLUSH-NEXT: v_mac_f16_e32 v1, 0x4200, v0
; VI-FLUSH-NEXT: buffer_store_short v1, off, s[0:3], 0
; VI-FLUSH-NEXT: v_madmk_f16 v0, v0, 0x4200, v1
; VI-FLUSH-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-FLUSH-NEXT: s_endpgm
;
; VI-DENORM-LABEL: fmuladd_f16_imm_b:
Expand Down Expand Up @@ -539,8 +539,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
; GFX10-DENORM-NEXT: s_mov_b32 s0, s4
; GFX10-DENORM-NEXT: s_mov_b32 s1, s5
; GFX10-DENORM-NEXT: v_fmac_f16_e32 v1, 0x4200, v0
; GFX10-DENORM-NEXT: buffer_store_short v1, off, s[0:3], 0
; GFX10-DENORM-NEXT: v_fmamk_f16 v0, v0, 0x4200, v1
; GFX10-DENORM-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-DENORM-NEXT: s_endpgm
;
; GFX11-FLUSH-LABEL: fmuladd_f16_imm_b:
Expand Down
73 changes: 31 additions & 42 deletions llvm/test/CodeGen/AMDGPU/llvm.log.ll

Large diffs are not rendered by default.

73 changes: 31 additions & 42 deletions llvm/test/CodeGen/AMDGPU/llvm.log10.ll

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions llvm/test/CodeGen/AMDGPU/madmk.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-LABEL: {{^}}madmk_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
Expand Down Expand Up @@ -96,7 +96,7 @@ define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32
; GCN-DAG: s_load_dword [[SREG:s[0-9]+]]
; GCN-DAG: buffer_load_dword [[VREG1:v[0-9]+]]
; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
; GCN: v_madmk_f32 {{v[0-9]+}}, [[VREG1]], 0x41200000, [[VREG2]]
; GCN: s_endpgm
define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
Expand Down Expand Up @@ -171,8 +171,9 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %

; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
; GCN: buffer_load_dword [[A:v[0-9]+]]
; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
; GCN: v_mov_b32_e32 [[B:v[0-9]+]], 2.0
; GCN: v_madmk_f32 {{v[0-9]+}}, [[A]], 0x41200000, [[B]]

define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/operand-folding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ entry:
; A subregister use operand should not be tied.
; CHECK-LABEL: {{^}}no_fold_tied_subregister:
; CHECK: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
; CHECK: buffer_store_dword v[[LO]]
; CHECK: v_madmk_f32 v[[RES:[0-9]+]], v[[HI]], 0x41200000, v[[LO]]
; CHECK: buffer_store_dword v[[RES]]
define amdgpu_kernel void @no_fold_tied_subregister() #1 {
%tmp1 = load volatile <2 x float>, ptr addrspace(1) undef
%tmp2 = extractelement <2 x float> %tmp1, i32 0
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/sdiv64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_sub_u32 s4, 0, s10
; GCN-NEXT: s_subb_u32 s5, 0, s11
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_ashr_i32 s12, s3, 31
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: s_add_u32 s2, s2, s12
Expand Down Expand Up @@ -247,12 +247,12 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v6, v2
; GCN-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GCN-NEXT: v_subb_u32_e32 v8, vcc, 0, v2, vcc
; GCN-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
; GCN-NEXT: v_madmk_f32 v5, v6, 0x4f800000, v5
; GCN-NEXT: v_rcp_f32_e32 v5, v5
; GCN-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; GCN-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
; GCN-NEXT: v_trunc_f32_e32 v6, v6
; GCN-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
; GCN-NEXT: v_madmk_f32 v5, v6, 0xcf800000, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v6, v6
; GCN-NEXT: v_mul_hi_u32 v9, v7, v5
Expand Down Expand Up @@ -1093,12 +1093,12 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3
; GCN-NEXT: s_sub_u32 s4, 0, s2
; GCN-NEXT: s_subb_u32 s5, 0, s3
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
Expand Down Expand Up @@ -1287,12 +1287,12 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v4, v1
; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc
; GCN-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3
; GCN-NEXT: v_rcp_f32_e32 v3, v3
; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
; GCN-NEXT: v_trunc_f32_e32 v4, v4
; GCN-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
; GCN-NEXT: v_madmk_f32 v3, v4, 0xcf800000, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_hi_u32 v7, v5, v3
Expand Down Expand Up @@ -1484,12 +1484,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v4, v1
; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc
; GCN-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3
; GCN-NEXT: v_rcp_f32_e32 v3, v3
; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
; GCN-NEXT: v_trunc_f32_e32 v4, v4
; GCN-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
; GCN-NEXT: v_madmk_f32 v3, v4, 0xcf800000, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_hi_u32 v7, v5, v3
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/srem64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_sub_u32 s0, 0, s12
; GCN-NEXT: s_subb_u32 s1, 0, s13
; GCN-NEXT: s_mov_b32 s4, s8
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s5, s9
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
Expand Down Expand Up @@ -226,12 +226,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v5, v2
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v2, vcc
; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GCN-NEXT: v_rcp_f32_e32 v4, v4
; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GCN-NEXT: v_trunc_f32_e32 v5, v5
; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_mul_hi_u32 v8, v6, v4
Expand Down Expand Up @@ -894,15 +894,15 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: s_sub_u32 s0, 0, s12
; GCN-NEXT: s_subb_u32 s1, 0, s13
; GCN-NEXT: s_ashr_i32 s6, s7, 31
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s7, s6
; GCN-NEXT: s_mov_b32 s8, s4
; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
Expand Down Expand Up @@ -1290,13 +1290,13 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_sub_u32 s2, 0, s8
; GCN-NEXT: s_subb_u32 s3, 0, s9
; GCN-NEXT: s_mov_b32 s4, s0
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s5, s1
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s2, v1
Expand Down Expand Up @@ -1481,12 +1481,12 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_mul_hi_u32 v6, v4, v2
Expand Down Expand Up @@ -1676,12 +1676,12 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_mul_hi_u32 v6, v4, v2
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/udiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2527,7 +2527,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; SI-NEXT: v_trunc_f32_e32 v3, v3
; SI-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; SI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
; SI-NEXT: v_cvt_u32_f32_e32 v3, v3
; SI-NEXT: v_mul_hi_u32 v4, v2, s4
Expand Down Expand Up @@ -2626,7 +2626,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; VI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; VI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; VI-NEXT: v_trunc_f32_e32 v3, v3
; VI-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; VI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; VI-NEXT: v_cvt_u32_f32_e32 v6, v2
; VI-NEXT: v_cvt_u32_f32_e32 v7, v3
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
Expand Down Expand Up @@ -2713,7 +2713,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v6, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v7, v3
; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/udiv64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
; GCN-NEXT: s_sub_u32 s4, 0, s8
; GCN-NEXT: s_subb_u32 s5, 0, s9
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
Expand Down Expand Up @@ -211,12 +211,12 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v5, v3
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GCN-NEXT: v_rcp_f32_e32 v4, v4
; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GCN-NEXT: v_trunc_f32_e32 v5, v5
; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_lo_u32 v8, v6, v5
Expand Down Expand Up @@ -688,7 +688,7 @@ define amdgpu_kernel void @s_test_udiv24_i48(ptr addrspace(1) %out, i48 %x, i48
; GCN-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GCN-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1
; GCN-NEXT: v_trunc_f32_e32 v2, v2
; GCN-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2
; GCN-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: s_mov_b32 s2, -1
Expand Down Expand Up @@ -886,12 +886,12 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3
; GCN-NEXT: s_sub_u32 s4, 0, s2
; GCN-NEXT: s_subb_u32 s5, 0, s3
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
Expand Down Expand Up @@ -1067,12 +1067,12 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_mul_lo_u32 v6, v4, v3
Expand Down Expand Up @@ -1335,7 +1335,7 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: s_mov_b32 s2, -1
Expand Down Expand Up @@ -1509,7 +1509,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_mul_hi_u32 v4, v2, s4
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/urem64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: s_sub_u32 s0, 0, s12
; GCN-NEXT: s_subb_u32 s1, 0, s13
; GCN-NEXT: s_mov_b32 s4, s8
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s5, s9
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
Expand Down Expand Up @@ -221,12 +221,12 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v5, v3
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GCN-NEXT: v_rcp_f32_e32 v4, v4
; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GCN-NEXT: v_trunc_f32_e32 v5, v5
; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_lo_u32 v8, v6, v5
Expand Down Expand Up @@ -716,13 +716,13 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_sub_u32 s0, 0, s6
; GCN-NEXT: s_subb_u32 s1, 0, s7
; GCN-NEXT: s_mov_b32 s8, s4
; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
Expand Down Expand Up @@ -903,7 +903,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -1086,12 +1086,12 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_mul_lo_u32 v6, v4, v3
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/v_mac.ll
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ bb:

; SI: v_add_f32_e32 [[TMP2:v[0-9]+]], [[CVT_A]], [[CVT_A]]
; SI: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
; SI: v_mac_f32_e32 v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}
; SI: v_madmk_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}

; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -743,12 +743,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
; GFX1032-NEXT: v_cvt_f32_u32_e32 v1, s5
; GFX1032-NEXT: s_sub_u32 s9, 0, s4
; GFX1032-NEXT: s_subb_u32 s10, 0, s5
; GFX1032-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX1032-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX1032-NEXT: v_rcp_f32_e32 v0, v0
; GFX1032-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1032-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1032-NEXT: v_trunc_f32_e32 v1, v1
; GFX1032-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX1032-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX1032-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1032-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1032-NEXT: v_readfirstlane_b32 s0, v1
Expand Down Expand Up @@ -905,12 +905,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
; GFX1064-NEXT: v_cvt_f32_u32_e32 v1, s5
; GFX1064-NEXT: s_sub_u32 s9, 0, s4
; GFX1064-NEXT: s_subb_u32 s10, 0, s5
; GFX1064-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
; GFX1064-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX1064-NEXT: v_rcp_f32_e32 v0, v0
; GFX1064-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1064-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1064-NEXT: v_trunc_f32_e32 v1, v1
; GFX1064-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
; GFX1064-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX1064-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1064-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1064-NEXT: v_readfirstlane_b32 s8, v1
Expand Down