diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 8eecb1c1019ae..9319d43e2ce1f 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -172,7 +172,6 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes; def : HWVALUWriteRes; - def : HWVALUWriteRes; def : HWVALUWriteRes; let ReleaseAtCycles = [4] in @@ -231,6 +230,7 @@ def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; +def : HWVALUWriteRes; def : HWVALUWriteRes; } // End RetireOOO = 1 @@ -249,6 +249,7 @@ def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; +def : HWVALUWriteRes; def : HWVALUWriteRes; } // End RetireOOO = 1 @@ -269,6 +270,7 @@ def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; +def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; @@ -292,6 +294,7 @@ def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; +def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; @@ -326,6 +329,7 @@ def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; +def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; def : HWVALUWriteRes; diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 10e523d1a0cf1..e8a4329e7f5cf 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -30434,15 +30434,15 @@ define bfloat @v_exp_bf16(bfloat %a) { ; GFX950-NEXT: v_sub_f32_e32 v3, v1, v2 ; GFX950-NEXT: v_fma_f32 v1, v0, s0, -v1 ; GFX950-NEXT: v_fmamk_f32 v1, v0, 0x32a5705f, v1 +; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX950-NEXT: v_exp_f32_e32 v1, v1 -; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s0, 0xc2ce8ed0 +; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX950-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_mov_b32 s0, 0x42b17218 -; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc @@ -30834,15 +30834,15 @@ define bfloat @v_exp10_bf16(bfloat %a) { ; GFX950-NEXT: v_sub_f32_e32 v3, v1, v2 ; GFX950-NEXT: v_fma_f32 v1, v0, s0, -v1 ; GFX950-NEXT: v_fmamk_f32 v1, v0, 0x33979a37, v1 +; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX950-NEXT: v_exp_f32_e32 v1, v1 -; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s0, 0xc23369f4 +; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX950-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_mov_b32 s0, 0x421a209b -; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc