diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 250e3e350c02e..2a40129661102 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1445,13 +1445,13 @@ def FeatureISAVersion11_Common : FeatureSet< FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, - FeatureVcmpxPermlaneHazard, - FeatureMADIntraFwdBug]>; + FeatureVcmpxPermlaneHazard]>; def FeatureISAVersion11_0_Common : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureMSAALoadDstSelBug, - FeatureVALUTransUseHazard])>; + FeatureVALUTransUseHazard, + FeatureMADIntraFwdBug])>; def FeatureISAVersion11_0_0 : FeatureSet< !listconcat(FeatureISAVersion11_0_Common.Features, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir index d07434dcd38c1..0e2ea18c74e4a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-mad_64_32.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX11 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX1100 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX1150 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX12 %s --- @@ -20,14 +21,23 @@ body: | ; GFX10-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]] ; - ; GFX11-LABEL: name: mad_u64_u32_vvv - ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX11-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_gfx11_e64_]], implicit [[V_MAD_U64_U32_gfx11_e64_1]] + ; GFX1100-LABEL: name: mad_u64_u32_vvv + ; GFX1100: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX1100-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX1100-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 + ; GFX1100-NEXT: [[V_MAD_U64_U32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX1100-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_gfx11_e64_]], implicit [[V_MAD_U64_U32_gfx11_e64_1]] + ; + ; GFX1150-LABEL: name: mad_u64_u32_vvv + ; GFX1150: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX1150-NEXT: {{ $}} + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 + ; GFX1150-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX1150-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]] ; ; GFX12-LABEL: name: mad_u64_u32_vvv ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -63,14 +73,23 @@ body: | ; GFX10-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]] ; - ; GFX11-LABEL: name: mad_i64_i32_vvv - ; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 - ; GFX11-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec - ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_gfx11_e64_]], implicit [[V_MAD_I64_I32_gfx11_e64_1]] + ; GFX1100-LABEL: name: mad_i64_i32_vvv + ; GFX1100: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX1100-NEXT: {{ $}} + ; GFX1100-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX1100-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX1100-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 + ; GFX1100-NEXT: [[V_MAD_I64_I32_gfx11_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_gfx11_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_gfx11_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX1100-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_gfx11_e64_]], implicit [[V_MAD_I64_I32_gfx11_e64_1]] + ; + ; GFX1150-LABEL: name: mad_i64_i32_vvv + ; GFX1150: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX1150-NEXT: {{ $}} + ; GFX1150-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX1150-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX1150-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3 + ; GFX1150-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX1150-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]] ; ; GFX12-LABEL: name: mad_i64_i32_vvv ; GFX12: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll index 450ce59069142..14bcc4f994f89 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll @@ -2,7 +2,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI %s ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1100 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX1150 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s ; On GFX11, ensure vdst and src2 do not partially overlap. Full overlap is ok. @@ -29,13 +30,19 @@ define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_sextops: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_sextops: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_sextops: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_sextops: ; GFX12: ; %bb.0: @@ -75,13 +82,19 @@ define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_sextops_commute: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_sextops_commute: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_sextops_commute: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_sextops_commute: ; GFX12: ; %bb.0: @@ -121,13 +134,19 @@ define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_u64_u32_zextops: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_u64_u32_zextops: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_u64_u32_zextops: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_u64_u32_zextops: ; GFX12: ; %bb.0: @@ -167,13 +186,19 @@ define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_u64_u32_zextops_commute: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_u64_u32_zextops_commute: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_u64_u32_zextops_commute: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_u64_u32_zextops_commute: ; GFX12: ; %bb.0: @@ -277,35 +302,65 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 { ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v8, v5, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_sextops_i32_i128: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0 -; GFX11-NEXT: v_mov_b32_e32 v8, 0 -; GFX11-NEXT: v_ashrrev_i32_e32 v14, 31, v0 -; GFX11-NEXT: v_ashrrev_i32_e32 v15, 31, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8] -; GFX11-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v0, v15, v[9:10] -; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mad_i64_i32 v[8:9], null, v1, v14, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_u32 v10, s0, v11, v10 -; GFX11-NEXT: v_add_co_ci_u32_e64 v11, null, 0, 0, s0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mad_i64_i32 v[12:13], null, v15, v0, v[8:9] -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v14, v15, v[10:11] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v12 -; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2 -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_sextops_i32_i128: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0 +; GFX1100-NEXT: v_mov_b32_e32 v8, 0 +; GFX1100-NEXT: v_ashrrev_i32_e32 v14, 31, v0 +; GFX1100-NEXT: v_ashrrev_i32_e32 v15, 31, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8] +; GFX1100-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[7:8], null, v0, v15, v[9:10] +; GFX1100-NEXT: v_mov_b32_e32 v10, v8 +; GFX1100-NEXT: v_mad_i64_i32 v[8:9], null, v1, v14, 0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_add_co_u32 v10, s0, v11, v10 +; GFX1100-NEXT: v_add_co_ci_u32_e64 v11, null, 0, 0, s0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mad_i64_i32 v[12:13], null, v15, v0, v[8:9] +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v14, v15, v[10:11] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_add_co_u32 v8, vcc_lo, v0, v12 +; GFX1100-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo +; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2 +; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo +; GFX1100-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_sextops_i32_i128: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0 +; GFX1150-NEXT: v_mov_b32_e32 v8, 0 +; GFX1150-NEXT: v_ashrrev_i32_e32 v12, 31, v0 +; GFX1150-NEXT: v_ashrrev_i32_e32 v13, 31, v1 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1150-NEXT: v_mad_u64_u32 v[9:10], null, v12, v1, v[7:8] +; GFX1150-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v8 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1150-NEXT: v_mad_u64_u32 v[7:8], null, v0, v13, v[9:10] +; GFX1150-NEXT: v_mov_b32_e32 v10, v8 +; GFX1150-NEXT: v_mad_i64_i32 v[8:9], null, v1, v12, 0 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1150-NEXT: v_add_co_u32 v10, s0, v11, v10 +; GFX1150-NEXT: v_add_co_ci_u32_e64 v11, null, 0, 0, s0 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v13, v0, v[8:9] +; GFX1150-NEXT: v_mad_u64_u32 v[8:9], null, v12, v13, v[10:11] +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_add_co_u32 v8, vcc_lo, v8, v0 +; GFX1150-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v1, vcc_lo +; GFX1150-NEXT: v_add_co_u32 v0, vcc_lo, v6, v2 +; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v7, v3, vcc_lo +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1150-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v8, v4, vcc_lo +; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_sextops_i32_i128: ; GFX12: ; %bb.0: @@ -369,13 +424,19 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 { ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_sextops_i32_i63: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_sextops_i32_i63: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_sextops_i32_i63: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_sextops_i32_i63: ; GFX12: ; %bb.0: @@ -423,14 +484,23 @@ define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 { ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v1, v[2:3] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_sextops_i31_i63: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_bfe_i32 v4, v1, 0, 31 -; GFX11-NEXT: v_bfe_i32 v5, v0, 0, 31 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_sextops_i31_i63: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_bfe_i32 v4, v1, 0, 31 +; GFX1100-NEXT: v_bfe_i32 v5, v0, 0, 31 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_sextops_i31_i63: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_bfe_i32 v1, v1, 0, 31 +; GFX1150-NEXT: v_bfe_i32 v0, v0, 0, 31 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[2:3] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_sextops_i31_i63: ; GFX12: ; %bb.0: @@ -484,17 +554,27 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_extops_i32_i64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] -; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v5 -; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_extops_i32_i64: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] +; GFX1100-NEXT: v_ashrrev_i32_e32 v5, 31, v5 +; GFX1100-NEXT: v_mov_b32_e32 v3, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_extops_i32_i64: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] +; GFX1150-NEXT: v_ashrrev_i32_e32 v2, 31, v5 +; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v2, v4, v[1:2] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_extops_i32_i64: ; GFX12: ; %bb.0: @@ -538,13 +618,19 @@ define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_u64_u32_bitops: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_u64_u32_bitops: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_u64_u32_bitops: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_u64_u32_bitops: ; GFX12: ; %bb.0: @@ -594,17 +680,27 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) # ; GFX9-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_u64_u32_bitops_lhs_mask_small: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0 -; GFX11-NEXT: v_mov_b32_e32 v6, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5] -; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_and_b32 v5, 1, v6 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_u64_u32_bitops_lhs_mask_small: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v0 +; GFX1100-NEXT: v_mov_b32_e32 v6, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5] +; GFX1100-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_and_b32 v5, 1, v6 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_u64_u32_bitops_lhs_mask_small: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mov_b32_e32 v3, v1 +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v0, v2, v[4:5] +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1150-NEXT: v_and_b32_e32 v3, 1, v3 +; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v3, v2, v[1:2] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_u64_u32_bitops_lhs_mask_small: ; GFX12: ; %bb.0: @@ -660,16 +756,26 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) # ; GFX9-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_u64_u32_bitops_rhs_mask_small: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v6, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5] -; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v4, 1, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_u64_u32_bitops_rhs_mask_small: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v6, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5] +; GFX1100-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v4, 1, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_u64_u32_bitops_rhs_mask_small: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mov_b32_e32 v6, v0 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5] +; GFX1150-NEXT: v_and_b32_e32 v2, 1, v3 +; GFX1150-NEXT: v_mad_u64_u32 v[1:2], null, v6, v2, v[1:2] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_u64_u32_bitops_rhs_mask_small: ; GFX12: ; %bb.0: @@ -713,13 +819,19 @@ define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_mad_i64_i32 v[0:1], s[4:5], v0, v2, v[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_bitops: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5] -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_bitops: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v3, v0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5] +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_bitops: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v2, v[4:5] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_bitops: ; GFX12: ; %bb.0: @@ -762,13 +874,19 @@ define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 { ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[0:1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_unpack_i64ops: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_unpack_i64ops: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_dual_mov_b32 v0, v2 :: v_dual_mov_b32 v1, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_unpack_i64ops: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[0:1] +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_unpack_i64ops: ; GFX12: ; %bb.0: @@ -912,15 +1030,25 @@ define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 { ; GFX9-NEXT: v_xor_b32_e32 v0, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_twice: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3] -; GFX11-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_xor_b32_e32 v0, v6, v2 -; GFX11-NEXT: v_xor_b32_e32 v1, v7, v3 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_twice: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3] +; GFX1100-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_xor_b32_e32 v0, v6, v2 +; GFX1100-NEXT: v_xor_b32_e32 v1, v7, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_twice: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[2:3] +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, v[4:5] +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0 +; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1 +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_twice: ; GFX12: ; %bb.0: @@ -990,24 +1118,43 @@ define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 % ; GFX9-NEXT: v_xor_b32_e32 v0, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_thrice: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2 -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v9, v3, vcc_lo -; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v8, v4 -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo -; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, v8, v6 -; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v9, v7, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-NEXT: v_xor_b32_e32 v0, v0, v2 -; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_xor_b32_e32 v0, v0, v4 -; GFX11-NEXT: v_xor_b32_e32 v1, v1, v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_thrice: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2 +; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v9, v3, vcc_lo +; GFX1100-NEXT: v_add_co_u32 v2, vcc_lo, v8, v4 +; GFX1100-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v9, v5, vcc_lo +; GFX1100-NEXT: v_add_co_u32 v4, vcc_lo, v8, v6 +; GFX1100-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v9, v7, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v4 +; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_thrice: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, 0 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 +; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo +; GFX1150-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4 +; GFX1150-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo +; GFX1150-NEXT: v_add_co_u32 v0, vcc_lo, v0, v6 +; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v7, vcc_lo +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1150-NEXT: v_xor_b32_e32 v2, v2, v4 +; GFX1150-NEXT: v_xor_b32_e32 v3, v3, v5 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0 +; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1 +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_thrice: ; GFX12: ; %bb.0: @@ -1073,17 +1220,29 @@ define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: mad_i64_i32_secondary_use: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v5, v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_xor_b32_e32 v0, v0, v4 -; GFX11-NEXT: v_xor_b32_e32 v1, v1, v5 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX1100-LABEL: mad_i64_i32_secondary_use: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 +; GFX1100-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v5, v3, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_xor_b32_e32 v0, v0, v4 +; GFX1100-NEXT: v_xor_b32_e32 v1, v1, v5 +; GFX1100-NEXT: s_setpc_b64 s[30:31] +; +; GFX1150-LABEL: mad_i64_i32_secondary_use: +; GFX1150: ; %bb.0: +; GFX1150-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1150-NEXT: v_mad_i64_i32 v[0:1], null, v0, v1, 0 +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 +; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo +; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1150-NEXT: v_xor_b32_e32 v0, v2, v0 +; GFX1150-NEXT: v_xor_b32_e32 v1, v3, v1 +; GFX1150-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: mad_i64_i32_secondary_use: ; GFX12: ; %bb.0: