diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index fd1e57b1e95120..57ed89278b2dd0 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -48,6 +48,7 @@ class SIShrinkInstructions : public MachineFunctionPass {
   void shrinkMIMG(MachineInstr &MI) const;
   void shrinkMadFma(MachineInstr &MI) const;
   bool shrinkScalarLogicOp(MachineInstr &MI) const;
+  bool tryReplaceDeadSDST(MachineInstr &MI) const;
   bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                      Register Reg, unsigned SubReg) const;
   bool instReadsReg(const MachineInstr *MI, unsigned Reg,
@@ -689,6 +690,22 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
   return nullptr;
 }
 
+// If an instruction has a dead sdst, replace it with the NULL register on gfx10+.
+bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
+  if (ST->getGeneration() < AMDGPUSubtarget::GFX10)
+    return false;
+
+  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  if (!Op)
+    return false;
+  Register SDstReg = Op->getReg();
+  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
+    return false;
+
+  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
+  return true;
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -822,15 +839,21 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
+      if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
+        // If there is no chance we will shrink it and use VCC as sdst to get
+        // a 32-bit form, try to replace the dead sdst with NULL.
+        tryReplaceDeadSDST(MI);
         continue;
+      }
 
       if (!TII->canShrink(MI, *MRI)) {
         // Try commuting the instruction and see if that enables us to shrink
         // it.
if (!MI.isCommutable() || !TII->commuteInstruction(MI) || - !TII->canShrink(MI, *MRI)) + !TII->canShrink(MI, *MRI)) { + tryReplaceDeadSDST(MI); continue; + } } int Op32 = AMDGPU::getVOPe32(MI.getOpcode()); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll index 5f83beeceaaddc..af5a1a21f6ae25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll @@ -220,7 +220,7 @@ define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) ; ; GFX10-LABEL: div_scale_s_s_true: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s3, s2 +; GFX10-NEXT: v_div_scale_f32 v0, null, s2, s3, s2 ; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true) %result = extractvalue { float, i1 } %div.scale, 0 @@ -236,7 +236,7 @@ define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1 ; ; GFX10-LABEL: div_scale_s_s_false: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_div_scale_f32 v0, s0, s3, s3, s2 +; GFX10-NEXT: v_div_scale_f32 v0, null, s3, s3, s2 ; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false) %result = extractvalue { float, i1 } %div.scale, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll index 9e1dc06de93317..a7ec5a1279b438 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -84,7 +84,7 @@ define float @v_fdiv_f32(float %a, float %b) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-IEEE-NEXT: v_fma_f32 v4, -v2, v3, 1.0 @@ -101,7 +101,7 @@ define float @v_fdiv_f32(float %a, float %b) { ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -188,7 +188,7 @@ define float @v_fdiv_f32_ulp25(float %a, float %b) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-IEEE-NEXT: v_fma_f32 v4, -v2, v3, 1.0 @@ -289,7 +289,7 @@ define float @v_rcp_f32(float %x) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-IEEE-NEXT: v_fma_f32 v3, -v1, v2, 1.0 @@ -306,7 +306,7 @@ define float @v_rcp_f32(float %x) { ; 
GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -397,7 +397,7 @@ define float @v_rcp_f32_arcp(float %x) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-IEEE-NEXT: v_fma_f32 v3, -v1, v2, 1.0 @@ -414,7 +414,7 @@ define float @v_rcp_f32_arcp(float %x) { ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v2, v1 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -560,7 +560,7 @@ define float @v_fdiv_f32_arcp_ulp25(float %a, float %b) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-IEEE-NEXT: v_fma_f32 v4, -v2, v3, 1.0 @@ -710,8 +710,8 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 +; GFX10-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v6, v4 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v7, v5 @@ -739,7 +739,7 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) { ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, vcc_lo, v0, v2, v0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v5, v4 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -750,7 +750,7 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) { ; GFX10-FLUSH-NEXT: v_fmac_f32_e32 v7, v8, v5 ; GFX10-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6 ; GFX10-FLUSH-NEXT: s_denorm_mode 0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, s4, v3, v3, v1 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v6, null, v3, v3, v1 ; GFX10-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v5, v6 ; GFX10-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -872,8 +872,8 @@ define <2 x float> @v_fdiv_v2f32_ulp25(<2 x float> %a, <2 x float> %b) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 -; GFX10-IEEE-NEXT: 
v_div_scale_f32 v5, s4, v3, v3, v1 +; GFX10-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v6, v4 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v7, v5 @@ -1040,8 +1040,8 @@ define <2 x float> @v_rcp_v2f32(<2 x float> %x) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v4, v2 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v5, v3 @@ -1069,7 +1069,7 @@ define <2 x float> @v_rcp_v2f32(<2 x float> %x) { ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -1080,7 +1080,7 @@ define <2 x float> @v_rcp_v2f32(<2 x float> %x) { ; GFX10-FLUSH-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v4 ; GFX10-FLUSH-NEXT: s_denorm_mode 0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v1, v1, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, null, v1, v1, 1.0 ; GFX10-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v4 ; GFX10-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0 @@ -1222,8 +1222,8 @@ define <2 x float> @v_rcp_v2f32_arcp(<2 x float> %x) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v3, s4, v1, v1, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v3, null, v1, v1, 1.0 ; GFX10-IEEE-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v4, v2 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v5, v3 @@ -1251,7 +1251,7 @@ define <2 x float> @v_rcp_v2f32_arcp(<2 x float> %x) { ; GFX10-FLUSH: ; %bb.0: ; GFX10-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-FLUSH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, s4, v0, v0, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v2, null, v0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 ; GFX10-FLUSH-NEXT: s_denorm_mode 3 @@ -1262,7 +1262,7 @@ define <2 x float> @v_rcp_v2f32_arcp(<2 x float> %x) { ; GFX10-FLUSH-NEXT: v_fmac_f32_e32 v5, v6, v3 ; GFX10-FLUSH-NEXT: v_fma_f32 v2, -v2, v5, v4 ; GFX10-FLUSH-NEXT: s_denorm_mode 0 -; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, s4, v1, v1, 1.0 +; GFX10-FLUSH-NEXT: v_div_scale_f32 v4, null, v1, v1, 1.0 ; GFX10-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v5 ; GFX10-FLUSH-NEXT: v_rcp_f32_e32 v3, v4 ; GFX10-FLUSH-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0 @@ -1459,8 +1459,8 @@ define <2 x float> @v_fdiv_v2f32_arcp_ulp25(<2 x float> %a, <2 x float> %b) { ; GFX10-IEEE: ; %bb.0: ; GFX10-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-IEEE-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-IEEE-NEXT: v_div_scale_f32 v4, s4, v2, v2, v0 -; 
GFX10-IEEE-NEXT: v_div_scale_f32 v5, s4, v3, v3, v1 +; GFX10-IEEE-NEXT: v_div_scale_f32 v4, null, v2, v2, v0 +; GFX10-IEEE-NEXT: v_div_scale_f32 v5, null, v3, v3, v1 ; GFX10-IEEE-NEXT: v_div_scale_f32 v10, vcc_lo, v0, v2, v0 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v6, v4 ; GFX10-IEEE-NEXT: v_rcp_f32_e32 v7, v5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index 39153cb8a744ef..765fdeb16eb0b5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -67,7 +67,7 @@ define double @v_fdiv_f64(double %a, double %b) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 @@ -170,7 +170,7 @@ define double @v_fdiv_f64_ulp25(double %a, double %b) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 @@ -243,7 +243,7 @@ define double @v_rcp_f64(double %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -316,7 +316,7 @@ define double @v_rcp_f64_arcp(double %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -420,7 +420,7 @@ define double @v_rcp_f64_ulp25(double %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] ; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 @@ -523,7 +523,7 @@ define double @v_fdiv_f64_arcp_ulp25(double %a, double %b) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 @@ -634,8 +634,8 @@ define <2 x double> @v_fdiv_v2f64(<2 x double> %a, <2 x double> %b) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] -; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] ; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] @@ -804,8 +804,8 @@ define <2 x double> @v_fdiv_v2f64_ulp25(<2 x double> %a, <2 x double> %b) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] -; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] ; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] @@ -928,8 +928,8 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 -; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] @@ -1052,8 +1052,8 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 -; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] @@ -1223,8 +1223,8 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 -; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0 +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0 ; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] @@ -1393,8 +1393,8 @@ define <2 x double> @v_fdiv_v2f64_arcp_ulp25(<2 x double> %a, <2 x double> %b) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] -; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] ; GFX10-NEXT: 
v_rcp_f64_e32 v[14:15], v[10:11] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll index ef3d6f6b479e7e..c7e8b66ace8250 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll @@ -54,7 +54,7 @@ define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, v2, v2, v1 +; GFX10-NEXT: v_div_scale_f32 v0, null, v2, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm @@ -121,7 +121,7 @@ define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, v1, v2, v1 +; GFX10-NEXT: v_div_scale_f32 v0, null, v1, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm @@ -193,7 +193,7 @@ define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, doubl ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] offset:8 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm @@ -265,7 +265,7 @@ define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, doubl ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] offset:8 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[0:1], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm @@ -328,7 +328,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, v0, v0, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, v0, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -388,7 +388,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s0, v0, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, v0, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -448,7 +448,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s0, s0, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, s0, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -508,7 +508,7 @@ define amdgpu_kernel void 
@test_div_scale_f32_scalar_den_2(float addrspace(1)* % ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, v0, s0, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, s0, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -568,7 +568,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], v[0:1], s[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], v[0:1], s[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -628,7 +628,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], v[0:1], s[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], v[0:1], s[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -688,7 +688,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], s[0:1], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], v[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -748,7 +748,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dwordx2 v[0:1], v0, s[6:7] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], s[0:1], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, v[0:1], s[0:1], v[0:1] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -797,7 +797,7 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* % ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s5, s5, s4 +; GFX10-NEXT: v_div_scale_f32 v0, null, s5, s5, s4 ; GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) @@ -841,7 +841,7 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* % ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s0, s4, s5, s4 +; GFX10-NEXT: v_div_scale_f32 v0, null, s4, s5, s4 ; GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) @@ -887,7 +887,7 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[4:5], s[4:5], s[2:3] +; GFX10-NEXT: 
v_div_scale_f64 v[0:1], null, s[4:5], s[4:5], s[2:3] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) @@ -933,7 +933,7 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[2:3], s[4:5], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[2:3], s[4:5], s[2:3] ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) @@ -985,7 +985,7 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, v0, v0, 1.0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, v0, 1.0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1041,7 +1041,7 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_load_dword v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, 2.0, 2.0, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, 2.0, 2.0, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1108,7 +1108,7 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 -; GFX10-NEXT: v_div_scale_f32 v0, s2, v2, v2, v0 +; GFX10-NEXT: v_div_scale_f32 v0, null, v2, v2, v0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1179,7 +1179,7 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v2 -; GFX10-NEXT: v_div_scale_f32 v0, s2, v0, v0, v1 +; GFX10-NEXT: v_div_scale_f32 v0, null, v0, v0, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm @@ -1226,7 +1226,7 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, s0, s0, 0x41000000 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, s0, 0x41000000 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false) @@ -1263,7 +1263,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, 0x41000000, 0x41000000, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, 0x41000000, 0x41000000, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false) @@ -1298,7 +1298,7 @@ define 
amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1) ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v0, s2, s0, s0, s0 +; GFX10-NEXT: v_div_scale_f32 v0, null, s0, s0, s0 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false) @@ -1337,7 +1337,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* ; GFX10-NEXT: s_mov_b32 s2, 0 ; GFX10-NEXT: s_mov_b32 s3, 0x40200000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], s[2:3] ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll index 45c81b9011829c..7fab7f64e20ccd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -384,7 +384,7 @@ define i64 @v_mul_i64(i64 %num, i64 %den) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 ; GFX10-NEXT: v_mul_lo_u32 v3, v4, v3 ; GFX10-NEXT: v_mul_lo_u32 v2, v5, v2 ; GFX10-NEXT: v_add3_u32 v1, v1, v3, v2 @@ -514,10 +514,10 @@ define i96 @v_mul_i96(i96 %num, i96 %den) { ; GFX10-NEXT: v_mul_lo_u32 v2, v2, v3 ; GFX10-NEXT: v_mul_lo_u32 v5, v6, v5 ; GFX10-NEXT: v_mul_lo_u32 v8, v7, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v6, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v6, v3, 0 ; GFX10-NEXT: v_add3_u32 v2, v5, v8, v2 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v6, v4, v[1:2] -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v7, v3, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, v7, v3, v[1:2] ; GFX10-NEXT: s_setpc_b64 s[30:31] %result = mul i96 %num, %den ret i96 %result @@ -769,12 +769,12 @@ define i128 @v_mul_i128(i128 %num, i128 %den) { ; GFX10-NEXT: v_mov_b32_e32 v9, v1 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v6, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v8, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v7, v8, v7 ; GFX10-NEXT: v_mul_lo_u32 v6, v9, v6 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v9, v5, v[0:1] -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v8, v4, 0 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v10, v4, v[11:12] +; GFX10-NEXT: v_mad_u64_u32 v[11:12], null, v9, v5, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], null, v10, v4, v[11:12] ; GFX10-NEXT: v_mov_b32_e32 v2, v11 ; GFX10-NEXT: v_mad_u64_u32 v[1:2], vcc_lo, v8, v5, v[1:2] ; GFX10-NEXT: v_mul_lo_u32 v5, v10, v5 @@ -1813,24 +1813,24 @@ define i256 @v_mul_i256(i256 %num, i256 %den) { ; GFX10-NEXT: v_mul_lo_u32 v27, v6, v9 ; GFX10-NEXT: v_mul_lo_u32 v28, v5, v10 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, v8 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v16, v14, 0 -; GFX10-NEXT: v_mad_u64_u32 v[18:19], s4, v16, v12, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v16, v14, 0 +; GFX10-NEXT: v_mad_u64_u32 v[18:19], null, v16, v12, 0 ; GFX10-NEXT: v_mul_lo_u32 v30, v17, v14 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v13, v[0:1] 
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v2, v12, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v17, v13, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], s4, v17, v11, v[18:19] ; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v3, v11, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v2, v12, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19] ; GFX10-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[20:21], s4, v16, v10, 0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v10, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[20:21], null, v16, v10, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v3, v11, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19] ; GFX10-NEXT: v_add_co_ci_u32_e32 v24, vcc_lo, 0, v22, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v5, v9, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v10, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19] ; GFX10-NEXT: v_add_co_ci_u32_e32 v26, vcc_lo, 0, v24, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[22:23], s4, v6, v8, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v5, v9, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[22:23], null, v6, v8, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v9, v[20:21] ; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, 1, s4 ; GFX10-NEXT: v_mov_b32_e32 v20, v22 @@ -1842,7 +1842,7 @@ define i256 @v_mul_i256(i256 %num, i256 %den) { ; GFX10-NEXT: v_mul_lo_u32 v22, v16, v15 ; GFX10-NEXT: v_mad_u64_u32 v[24:25], vcc_lo, v17, v12, v[0:1] ; GFX10-NEXT: v_mad_u64_u32 v[14:15], s6, v16, v11, v[19:20] -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v16, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v16, v8, 0 ; GFX10-NEXT: v_mul_lo_u32 v20, v4, v11 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, 1, s6 ; GFX10-NEXT: v_mad_u64_u32 v[18:19], s5, v2, v11, v[24:25] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index 3b133078aa909b..48bf3167bb50bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -488,7 +488,7 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX10-NEXT: v_mul_lo_u32 v4, s10, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s11, v3 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 @@ -514,7 +514,7 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 ; GFX10-NEXT: v_mul_lo_u32 v4, s11, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s14, s10, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s10, v2 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -561,7 +561,7 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: v_mul_lo_u32 v4, s9, v2 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, 1 ; GFX10-NEXT: v_add3_u32 v3, v3, v0, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s10, s8, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s8, v2, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s8, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -1930,7 +1930,9 @@ define amdgpu_kernel void 
@sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s10 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: s_xor_b64 s[14:15], s[6:7], s[12:13] +; GFX10-NEXT: s_sub_u32 s3, 0, s10 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; GFX10-NEXT: s_subb_u32 s6, 0, s11 ; GFX10-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 @@ -1945,18 +1947,16 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_trunc_f32_e32 v4, v4 ; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v0 ; GFX10-NEXT: v_mul_f32_e32 v2, 0xcf800000, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, s20, v6, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s20, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v8, s21, v6 ; GFX10-NEXT: v_add_f32_e32 v2, v2, v3 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v4 -; GFX10-NEXT: s_sub_u32 s3, 0, s10 -; GFX10-NEXT: s_subb_u32 s6, 0, s11 ; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v2 ; GFX10-NEXT: v_mul_lo_u32 v9, s3, v3 ; GFX10-NEXT: v_add3_u32 v7, v1, v7, v8 ; GFX10-NEXT: v_mul_lo_u32 v10, v5, v0 ; GFX10-NEXT: v_mul_hi_u32 v11, v6, v0 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s7, s3, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, s3, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v8, s6, v4 ; GFX10-NEXT: v_mul_lo_u32 v12, v6, v7 ; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0 @@ -1980,8 +1980,8 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_add_nc_u32_e32 v10, v12, v10 ; GFX10-NEXT: v_add_co_u32 v8, s7, v8, v15 ; GFX10-NEXT: v_cndmask_b32_e64 v12, 0, 1, s7 -; GFX10-NEXT: v_mul_hi_u32 v16, v4, v2 ; GFX10-NEXT: v_add_nc_u32_e32 v11, v13, v11 +; GFX10-NEXT: v_mul_hi_u32 v16, v4, v2 ; GFX10-NEXT: v_add_co_u32 v1, s7, v14, v1 ; GFX10-NEXT: v_cndmask_b32_e64 v13, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v0, s7, v0, v10 @@ -1990,14 +1990,14 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v9, s7, v1, v16 ; GFX10-NEXT: v_add3_u32 v7, v11, v10, v7 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v6, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v8, v12, v8 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v5, v7, vcc_lo ; GFX10-NEXT: v_mul_hi_u32 v2, v3, v2 -; GFX10-NEXT: v_add_nc_u32_e32 v10, v13, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s7, s20, v6, 0 ; GFX10-NEXT: v_add_co_u32 v7, s7, v9, v8 +; GFX10-NEXT: v_add_nc_u32_e32 v10, v13, v1 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s20, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v9, s21, v6 ; GFX10-NEXT: v_mul_lo_u32 v11, s20, v5 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, 1, s7 @@ -2009,7 +2009,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_mul_hi_u32 v0, v5, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v2, vcc_lo ; GFX10-NEXT: v_mul_lo_u32 v12, v6, v7 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s7, s3, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, s3, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v9, s6, v4 ; GFX10-NEXT: v_mul_lo_u32 v11, s3, v3 ; GFX10-NEXT: v_mul_lo_u32 v13, v5, v7 @@ -2074,7 +2074,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: v_mul_hi_u32 v8, s14, v4 ; GFX10-NEXT: v_add3_u32 v2, v6, v1, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s3, s8, v9, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s8, v9, 0 ; GFX10-NEXT: 
v_mul_lo_u32 v6, s9, v9 ; GFX10-NEXT: v_mul_lo_u32 v7, s8, v5 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v3, v2, vcc_lo @@ -2126,7 +2126,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_add3_u32 v2, v3, v1, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v6, v19, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v6, v7, v20, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v7, s10, v2 ; GFX10-NEXT: v_mul_lo_u32 v11, s11, v4 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v17 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index 91517e444a4edf..1097014a5bb082 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -403,7 +403,7 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v1 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0 ; GFX10-NEXT: v_mul_lo_u32 v4, s0, v2 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s1, v3 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v4, v5 @@ -429,7 +429,7 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: v_add3_u32 v1, v5, v4, v1 ; GFX10-NEXT: v_mul_lo_u32 v4, s1, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, s0, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v3, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s0, v2 ; GFX10-NEXT: v_mul_hi_u32 v6, v2, v0 ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -476,7 +476,7 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: v_mul_lo_u32 v4, s11, v2 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v2, 1 ; GFX10-NEXT: v_add3_u32 v3, v3, v0, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s10, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s10, v2, 0 ; GFX10-NEXT: v_mul_lo_u32 v5, s10, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo ; GFX10-NEXT: v_add3_u32 v1, v1, v5, v4 @@ -1553,9 +1553,9 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v8, v1 ; GFX10-NEXT: v_mul_lo_u32 v7, s0, v4 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v5, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v5, 0 ; GFX10-NEXT: v_mul_lo_u32 v9, s1, v5 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s6, s2, v8, 0 +; GFX10-NEXT: v_mad_u64_u32 v[2:3], null, s2, v8, 0 ; GFX10-NEXT: v_mul_lo_u32 v11, s3, v8 ; GFX10-NEXT: v_add3_u32 v1, v1, v7, v9 ; GFX10-NEXT: v_mul_lo_u32 v7, v4, v0 @@ -1599,14 +1599,15 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s6 ; GFX10-NEXT: v_add3_u32 v1, v9, v7, v1 ; GFX10-NEXT: v_add_co_u32 v5, vcc_lo, v5, v0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 ; GFX10-NEXT: v_add3_u32 v3, v11, v10, v3 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo ; GFX10-NEXT: v_add_co_u32 v8, vcc_lo, v8, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, v6, v3, vcc_lo -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, s0, v5, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v5, 0 ; GFX10-NEXT: v_mul_lo_u32 v7, s1, v5 ; GFX10-NEXT: v_mul_lo_u32 v9, s0, v4 -; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, s2, v8, 0 +; GFX10-NEXT: 
v_mad_u64_u32 v[2:3], null, s2, v8, 0 ; GFX10-NEXT: v_mul_lo_u32 v10, s3, v8 ; GFX10-NEXT: v_mul_lo_u32 v11, s2, v6 ; GFX10-NEXT: v_mul_lo_u32 v12, v4, v0 @@ -1682,9 +1683,9 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_mul_hi_u32 v11, s10, v3 ; GFX10-NEXT: v_add_co_u32 v6, s0, v6, v7 ; GFX10-NEXT: v_add3_u32 v5, v5, v0, v1 -; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s12, v4, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s12, v4, 0 ; GFX10-NEXT: v_mul_lo_u32 v12, s13, v4 +; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0 ; GFX10-NEXT: v_mul_lo_u32 v13, s12, v5 ; GFX10-NEXT: v_add_co_u32 v2, s0, v10, v2 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, 1, s0 @@ -1695,14 +1696,13 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_add3_u32 v1, v1, v13, v12 ; GFX10-NEXT: v_add_nc_u32_e32 v6, v7, v6 ; GFX10-NEXT: v_mul_hi_u32 v3, s11, v3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; GFX10-NEXT: v_add_nc_u32_e32 v7, v10, v8 ; GFX10-NEXT: v_sub_nc_u32_e32 v8, s9, v1 ; GFX10-NEXT: v_sub_co_u32 v10, vcc_lo, s8, v0 ; GFX10-NEXT: v_sub_co_ci_u32_e64 v11, s0, s9, v1, vcc_lo ; GFX10-NEXT: v_subrev_co_ci_u32_e32 v0, vcc_lo, s13, v8, vcc_lo ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s12, v10 -; GFX10-NEXT: v_mov_b32_e32 v9, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v8, vcc_lo, v10, s12 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v12, s0, 0, v0, vcc_lo @@ -1722,25 +1722,25 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: v_add_co_u32 v6, s0, v2, v6 ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 ; GFX10-NEXT: v_add_co_u32 v15, s0, v16, 1 -; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v17, s0 -; GFX10-NEXT: v_add3_u32 v3, v7, v1, v3 -; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, s14, v6, 0 ; GFX10-NEXT: v_mul_lo_u32 v19, s15, v6 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v14 +; GFX10-NEXT: v_add3_u32 v3, v7, v1, v3 +; GFX10-NEXT: v_mad_u64_u32 v[1:2], null, s14, v6, 0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v18, s0, 0, v17, s0 ; GFX10-NEXT: v_mul_lo_u32 v7, s14, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc_lo ; GFX10-NEXT: v_sub_co_u32 v16, s0, v8, s12 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v20, s0, 0, v0, s0 -; GFX10-NEXT: v_add3_u32 v2, v2, v7, v19 -; GFX10-NEXT: v_sub_co_u32 v7, s0, s10, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc_lo ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v13 +; GFX10-NEXT: v_add3_u32 v2, v2, v7, v19 +; GFX10-NEXT: v_sub_co_u32 v7, s0, s10, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v15, vcc_lo ; GFX10-NEXT: v_sub_co_ci_u32_e64 v13, s1, s11, v2, s0 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, s11, v2 ; GFX10-NEXT: v_cmp_ne_u32_e64 s1, 0, v14 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v15, vcc_lo -; GFX10-NEXT: v_cmp_le_u32_e64 s2, s15, v13 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v17, vcc_lo +; GFX10-NEXT: v_cmp_le_u32_e64 s2, s15, v13 ; GFX10-NEXT: v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0 ; GFX10-NEXT: v_cmp_le_u32_e64 s0, s14, v7 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v16, s1 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll index 0508b97d85007f..fa75ea0c96d8d3 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll @@ -271,7 +271,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* 
%out, <4 x i32> %in ; GFX10W64-NEXT: s_waitcnt vmcnt(0) ; GFX10W64-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX10W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W64-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W64-NEXT: s_endpgm @@ -300,7 +300,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in ; GFX10W32-NEXT: s_waitcnt vmcnt(0) ; GFX10W32-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1] +; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1] ; GFX10W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W32-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W32-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll index a85e17d019c2f7..458c15a5b38210 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll @@ -292,7 +292,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX1064-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm ; @@ -329,7 +329,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v0, s[0:1] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v0, s[0:1] ; GFX1032-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm entry: @@ -705,7 +705,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -742,7 +742,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -922,8 +922,8 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s6, -1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s2, v2, s[0:1] -; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[0:1], s3, v2, v[1:2] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[0:1] +; GFX1064-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm ; @@ -965,8 +965,8 @@ define amdgpu_kernel void @add_i64_uniform(i64 
addrspace(1)* %out, i64 addrspace ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v2, s[0:1] -; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s0, s3, v2, v[1:2] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[0:1] +; GFX1032-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm entry: @@ -2043,12 +2043,12 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[0:1], s2, v2, 0 -; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 -; GFX1064-NEXT: s_mov_b32 s6, -1 -; GFX1064-NEXT: v_mad_u64_u32 v[4:5], s[0:1], s3, v2, v[4:5] +; GFX1064-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1064-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 +; GFX1064-NEXT: s_mov_b32 s6, -1 +; GFX1064-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s0, v3 ; GFX1064-NEXT: v_mov_b32_e32 v1, v4 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s1, v1, vcc @@ -2089,12 +2089,12 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s0, s2, v2, 0 +; GFX1032-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 +; GFX1032-NEXT: v_readfirstlane_b32 s0, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s1, v1 ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 -; GFX1032-NEXT: v_mad_u64_u32 v[4:5], s0, s3, v2, v[4:5] -; GFX1032-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1032-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s0, v3 ; GFX1032-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 55b732244a1f51..8fc3d04e4248a2 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -357,7 +357,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive ; GFX1064-NEXT: v_readfirstlane_b32 s0, v1 ; GFX1064-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s6, v0, s[0:1] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s6, v0, s[0:1] ; GFX1064-NEXT: s_mov_b32 s6, -1 ; GFX1064-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1064-NEXT: s_endpgm @@ -391,7 +391,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive ; GFX1032-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s6, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s0, s2, v0, s[0:1] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v0, s[0:1] ; GFX1032-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX1032-NEXT: s_endpgm ; @@ -423,7 +423,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive ; GFX1164-NEXT: v_readfirstlane_b32 s0, v1 ; GFX1164-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: 
v_mad_u64_u32 v[1:2], s[0:1], s6, v0, s[0:1] +; GFX1164-NEXT: v_mad_u64_u32 v[1:2], null, s6, v0, s[0:1] ; GFX1164-NEXT: s_mov_b32 s6, -1 ; GFX1164-NEXT: buffer_store_b32 v1, off, s[4:7], 0 ; GFX1164-NEXT: s_endpgm @@ -456,7 +456,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive ; GFX1132-NEXT: s_mov_b32 s7, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s6, -1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: v_mad_u64_u32 v[1:2], s0, s0, v0, s[2:3] +; GFX1132-NEXT: v_mad_u64_u32 v[1:2], null, s0, v0, s[2:3] ; GFX1132-NEXT: buffer_store_b32 v1, off, s[4:7], 0 ; GFX1132-NEXT: s_endpgm entry: @@ -1152,7 +1152,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { ; GFX1064-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -1183,7 +1183,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -1214,7 +1214,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { ; GFX1164-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1164-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3] +; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164-NEXT: s_mov_b32 s2, -1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -1244,7 +1244,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) { ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s2 ; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s3, v1 -; GFX1132-NEXT: v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3] +; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3] ; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s2, -1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -1411,10 +1411,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive ; GFX1064-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s5, v1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5] -; GFX1064-NEXT: v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[1:2] -; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1064-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] ; GFX1064-NEXT: s_mov_b32 s2, -1 +; GFX1064-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] +; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1064-NEXT: s_endpgm ; @@ -1448,10 +1448,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive ; GFX1032-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s5, v1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5] -; GFX1032-NEXT: v_mad_u64_u32 v[1:2], s2, s3, v2, v[1:2] -; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1032-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, 
s[4:5] ; GFX1032-NEXT: s_mov_b32 s2, -1 +; GFX1032-NEXT: v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2] +; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1032-NEXT: s_endpgm ; @@ -1485,10 +1485,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive ; GFX1164-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1164-NEXT: v_readfirstlane_b32 s5, v1 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5] -; GFX1164-NEXT: v_mad_u64_u32 v[3:4], s[2:3], s3, v2, v[1:2] -; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1164-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] ; GFX1164-NEXT: s_mov_b32 s2, -1 +; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s3, v2, v[1:2] +; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164-NEXT: v_mov_b32_e32 v1, v3 ; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX1164-NEXT: s_endpgm @@ -1522,10 +1522,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive ; GFX1132-NEXT: v_readfirstlane_b32 s4, v0 ; GFX1132-NEXT: v_readfirstlane_b32 s5, v1 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5] -; GFX1132-NEXT: v_mad_u64_u32 v[3:4], s2, s3, v2, v[1:2] -; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1132-NEXT: v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5] ; GFX1132-NEXT: s_mov_b32 s2, -1 +; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s3, v2, v[1:2] +; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: v_mov_b32_e32 v1, v3 ; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX1132-NEXT: s_endpgm @@ -3034,14 +3034,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive ; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1064-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s2, v2, 0 -; GFX1064-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1064-NEXT: v_mad_u64_u32 v[4:5], s[2:3], s3, v2, v[4:5] +; GFX1064-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1064-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1064-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1064-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1064-NEXT: v_sub_co_u32 v0, vcc, s2, v3 -; GFX1064-NEXT: v_mov_b32_e32 v1, v4 +; GFX1064-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1064-NEXT: s_mov_b32 s2, -1 +; GFX1064-NEXT: v_mov_b32_e32 v1, v4 ; GFX1064-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc ; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX1064-NEXT: s_endpgm @@ -3074,14 +3074,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive ; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 ; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: v_mad_u64_u32 v[3:4], s2, s2, v2, 0 -; GFX1032-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1032-NEXT: v_mad_u64_u32 v[4:5], s2, s3, v2, v[4:5] +; GFX1032-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1032-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1032-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1032-NEXT: v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5] ; GFX1032-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3 -; GFX1032-NEXT: v_mov_b32_e32 v1, v4 +; GFX1032-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1032-NEXT: s_mov_b32 s2, -1 +; GFX1032-NEXT: v_mov_b32_e32 v1, v4 ; GFX1032-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo ; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 
; GFX1032-NEXT: s_endpgm @@ -3114,14 +3114,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive ; GFX1164-NEXT: .LBB12_2: ; GFX1164-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: v_mad_u64_u32 v[3:4], s[4:5], s2, v2, 0 -; GFX1164-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1164-NEXT: v_mad_u64_u32 v[5:6], s[2:3], s3, v2, v[4:5] +; GFX1164-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1164-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1164-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1164-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5] ; GFX1164-NEXT: v_sub_co_u32 v0, vcc, s2, v3 -; GFX1164-NEXT: v_mov_b32_e32 v1, v5 +; GFX1164-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1164-NEXT: s_mov_b32 s2, -1 +; GFX1164-NEXT: v_mov_b32_e32 v1, v5 ; GFX1164-NEXT: v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc ; GFX1164-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX1164-NEXT: s_endpgm @@ -3153,14 +3153,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive ; GFX1132-NEXT: .LBB12_2: ; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: v_mad_u64_u32 v[3:4], s2, s2, v2, 0 -; GFX1132-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1132-NEXT: v_mad_u64_u32 v[5:6], s2, s3, v2, v[4:5] +; GFX1132-NEXT: v_mad_u64_u32 v[3:4], null, s2, v2, 0 ; GFX1132-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 +; GFX1132-NEXT: v_readfirstlane_b32 s4, v1 +; GFX1132-NEXT: v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5] ; GFX1132-NEXT: v_sub_co_u32 v0, vcc_lo, s2, v3 -; GFX1132-NEXT: v_mov_b32_e32 v1, v5 +; GFX1132-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1132-NEXT: s_mov_b32 s2, -1 +; GFX1132-NEXT: v_mov_b32_e32 v1, v5 ; GFX1132-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo ; GFX1132-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 ; GFX1132-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll index 007f7e6ef7c7af..a33e7dc1550dff 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll @@ -270,7 +270,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in ; GFX10W64-NEXT: s_waitcnt vmcnt(0) ; GFX10W64-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX10W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W64-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W64-NEXT: s_endpgm @@ -299,7 +299,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in ; GFX10W32-NEXT: s_waitcnt vmcnt(0) ; GFX10W32-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1] +; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1] ; GFX10W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W32-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W32-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll index 9cfd9df76444e9..6cd1d8b8e7919a 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll @@ -279,7 +279,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 
addrspace(1)* %out, <4 x i32> %in ; GFX10W64-NEXT: s_waitcnt vmcnt(0) ; GFX10W64-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1] +; GFX10W64-NEXT: v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1] ; GFX10W64-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W64-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W64-NEXT: s_endpgm @@ -309,7 +309,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in ; GFX10W32-NEXT: s_waitcnt vmcnt(0) ; GFX10W32-NEXT: v_readfirstlane_b32 s0, v1 ; GFX10W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1] +; GFX10W32-NEXT: v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1] ; GFX10W32-NEXT: v_mov_b32_e32 v1, 0 ; GFX10W32-NEXT: global_store_dword v1, v0, s[2:3] ; GFX10W32-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll index 578faa4a6ea09d..860fa9ab31bf29 100644 --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -52,7 +52,7 @@ entry: ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} -; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -82,7 +82,7 @@ entry: ; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} -; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]] +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]] define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -171,7 +171,7 @@ define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %ca ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 -; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -237,7 +237,7 @@ entry: ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}} -; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -267,7 +267,7 @@ entry: ; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc ; ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}} -; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]] +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]] define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -356,7 +356,7 @@ define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %ca ; GFX9: 
v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc ; ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0 -; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]] +; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]] define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index c10143c928b23c..cee3afc3e5bff8 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -561,7 +561,7 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ; GFX10-NEXT: global_load_dword v1, v0, s[6:7] ; GFX10-NEXT: global_load_dword v2, v0, s[2:3] offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v4, s0, v2, v2, v1 +; GFX10-NEXT: v_div_scale_f32 v4, null, v2, v2, v1 ; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v1, v2, v1 ; GFX10-NEXT: v_rcp_f32_e32 v5, v4 ; GFX10-NEXT: s_denorm_mode 15 @@ -976,7 +976,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; GFX10-NEXT: global_load_dwordx2 v[0:1], v12, s[6:7] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v12, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[4:5], s0, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 ; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] @@ -2140,7 +2140,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3] offset:32 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v6, s0, v3, v3, v1 +; GFX10-NEXT: v_div_scale_f32 v6, null, v3, v3, v1 ; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v1, v3, v1 ; GFX10-NEXT: v_rcp_f32_e32 v7, v6 ; GFX10-NEXT: s_denorm_mode 15 @@ -2155,7 +2155,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; GFX10-NEXT: v_div_fixup_f32 v5, v5, v3, v1 ; GFX10-NEXT: v_trunc_f32_e32 v5, v5 ; GFX10-NEXT: v_fma_f32 v1, -v5, v3, v1 -; GFX10-NEXT: v_div_scale_f32 v5, s0, v2, v2, v0 +; GFX10-NEXT: v_div_scale_f32 v5, null, v2, v2, v0 ; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, v2, v0 ; GFX10-NEXT: v_rcp_f32_e32 v6, v5 ; GFX10-NEXT: s_denorm_mode 15 @@ -2506,7 +2506,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[6:7] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[2:3] offset:64 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v10, s0, v7, v7, v3 +; GFX10-NEXT: v_div_scale_f32 v10, null, v7, v7, v3 ; GFX10-NEXT: v_div_scale_f32 v9, vcc_lo, v3, v7, v3 ; GFX10-NEXT: v_rcp_f32_e32 v11, v10 ; GFX10-NEXT: s_denorm_mode 15 @@ -2521,7 +2521,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; GFX10-NEXT: v_div_fixup_f32 v9, v9, v7, v3 ; GFX10-NEXT: v_trunc_f32_e32 v9, v9 ; GFX10-NEXT: v_fma_f32 v3, -v9, v7, v3 -; GFX10-NEXT: v_div_scale_f32 v9, s0, v6, v6, v2 +; GFX10-NEXT: v_div_scale_f32 v9, null, v6, v6, v2 ; GFX10-NEXT: v_div_scale_f32 v7, vcc_lo, v2, v6, v2 ; GFX10-NEXT: v_rcp_f32_e32 v10, v9 ; GFX10-NEXT: s_denorm_mode 15 @@ -2536,7 +2536,7 @@ define amdgpu_kernel void 
@frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v6, v2 ; GFX10-NEXT: v_trunc_f32_e32 v7, v7 ; GFX10-NEXT: v_fma_f32 v2, -v7, v6, v2 -; GFX10-NEXT: v_div_scale_f32 v7, s0, v5, v5, v1 +; GFX10-NEXT: v_div_scale_f32 v7, null, v5, v5, v1 ; GFX10-NEXT: v_div_scale_f32 v6, vcc_lo, v1, v5, v1 ; GFX10-NEXT: v_rcp_f32_e32 v9, v7 ; GFX10-NEXT: s_denorm_mode 15 @@ -2551,7 +2551,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; GFX10-NEXT: v_div_fixup_f32 v6, v6, v5, v1 ; GFX10-NEXT: v_trunc_f32_e32 v6, v6 ; GFX10-NEXT: v_fma_f32 v1, -v6, v5, v1 -; GFX10-NEXT: v_div_scale_f32 v6, s0, v4, v4, v0 +; GFX10-NEXT: v_div_scale_f32 v6, null, v4, v4, v0 ; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v4, v0 ; GFX10-NEXT: v_rcp_f32_e32 v7, v6 ; GFX10-NEXT: s_denorm_mode 15 @@ -2807,7 +2807,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[6:7] ; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[2:3] offset:64 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[8:9], s0, v[6:7], v[6:7], v[2:3] +; GFX10-NEXT: v_div_scale_f64 v[8:9], null, v[6:7], v[6:7], v[2:3] ; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[8:9] ; GFX10-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 ; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] @@ -2820,7 +2820,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; GFX10-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3] ; GFX10-NEXT: v_trunc_f64_e32 v[8:9], v[8:9] ; GFX10-NEXT: v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3] -; GFX10-NEXT: v_div_scale_f64 v[6:7], s0, v[4:5], v[4:5], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[6:7], null, v[4:5], v[4:5], v[0:1] ; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] ; GFX10-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 ; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll index e0c7b72ebe4a27..b02c46c560ed42 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll @@ -121,7 +121,7 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(i8 addrspace(1)* inreg %s ; GFX10-LABEL: global_xchg_saddr_i32_rtn_2048: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v2, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll index 9f3edae99aa18d..897dec0235a503 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -86,7 +86,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4096(i8 addrspace(1)* inr ; GFX10-LABEL: global_load_saddr_i8_offset_neg4096: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -111,7 +111,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(i8 addrspace(1)* inr ; GFX10-LABEL: 
global_load_saddr_i8_offset_neg4097: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -136,7 +136,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(i8 addrspace(1)* inr ; GFX10-LABEL: global_load_saddr_i8_offset_neg4098: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -240,7 +240,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2049(i8 addrspace(1)* inr ; GFX10-LABEL: global_load_saddr_i8_offset_neg2049: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -263,7 +263,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2050(i8 addrspace(1)* inr ; GFX10-LABEL: global_load_saddr_i8_offset_neg2050: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0xfffff800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -308,7 +308,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294967296(i8 addrspace(1)* ; GFX10-LABEL: global_load_saddr_i8_offset_4294967296: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -332,7 +332,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294967297(i8 addrspace(1)* ; GFX10-LABEL: global_load_saddr_i8_offset_4294967297: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -356,7 +356,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294971391(i8 addrspace(1)* ; GFX10-LABEL: global_load_saddr_i8_offset_4294971391: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -380,7 +380,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294971392(i8 addrspace(1)* ; GFX10-LABEL: global_load_saddr_i8_offset_4294971392: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x1000, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 
v1, null, 1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -405,7 +405,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4294967295(i8 addrspace(1 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967295: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0x800, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2047 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -429,7 +429,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4294967296(i8 addrspace(1 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967296: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -453,7 +453,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4294967297(i8 addrspace(1 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967297: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], 0, s2 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1] ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog @@ -494,7 +494,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_4095(i8 addrspace( ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 @@ -525,7 +525,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_4096(i8 addrspace( ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x1000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off @@ -551,7 +551,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_neg4096(i8 addrspa ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off @@ -582,7 +582,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_neg4097(i8 addrspa ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff000, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: 
global_load_ubyte v0, v[0:1], off offset:-1 @@ -624,7 +624,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_2048(i8 addrspace( ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off @@ -666,7 +666,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_neg2049(i8 addrspa ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0xfffff800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 @@ -692,7 +692,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_4095_gep_order(i8 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_add_co_u32 v0, s[0:1], s2, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1] +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1] ; GFX10-NEXT: v_add_co_u32 v0, vcc, 0x800, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll index 24b236b5dd341f..e64e437342dea4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll @@ -131,9 +131,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr ; GFX1013-NEXT: v_mov_b32_e32 v10, 0x41000000 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v2, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, null, s5, 0, s4 ; GFX1013-NEXT: v_add_co_u32 v4, s4, s6, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, s4, s7, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, null, s7, 0, s4 ; GFX1013-NEXT: flat_load_dword v0, v[2:3] ; GFX1013-NEXT: flat_load_dword v1, v[4:5] ; GFX1013-NEXT: v_mov_b32_e32 v2, 0 @@ -160,9 +160,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr ; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: v_add_co_u32 v2, s4, s6, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, s4, s7, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s4 ; GFX1030-NEXT: flat_load_dword v0, v[0:1] ; GFX1030-NEXT: flat_load_dword v1, v[2:3] ; GFX1030-NEXT: v_mov_b32_e32 v2, 0 @@ -202,9 +202,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node ; GFX1013-NEXT: v_mov_b32_e32 v7, 0x48004700 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v2, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v3, null, s5, 0, s4 ; GFX1013-NEXT: v_add_co_u32 v4, s4, s6, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, s4, s7, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v5, 
null, s7, 0, s4 ; GFX1013-NEXT: flat_load_dword v0, v[2:3] ; GFX1013-NEXT: flat_load_dword v1, v[4:5] ; GFX1013-NEXT: v_mov_b32_e32 v2, 0 @@ -228,9 +228,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node ; GFX1030-NEXT: v_mov_b32_e32 v7, 0x48004700 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: v_add_co_u32 v2, s4, s6, v2 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, s4, s7, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s4 ; GFX1030-NEXT: flat_load_dword v0, v[0:1] ; GFX1030-NEXT: flat_load_dword v1, v[2:3] ; GFX1030-NEXT: v_mov_b32_e32 v2, 0 @@ -277,7 +277,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, ; GFX1013-NEXT: v_mov_b32_e32 v11, 0x41000000 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1013-NEXT: flat_load_dword v2, v[0:1] ; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7 ; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102 @@ -303,7 +303,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, ; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: flat_load_dword v2, v[0:1] ; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102 ; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7 @@ -344,7 +344,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ ; GFX1013-NEXT: v_mov_b32_e32 v8, 0x48004700 ; GFX1013-NEXT: s_waitcnt lgkmcnt(0) ; GFX1013-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1013-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1013-NEXT: flat_load_dword v2, v[0:1] ; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6 ; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102 @@ -367,7 +367,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ ; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700 ; GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030-NEXT: v_add_co_u32 v0, s4, s4, v0 -; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, s4, s5, 0, s4 +; GFX1030-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s4 ; GFX1030-NEXT: flat_load_dword v2, v[0:1] ; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102 ; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll index 4d43abfceedf04..e0a2705b7fef6b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -58,10 +58,10 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[6:7], s4, v4, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[11:12], s4, v5, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[11:12], null, v5, v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 ; GFX10-NEXT: v_mul_lo_u32 v5, v5, v2 ; GFX10-NEXT: v_mul_lo_u32 v4, v4, v3 
@@ -83,10 +83,10 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 ; GFX11-NEXT: v_mov_b32_e32 v5, v1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v2, 0 -; GFX11-NEXT: v_mad_u64_u32 v[6:7], s0, v4, v3, 0 -; GFX11-NEXT: v_mad_u64_u32 v[9:10], s0, v5, v2, 0 -; GFX11-NEXT: v_mad_u64_u32 v[11:12], s0, v5, v3, 0 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 +; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 +; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 +; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v5, v3, 0 ; GFX11-NEXT: v_mov_b32_e32 v8, v1 ; GFX11-NEXT: v_mul_lo_u32 v5, v5, v2 ; GFX11-NEXT: v_mul_lo_u32 v4, v4, v3 @@ -186,10 +186,10 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v4, v2, 0 -; GFX10-NEXT: v_mad_u64_u32 v[6:7], s4, v4, v3, 0 -; GFX10-NEXT: v_mad_u64_u32 v[9:10], s4, v5, v2, 0 -; GFX10-NEXT: v_mad_i64_i32 v[11:12], s4, v5, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 +; GFX10-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 +; GFX10-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 +; GFX10-NEXT: v_mad_i64_i32 v[11:12], null, v5, v3, 0 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 ; GFX10-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 ; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo @@ -223,10 +223,10 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 ; GFX11-NEXT: v_mov_b32_e32 v5, v1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v2, 0 -; GFX11-NEXT: v_mad_u64_u32 v[6:7], s0, v4, v3, 0 -; GFX11-NEXT: v_mad_u64_u32 v[9:10], s0, v5, v2, 0 -; GFX11-NEXT: v_mad_i64_i32 v[11:12], s0, v5, v3, 0 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0 +; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v4, v3, 0 +; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v5, v2, 0 +; GFX11-NEXT: v_mad_i64_i32 v[11:12], null, v5, v3, 0 ; GFX11-NEXT: v_mov_b32_e32 v8, v1 ; GFX11-NEXT: v_add_co_u32 v6, vcc_lo, v8, v6 ; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo diff --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll index 2c1f77e4787755..55a0d65fbafff0 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll @@ -34,7 +34,7 @@ define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 -; GFX11-NEXT: v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 @@ -71,7 +71,7 @@ define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 -; GFX11-NEXT: v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext0 = sext i32 %arg0 to i64 %sext1 = sext i32 %arg1 to i64 @@ -108,7 +108,7 @@ define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], 
null, v5, v4, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext0 = zext i32 %arg0 to i64 %sext1 = zext i32 %arg1 to i64 @@ -145,7 +145,7 @@ define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext0 = zext i32 %arg0 to i64 %sext1 = zext i32 %arg1 to i64 @@ -244,20 +244,20 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mad_u64_u32 v[6:7], s0, v0, v1, 0 +; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v1, 0 ; GFX11-NEXT: v_mov_b32_e32 v8, 0 ; GFX11-NEXT: v_ashrrev_i32_e32 v14, 31, v0 ; GFX11-NEXT: v_ashrrev_i32_e32 v15, 31, v1 -; GFX11-NEXT: v_mad_u64_u32 v[9:10], s0, v14, v1, v[7:8] +; GFX11-NEXT: v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8] ; GFX11-NEXT: v_mov_b32_e32 v7, v10 ; GFX11-NEXT: v_mov_b32_e32 v10, v8 -; GFX11-NEXT: v_mad_u64_u32 v[11:12], s0, v0, v15, v[9:10] -; GFX11-NEXT: v_mad_i64_i32 v[9:10], s0, v1, v14, 0 +; GFX11-NEXT: v_mad_u64_u32 v[11:12], null, v0, v15, v[9:10] +; GFX11-NEXT: v_mad_i64_i32 v[9:10], null, v1, v14, 0 ; GFX11-NEXT: v_mov_b32_e32 v8, v12 +; GFX11-NEXT: v_mad_i64_i32 v[12:13], null, v15, v0, v[9:10] ; GFX11-NEXT: v_add_co_u32 v7, s0, v7, v8 -; GFX11-NEXT: v_add_co_ci_u32_e64 v8, s0, 0, 0, s0 -; GFX11-NEXT: v_mad_i64_i32 v[12:13], s0, v15, v0, v[9:10] -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v14, v15, v[7:8] +; GFX11-NEXT: v_add_co_ci_u32_e64 v8, null, 0, 0, s0 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v14, v15, v[7:8] ; GFX11-NEXT: v_mov_b32_e32 v7, v11 ; GFX11-NEXT: v_add_co_u32 v8, vcc_lo, v0, v12 ; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo @@ -301,7 +301,7 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 -; GFX11-NEXT: v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext0 = sext i32 %arg0 to i63 %sext1 = sext i32 %arg1 to i63 @@ -346,7 +346,7 @@ define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_bfe_i32 v4, v1, 0, 31 ; GFX11-NEXT: v_bfe_i32 v5, v0, 0, 31 -; GFX11-NEXT: v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3] ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext0 = sext i31 %arg0 to i63 %sext1 = sext i31 %arg1 to i63 @@ -394,10 +394,10 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v5, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v5, v4, v[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3] ; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v5 ; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: v_mad_u64_u32 v[1:2], s0, v5, v4, v[3:4] +; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4] ; GFX11-NEXT: s_setpc_b64 s[30:31] %ext0 = sext i32 %arg0 to i64 %ext1 = zext i32 %arg1 to i64 @@ -433,7 +433,7 @@ define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, v[4:5] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5] ; GFX11-NEXT: s_setpc_b64 s[30:31] %trunc.lhs = and i64 %arg0, 4294967295 %trunc.rhs = and i64 %arg1, 4294967295 @@ -481,10 +481,10 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) # ; GFX11-NEXT: v_mov_b32_e32 v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: v_mov_b32_e32 v6, v1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v2, v3, v[4:5] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5] ; GFX11-NEXT: v_and_b32_e32 v5, 1, v6 ; GFX11-NEXT: v_mov_b32_e32 v4, v1 -; GFX11-NEXT: v_mad_u64_u32 v[1:2], s0, v5, v3, v[4:5] +; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5] ; GFX11-NEXT: s_setpc_b64 s[30:31] %trunc.lhs = and i64 %arg0, 8589934591 %trunc.rhs = and i64 %arg1, 4294967295 @@ -532,10 +532,10 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) # ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v6, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v6, v2, v[4:5] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5] ; GFX11-NEXT: v_and_b32_e32 v4, 1, v3 ; GFX11-NEXT: v_mov_b32_e32 v3, v1 -; GFX11-NEXT: v_mad_u64_u32 v[1:2], s0, v6, v4, v[3:4] +; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4] ; GFX11-NEXT: s_setpc_b64 s[30:31] %trunc.lhs = and i64 %arg0, 4294967295 %trunc.rhs = and i64 %arg1, 8589934591 @@ -571,7 +571,7 @@ define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_i64_i32 v[0:1], s0, v3, v2, v[4:5] +; GFX11-NEXT: v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5] ; GFX11-NEXT: s_setpc_b64 s[30:31] %shl.lhs = shl i64 %arg0, 32 %trunc.lhs = ashr i64 %shl.lhs, 32 @@ -609,7 +609,7 @@ define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v1, v0, v[0:1] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: v_mov_b32_e32 v1, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -729,8 +729,8 @@ define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 { ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mad_i64_i32 v[6:7], s0, v0, v1, v[2:3] -; GFX11-NEXT: v_mad_i64_i32 v[2:3], s0, v0, v1, v[4:5] +; GFX11-NEXT: v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3] +; GFX11-NEXT: v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5] ; GFX11-NEXT: v_xor_b32_e32 v0, v6, v2 ; GFX11-NEXT: v_xor_b32_e32 v1, v7, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -793,7 +793,7 @@ define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 % ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mad_i64_i32 v[8:9], s0, v0, v1, 0 +; GFX11-NEXT: v_mad_i64_i32 v[8:9], null, v0, v1, 0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v8, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v9, v3, vcc_lo ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v8, v4 @@ -851,7 +851,7 @@ define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 { ; GFX11: ; %bb.0: ; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_mad_i64_i32 v[4:5], s0, v0, v1, 0 +; GFX11-NEXT: v_mad_i64_i32 v[4:5], null, v0, v1, 0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v5, v3, vcc_lo ; GFX11-NEXT: v_xor_b32_e32 v0, v0, v4 @@ -908,7 +908,7 @@ define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_mov_b32_e32 v6, v1 ; GFX11-NEXT: v_mov_b32_e32 v7, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v7, v2, v[4:5] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v7, v2, v[4:5] ; GFX11-NEXT: v_mul_lo_u32 v3, v7, v3 ; GFX11-NEXT: v_mul_lo_u32 v2, v6, v2 ; GFX11-NEXT: v_add3_u32 v1, v2, v1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll index 2a06faaa5878ce..279a79427345a3 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s ; RUN: llc -march=amdgcn -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) { ; GFX9-LABEL: mad_i32_vvv: @@ -11,14 +13,14 @@ define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) { ; ; GFX10-LABEL: mad_i32_vvv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, v[2:3] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvv: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; GFX11-NEXT: v_mov_b32_e32 v4, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v4, v3, v[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v4, v3, v[2:3] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -47,14 +49,14 @@ define amdgpu_ps float @mad_i32_vvc(i32 %a, i32 %b) { ; ; GFX10-LABEL: mad_i32_vvc: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, 42 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 42 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvc: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, 42 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 42 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, 42 @@ -72,14 +74,14 @@ define amdgpu_ps float @mad_i32_vvi(i32 %a, i32 %b) { ; ; GFX10-LABEL: mad_i32_vvi: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, 0x12d687 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, 0x12d687 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvi: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, 0x12d687 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, 0x12d687 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, 1234567 @@ -95,12 +97,12 @@ define amdgpu_ps float 
@mad_i32_vcv(i32 %a, i32 %c) { ; ; GFX10-LABEL: mad_i32_vcv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, 42, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, v[1:2] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vcv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v0, 42, v[1:2] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, 42, v[1:2] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, 42 @@ -117,13 +119,13 @@ define amdgpu_ps float @mad_i32_vcc(i32 %a) { ; ; GFX10-LABEL: mad_i32_vcc: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, 42, 43 +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, 42, 43 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vcc: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v2, 42, 43 +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, 42, 43 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, 42 %add = add i32 %mul, 43 @@ -139,14 +141,14 @@ define amdgpu_ps float @mad_i32_vvs(i32 %a, i32 %b, i32 inreg %c) { ; ; GFX10-LABEL: mad_i32_vvs: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, v1, s[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, s[0:1] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vvs: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v3, v0 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v3, v2, s[0:1] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, s[0:1] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -162,12 +164,12 @@ define amdgpu_ps float @mad_i32_vsv(i32 %a, i32 inreg %b, i32 %c) { ; ; GFX10-LABEL: mad_i32_vsv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, v[1:2] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vsv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v0, s0, v[1:2] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v0, s0, v[1:2] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b @@ -184,12 +186,12 @@ define amdgpu_ps float @mad_i32_svv(i32 inreg %a, i32 %b, i32 %c) { ; ; GFX10-LABEL: mad_i32_svv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s0, v0, v[1:2] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, v[1:2] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_svv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, s0, v0, v[1:2] +; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, s0, v0, v[1:2] ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b @@ -208,14 +210,14 @@ define amdgpu_ps float @mad_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c) { ; GFX10-LABEL: mad_i32_vss: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_mov_b32 s2, s1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, s[2:3] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, s[2:3] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_vss: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: s_mov_b32 s2, s1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v2, s0, s[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, s0, s[2:3] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -233,14 +235,14 @@ define amdgpu_ps float @mad_i32_svs(i32 inreg %a, i32 %b, i32 inreg %c) { ; GFX10-LABEL: 
mad_i32_svs: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_mov_b32 s2, s1 -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s0, v0, s[2:3] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, v0, s[2:3] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_svs: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; GFX11-NEXT: s_mov_b32 s2, s1 -; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, s0, v2, s[2:3] +; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, s0, v2, s[2:3] ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b %add = add i32 %mul, %c @@ -257,12 +259,12 @@ define amdgpu_ps float @mad_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) { ; ; GFX10-LABEL: mad_i32_ssv: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, s0, s1, v[0:1] +; GFX10-NEXT: v_mad_u64_u32 v[0:1], null, s0, s1, v[0:1] ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: mad_i32_ssv: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_mad_u64_u32 v[1:2], s0, s0, s1, v[0:1] +; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, s0, s1, v[0:1] ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: ; return to shader part epilog %mul = mul i32 %a, %b diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll index 06e36ec05cd608..71d9018c1d7b38 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -169,7 +169,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX10-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] slc ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s3 @@ -183,7 +183,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX10-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 +; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] slc ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s3 @@ -270,8 +270,8 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 -; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] slc dlc +; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 +; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] slc ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -284,8 +284,8 @@ define amdgpu_kernel void @flat_nontemporal_load_1( ; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s0, v0 -; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 -; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] slc dlc +; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 +; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] slc ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -462,7 +462,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1( ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1 ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0 ; 
GFX10-WGP-NEXT: flat_load_dword v2, v[1:2]
-; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2 glc slc
 ; GFX10-WGP-NEXT: s_endpgm
@@ -476,7 +476,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
 ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX10-CU-NEXT: flat_load_dword v2, v[1:2]
-; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2 glc slc
 ; GFX10-CU-NEXT: s_endpgm
@@ -563,7 +563,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
 ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[1:2]
-; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc
 ; GFX11-WGP-NEXT: s_endpgm
@@ -577,7 +577,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
 ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX11-CU-NEXT: flat_load_b32 v2, v[1:2]
-; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc dlc
 ; GFX11-CU-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index a05a651128ebc3..9e530ed43453c8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -120,7 +120,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX10-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0
-; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-WGP-NEXT: flat_load_dword v2, v[0:1] glc dlc
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s2
@@ -135,7 +135,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX10-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s0, v0
-; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-CU-NEXT: flat_load_dword v2, v[0:1] glc dlc
 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s2
@@ -166,7 +166,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0
-; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc dlc
 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
@@ -181,7 +181,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s0, v0
-; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc dlc
 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
@@ -315,7 +315,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-WGP-NEXT: v_mov_b32_e32 v2, s1
 ; GFX10-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX10-WGP-NEXT: flat_load_dword v2, v[1:2]
-; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT: flat_store_dword v[0:1], v2
 ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0
@@ -330,7 +330,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-CU-NEXT: v_mov_b32_e32 v2, s1
 ; GFX10-CU-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX10-CU-NEXT: flat_load_dword v2, v[1:2]
-; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-CU-NEXT: flat_store_dword v[0:1], v2
 ; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
@@ -361,7 +361,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
 ; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX11-WGP-NEXT: flat_load_b32 v2, v[1:2]
-; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 dlc
 ; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
@@ -376,7 +376,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
 ; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s2, v0
 ; GFX11-CU-NEXT: flat_load_b32 v2, v[1:2]
-; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 dlc
 ; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
index 06e0984d05e486..91672e0f3b2538 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -749,7 +749,7 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -778,7 +778,7 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -883,7 +883,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -912,7 +912,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -941,7 +941,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -971,7 +971,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspa
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -1001,7 +1001,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspa
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -1031,7 +1031,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspa
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -1061,7 +1061,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspa
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -1091,7 +1091,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspa
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1800, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
@@ -1121,7 +1121,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspa
 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x2000, s0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index d958f5d9d97d64..9c706745547111 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -186,7 +186,7 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0x1000
@@ -622,7 +622,7 @@ define hidden amdgpu_kernel void @clmem_read(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s35, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v0, s0, s34, v2
 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x5000, v3
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo
 ; GFX10-NEXT: .LBB1_1: ; %for.cond.preheader
 ; GFX10-NEXT: ; =>This Loop Header: Depth=1
@@ -1087,7 +1087,7 @@ define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
 ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, 0x800, v0
@@ -1367,7 +1367,7 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0xfffff800
@@ -1594,7 +1594,7 @@ define amdgpu_kernel void @p32Offset64(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
 ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, 0x80000000
@@ -1838,9 +1838,9 @@ define amdgpu_kernel void @DiffBase(i8 addrspace(1)* %buffer1,
 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
 ; GFX10-NEXT: v_add_co_u32 v0, s0, s36, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s37, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s37, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v14, s0, s38, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s0, s39, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v15, null, s39, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, 0x1800
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v14, 0x3000
@@ -2090,7 +2090,7 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, 0x3800, v0
@@ -2366,7 +2366,7 @@ define hidden amdgpu_kernel void @negativeoffset(i8 addrspace(1)* nocapture %buf
 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT: v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v2, v0
 ; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v4
diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll
index bd8b68a16a2b50..50354486cf474c 100644
--- a/llvm/test/CodeGen/AMDGPU/saddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/saddo.ll
@@ -90,7 +90,7 @@ define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b
 ; GFX10-NEXT: s_xor_b32 s2, s2, s3
 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
 ; GFX10-NEXT: v_add_co_u32 v0, s0, s0, v0
-; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT: s_endpgm
 %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index d5d8817783fb92..9391ca1fc2b1ba 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
 ; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4
 ; MUBUF-NEXT: s_waitcnt vmcnt(0)
 ; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1
-; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
+; MUBUF-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039
 ; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen
 ; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit
 ; MUBUF-NEXT: s_endpgm
@@ -71,7 +71,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4
 ; FLATSCR-NEXT: s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0
-; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
+; FLATSCR-NEXT: v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039
 ; FLATSCR-NEXT: scratch_store_dword off, v0, s0
 ; FLATSCR-NEXT: .LBB0_2: ; %shader_eval_surface.exit
 ; FLATSCR-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index e4fdafe265515b..ab2322d03cc886 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2788,9 +2788,9 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; GFX1030-NEXT: v_add_co_u32 v5, vcc_lo, v2, v3
 ; GFX1030-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, s4, v4, vcc_lo
 ; GFX1030-NEXT: v_mul_hi_u32 v8, v0, v5
-; GFX1030-NEXT: v_mad_u64_u32 v[4:5], s4, v1, v5, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[2:3], s4, v0, v6, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[6:7], s4, v1, v6, 0
+; GFX1030-NEXT: v_mad_u64_u32 v[4:5], null, v1, v5, 0
+; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, v0, v6, 0
+; GFX1030-NEXT: v_mad_u64_u32 v[6:7], null, v1, v6, 0
 ; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v8, v2
 ; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
 ; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
@@ -2798,8 +2798,8 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v7, vcc_lo
 ; GFX1030-NEXT: v_add_co_u32 v5, vcc_lo, v2, v6
 ; GFX1030-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v3, vcc_lo
-; GFX1030-NEXT: v_mad_u64_u32 v[2:3], s4, 0x186a0, v5, 0
-; GFX1030-NEXT: v_mad_u64_u32 v[3:4], s4, 0x186a0, v6, v[3:4]
+; GFX1030-NEXT: v_mad_u64_u32 v[2:3], null, 0x186a0, v5, 0
+; GFX1030-NEXT: v_mad_u64_u32 v[3:4], null, 0x186a0, v6, v[3:4]
 ; GFX1030-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
 ; GFX1030-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
 ; GFX1030-NEXT: v_subrev_co_u32 v2, vcc_lo, 0x186a0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index f7285051247cba..13f4b92f42fd08 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -476,7 +476,7 @@ define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %s
 ; SI-NEXT: {{ $}}
 ; SI-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4)
 ; SI-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %50, 0, implicit $exec
- ; SI-NEXT: %43:vgpr_32, dead %45:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+ ; SI-NEXT: %43:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; SI-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %43, %subreg.sub1
 ; SI-NEXT: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1)
 ; SI-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index ec91016353a64b..6f1706ce410229 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -360,8 +360,8 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_div_scale_f32:
-; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -377,8 +377,8 @@ define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float ad
 }
 
 ; GCN-LABEL: {{^}}test_div_scale_f64:
-; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], s{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
+; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
+; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
 define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) #0 {
 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
@@ -394,8 +394,8 @@ define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double
 }
 
 ; GCN-LABEL: {{^}}test_mad_i64_i32:
-; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
-; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
 define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 %sext0 = sext i32 %arg0 to i64
 %sext1 = sext i32 %arg1 to i64
@@ -405,8 +405,8 @@ define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 }
 
 ; GCN-LABEL: {{^}}test_mad_u64_u32:
-; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
-; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
 define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 %sext0 = zext i32 %arg0 to i64
 %sext1 = zext i32 %arg1 to i64
@@ -478,8 +478,8 @@ exit:
 }
 
 ; GCN-LABEL: {{^}}fdiv_f32:
-; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 ; GCN: v_rcp_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX1032: v_div_scale_f32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 ; GFX1064: v_div_scale_f32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}