diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 23ef56afc39c9..c8d1542f2a1ad 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -366,6 +366,28 @@ enum Width : unsigned {
   WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
 };
 
+enum ModeRegisterMasks : uint32_t {
+  FP_ROUND_MASK = 0xf << 0,  // Bits 0..3
+  FP_DENORM_MASK = 0xf << 4, // Bits 4..7
+  DX10_CLAMP_MASK = 1 << 8,
+  IEEE_MODE_MASK = 1 << 9,
+  LOD_CLAMP_MASK = 1 << 10,
+  DEBUG_MASK = 1 << 11,
+
+  // EXCP_EN fields.
+  EXCP_EN_INVALID_MASK = 1 << 12,
+  EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13,
+  EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,
+  EXCP_EN_OVERFLOW_MASK = 1 << 15,
+  EXCP_EN_UNDERFLOW_MASK = 1 << 16,
+  EXCP_EN_INEXACT_MASK = 1 << 17,
+  EXCP_EN_INT_DIV0_MASK = 1 << 18,
+
+  GPR_IDX_EN_MASK = 1 << 27,
+  VSKIP_MASK = 1 << 28,
+  CSP_MASK = 0x7u << 29 // Bits 29..31
+};
+
 } // namespace Hwreg
 
 namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 619ce1abeb813..452ff785ec064 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4119,6 +4119,75 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
     }
 
     return emitGWSMemViolTestLoop(MI, BB);
+  case AMDGPU::S_SETREG_B32: {
+    if (!getSubtarget()->hasDenormModeInst())
+      return BB;
+
+    // Try to optimize cases that only set the denormal mode or rounding mode.
+    //
+    // If the s_setreg_b32 fully sets all of the bits in the rounding mode or
+    // denormal mode to a constant, we can use s_round_mode or s_denorm_mode
+    // instead.
+    //
+    // FIXME: This could be predicates on the immediate, but tablegen doesn't
+    // allow you to have a no side effect instruction in the output of a
+    // sideeffecting pattern.
+
+    // TODO: Should also emit a no side effects pseudo if only FP bits are
+    // touched, even if not all of them or to a variable.
+    unsigned ID, Offset, Width;
+    AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
+    if (ID != AMDGPU::Hwreg::ID_MODE)
+      return BB;
+
+    const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
+    const unsigned SetMask = WidthMask << Offset;
+    unsigned SetDenormOp = 0;
+    unsigned SetRoundOp = 0;
+
+    // The dedicated instructions can only set the whole denorm or round mode at
+    // once, not a subset of bits in either.
+    if (Width == 8 && (SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
+                                  AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask) {
+      // If this fully sets both the round and denorm mode, emit the two
+      // dedicated instructions for these.
+      assert(Offset == 0);
+      SetRoundOp = AMDGPU::S_ROUND_MODE;
+      SetDenormOp = AMDGPU::S_DENORM_MODE;
+    } else if (Width == 4) {
+      if ((SetMask & AMDGPU::Hwreg::FP_ROUND_MASK) == SetMask) {
+        SetRoundOp = AMDGPU::S_ROUND_MODE;
+        assert(Offset == 0);
+      } else if ((SetMask & AMDGPU::Hwreg::FP_DENORM_MASK) == SetMask) {
+        SetDenormOp = AMDGPU::S_DENORM_MODE;
+        assert(Offset == 4);
+      }
+    }
+
+    if (SetRoundOp || SetDenormOp) {
+      MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+      MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
+      if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
+        unsigned ImmVal = Def->getOperand(1).getImm();
+        if (SetRoundOp) {
+          BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
+            .addImm(ImmVal & 0xf);
+
+          // If we also have the denorm mode, get just the denorm mode bits.
+          ImmVal >>= 4;
+        }
+
+        if (SetDenormOp) {
+          BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
+            .addImm(ImmVal & 0xf);
+        }
+
+        MI.eraseFromParent();
+      }
+    }
+
+    return BB;
+  }
   default:
     return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
   }
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index dbafea5a1347e..774b9cf027853 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -808,6 +808,10 @@ def S_SETREG_B32 : SOPK_Pseudo <
   (outs), (ins SReg_32:$sdst, hwreg:$simm16),
   "$simm16, $sdst",
   [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+
+  // Use custom inserter to optimize some cases to
+  // S_DENORM_MODE/S_ROUND_MODE.
+  let usesCustomInserter = 1;
   let Defs = [MODE];
   let Uses = [MODE];
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
index 72de32e5a5ff4..531495c53b5c9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -309,7 +309,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_0() {
 ; GFX10-LABEL: test_setreg_full_round_mode_0:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0
+; GFX10-NEXT:    s_round_mode 0x0
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -329,7 +329,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_1() {
 ; GFX10-LABEL: test_setreg_full_round_mode_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; implicit-def: $vcc_hi
-; GFX10-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1
+; GFX10-NEXT:    s_round_mode 0x1
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
@@ -349,7 +349,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_2() {
 ; GFX10-LABEL: test_setreg_full_round_mode_2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    ; 
implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 +; GFX10-NEXT: s_round_mode 0x2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -369,7 +369,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_4() { ; GFX10-LABEL: test_setreg_full_round_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 +; GFX10-NEXT: s_round_mode 0x4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -389,7 +389,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_8() { ; GFX10-LABEL: test_setreg_full_round_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 +; GFX10-NEXT: s_round_mode 0x8 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -409,7 +409,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() { ; GFX10-LABEL: test_setreg_full_round_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 +; GFX10-NEXT: s_round_mode 0xf ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -430,7 +430,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_42() { ; GFX10-LABEL: test_setreg_full_round_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 +; GFX10-NEXT: s_round_mode 0xa ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -450,7 +450,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_0() { ; GFX10-LABEL: test_setreg_full_denorm_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -470,7 +470,7 @@ define amdgpu_kernel void 
@test_setreg_full_denorm_mode_1() { ; GFX10-LABEL: test_setreg_full_denorm_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 +; GFX10-NEXT: s_denorm_mode 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -491,7 +491,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_2() { ; GFX10-LABEL: test_setreg_full_denorm_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 +; GFX10-NEXT: s_denorm_mode 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -511,7 +511,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_4() { ; GFX10-LABEL: test_setreg_full_denorm_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 +; GFX10-NEXT: s_denorm_mode 4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -531,7 +531,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_8() { ; GFX10-LABEL: test_setreg_full_denorm_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 +; GFX10-NEXT: s_denorm_mode 8 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -551,7 +551,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_15() { ; GFX10-LABEL: test_setreg_full_denorm_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 +; GFX10-NEXT: s_denorm_mode 15 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -571,7 +571,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() { ; GFX10-LABEL: test_setreg_full_denorm_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 +; GFX10-NEXT: s_denorm_mode 10 ; 
GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -591,10 +591,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 0) call void asm sideeffect "", ""() @@ -611,10 +612,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_1() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x1 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 1) call void asm sideeffect "", ""() @@ -631,10 +633,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x2 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 2) call void asm sideeffect "", ""() @@ -651,10 +654,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x4 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 
; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 4) call void asm sideeffect "", ""() @@ -671,10 +675,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x8 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 8) call void asm sideeffect "", ""() @@ -691,10 +696,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 1 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 16) call void asm sideeffect "", ""() @@ -711,10 +717,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 2 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 32) call void asm sideeffect "", ""() @@ -731,10 +738,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 ; GFX10-NEXT: ;;#ASMSTART ; 
GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 4 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 64) call void asm sideeffect "", ""() @@ -751,10 +759,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128( ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 8 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 128) call void asm sideeffect "", ""() @@ -771,10 +780,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0xf ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 15) call void asm sideeffect "", ""() @@ -791,10 +801,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255( ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0xf ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 15 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 255) call void asm sideeffect "", ""() @@ -812,10 +823,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597( ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x5 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: 
s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 5 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 597) call void asm sideeffect "", ""() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll index 934e39e5987ff..515b41d066c63 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll @@ -309,7 +309,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_0() { ; GFX10-LABEL: test_setreg_full_round_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -329,7 +329,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_1() { ; GFX10-LABEL: test_setreg_full_round_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 +; GFX10-NEXT: s_round_mode 0x1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -349,7 +349,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_2() { ; GFX10-LABEL: test_setreg_full_round_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 +; GFX10-NEXT: s_round_mode 0x2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -369,7 +369,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_4() { ; GFX10-LABEL: test_setreg_full_round_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 +; GFX10-NEXT: s_round_mode 0x4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -389,7 +389,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_8() { ; 
GFX10-LABEL: test_setreg_full_round_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 +; GFX10-NEXT: s_round_mode 0x8 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -409,7 +409,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_15() { ; GFX10-LABEL: test_setreg_full_round_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 +; GFX10-NEXT: s_round_mode 0xf ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -430,7 +430,7 @@ define amdgpu_kernel void @test_setreg_full_round_mode_42() { ; GFX10-LABEL: test_setreg_full_round_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 42 +; GFX10-NEXT: s_round_mode 0xa ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -450,7 +450,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_0() { ; GFX10-LABEL: test_setreg_full_denorm_mode_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 0 +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -470,7 +470,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_1() { ; GFX10-LABEL: test_setreg_full_denorm_mode_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 1 +; GFX10-NEXT: s_denorm_mode 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -491,7 +491,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_2() { ; GFX10-LABEL: test_setreg_full_denorm_mode_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 2 +; GFX10-NEXT: s_denorm_mode 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: 
;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -511,7 +511,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_4() { ; GFX10-LABEL: test_setreg_full_denorm_mode_4: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 4 +; GFX10-NEXT: s_denorm_mode 4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -531,7 +531,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_8() { ; GFX10-LABEL: test_setreg_full_denorm_mode_8: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 8 +; GFX10-NEXT: s_denorm_mode 8 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -551,7 +551,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_15() { ; GFX10-LABEL: test_setreg_full_denorm_mode_15: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 15 +; GFX10-NEXT: s_denorm_mode 15 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -571,7 +571,7 @@ define amdgpu_kernel void @test_setreg_full_denorm_mode_42() { ; GFX10-LABEL: test_setreg_full_denorm_mode_42: ; GFX10: ; %bb.0: ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 4), 42 +; GFX10-NEXT: s_denorm_mode 10 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_endpgm @@ -591,10 +591,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_0() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_0: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 0) call void asm sideeffect "", ""() @@ -611,10 +612,11 @@ define amdgpu_kernel 
void @test_setreg_full_both_round_mode_and_denorm_mode_1() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_1: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x1 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 1) call void asm sideeffect "", ""() @@ -631,10 +633,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_2() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_2: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x2 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 2) call void asm sideeffect "", ""() @@ -651,10 +654,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_4() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_4: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x4 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 4) call void asm sideeffect "", ""() @@ -671,10 +675,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_8() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_8: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x8 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 8 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 8) call void asm sideeffect "", ""() @@ 
-691,10 +696,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_16() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_16: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 16 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 1 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 16) call void asm sideeffect "", ""() @@ -711,10 +717,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_32() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_32: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 32 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 2 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 32) call void asm sideeffect "", ""() @@ -731,10 +738,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_64() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_64: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 64 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 4 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 64) call void asm sideeffect "", ""() @@ -751,10 +759,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_128( ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_128: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x80 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 8 ; GFX10-NEXT: s_endpgm call void 
@llvm.amdgcn.s.setreg(i32 14337, i32 128) call void asm sideeffect "", ""() @@ -771,10 +780,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_15() ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_15: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0xf ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 15 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 0 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 15) call void asm sideeffect "", ""() @@ -791,10 +801,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_255( ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_255: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0xf ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0xff ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 15 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 255) call void asm sideeffect "", ""() @@ -812,10 +823,11 @@ define amdgpu_kernel void @test_setreg_full_both_round_mode_and_denorm_mode_597( ; ; GFX10-LABEL: test_setreg_full_both_round_mode_and_denorm_mode_597: ; GFX10: ; %bb.0: +; GFX10-NEXT: s_round_mode 0x5 ; GFX10-NEXT: ; implicit-def: $vcc_hi -; GFX10-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 8), 0x255 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: s_denorm_mode 5 ; GFX10-NEXT: s_endpgm call void @llvm.amdgcn.s.setreg(i32 14337, i32 597) call void asm sideeffect "", ""()