-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Sign extend simm16 in setreg intrinsic #77997
Conversation
We currently force users to use a negative constant in the intrinsic call. Changing it to zext would break existing programs, so just sign extend the argument.
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) Changes: We currently force users to use a negative constant in the intrinsic call. Changing it to zext would break existing programs, so just sign extend the argument. Full diff: https://github.com/llvm/llvm-project/pull/77997.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 46fa3d57a21cb2..5b35d4dcac2e4f 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1117,14 +1117,12 @@ def S_GETREG_B32 : SOPK_Pseudo <
let Defs = [MODE], Uses = [MODE] in {
// FIXME: Need to truncate immediate to 16-bits.
-class S_SETREG_B32_Pseudo <list<dag> pattern=[]> : SOPK_Pseudo <
+class S_SETREG_B32_Pseudo : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
- "$simm16, $sdst",
- pattern>;
+ "$simm16, $sdst">;
-def S_SETREG_B32 : S_SETREG_B32_Pseudo <
- [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+def S_SETREG_B32 : S_SETREG_B32_Pseudo {
// Use custom inserter to optimize some cases to
// S_DENORM_MODE/S_ROUND_MODE/S_SETREG_B32_mode.
let usesCustomInserter = 1;
@@ -1160,6 +1158,9 @@ def S_SETREG_IMM32_B32_mode : S_SETREG_IMM32_B32_Pseudo {
} // End Defs = [MODE], Uses = [MODE]
+def : GCNPat<(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst),
+ (S_SETREG_B32 $sdst, (as_i16timm $simm16))>;
+
class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
SOPK_Pseudo<
opName,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index d2c14f2401fc35..99d80b5dd14b33 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -1433,6 +1433,72 @@ define amdgpu_kernel void @test_setreg_set_4_bits_straddles_round_and_denorm() {
ret void
}
+define amdgpu_ps void @test_63489(i32 inreg %var.mode) {
+; GFX6-LABEL: test_63489:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX6-NEXT: ;;#ASMSTART
+; GFX6-NEXT: ;;#ASMEND
+; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX789-LABEL: test_63489:
+; GFX789: ; %bb.0:
+; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX789-NEXT: ;;#ASMSTART
+; GFX789-NEXT: ;;#ASMEND
+; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: test_63489:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: test_63489:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+ call void @llvm.amdgcn.s.setreg(i32 63489, i32 %var.mode)
+ call void asm sideeffect "", ""()
+ ret void
+}
+
+define amdgpu_ps void @test_minus_2047(i32 inreg %var.mode) {
+; GFX6-LABEL: test_minus_2047:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX6-NEXT: ;;#ASMSTART
+; GFX6-NEXT: ;;#ASMEND
+; GFX6-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX789-LABEL: test_minus_2047:
+; GFX789: ; %bb.0:
+; GFX789-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX789-NEXT: ;;#ASMSTART
+; GFX789-NEXT: ;;#ASMEND
+; GFX789-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: test_minus_2047:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x80,0xb9]
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX11-LABEL: test_minus_2047:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE), s0 ; encoding: [0x01,0xf8,0x00,0xb9]
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
+ call void @llvm.amdgcn.s.setreg(i32 -2047, i32 %var.mode)
+ call void asm sideeffect "", ""()
+ ret void
+}
+
; FIXME: Broken for DAG
; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
|
This broke UBSan bots
|
Reverts #77997. Broke UBSan bots.
Thanks. The culprit is not the patch itself, but the added test has uncovered a pre-existing problem:
The test runs this part of the code with Width == 32, which overflows unsigned int. |
|
We currently force users to use a negative constant in the intrinsic call. Changing it to zext would break existing programs, so just sign extend the argument.
Reverts llvm#77997. Broke UBSan bots.
We currently force users to use a negative constant in the intrinsic call. Changing it to zext would break existing programs, so just sign extend the argument.