-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU][True16][CodeGen] lower flat_d16_saddr_t16 to saddr inst #166603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
fdf68d1 to
7a0bab7
Compare
|
@llvm/pr-subscribers-backend-amdgpu

Author: Brox Chen (broxigarchen)

Changes

For D16 insts in true16 mode, a pseudo t16 inst is created first and is then lowered to the hi/lo inst during MC lowering using the D16T16 table.

The D16T16 table selects both `flat_load_d16_t16` and `flat_load_d16_t16_saddr` to `flat_load_d16_(hi)_b16`, which is wrong: the saddr pseudo inst `flat_load_d16_t16_saddr` should be selected to the saddr hi/lo inst.

The global/scratch variants are correct; flat seems to be the only one with this issue.

Patch is 46.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166603.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 6ef224148e44b..bd357e2f38410 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -262,8 +262,16 @@ multiclass FLAT_Flat_Load_Pseudo<string opName, RegisterOperand regClass = AVLdS
multiclass FLAT_Flat_Load_Pseudo_t16<string opName> {
defm "" : FLAT_Flat_Load_Pseudo<opName, AVLdSt_32, 1>;
- let True16Predicate = UseRealTrue16Insts in
- defm _t16 : FLAT_Flat_Load_Pseudo<opName#"_t16", VGPROp_16>, True16D16Table<NAME#"_HI", NAME>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasFlatGVSMode, HasTrue16BitInsts] in {
+ def _t16 : FLAT_Load_Pseudo<Name16, VGPROp_16>,
+ GlobalSaddrTable<0, Name16>,
+ True16D16Table<NAME#"_HI", NAME>;
+ def _t16_SADDR : FLAT_Load_Pseudo<Name16, VGPROp_16, 0, 1, 1>,
+ GlobalSaddrTable<1, Name16>,
+ True16D16Table<NAME#"_HI_SADDR", NAME#"_SADDR">;
+ }
}
class FLAT_Store_Pseudo <string opName, RegisterOperand vdataClass,
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
index 31344c78990b8..5d8037a34757e 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll
@@ -2,6 +2,9 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=+real-true16,-sramecc < %s | FileCheck -check-prefixes=GFX1250,GFX1250-NOECC,GFX1250-NOECC-SDAG-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 -mattr=-real-true16,-sramecc < %s | FileCheck -check-prefixes=GFX1250,GFX1250-NOECC,GFX1250-NOECC-SDAG-FAKE16 %s
+
; Test using saddr addressing mode of flat_*load_* instructions.
; --------------------------------------------------------------------------------
@@ -87,6 +90,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_neg8388609(ptr inreg %sbase) {
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_neg8388609:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0xff800000, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:-1
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 -8388609
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -113,6 +125,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_0xFFFFFFFF(ptr inreg %sbase) {
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_0xFFFFFFFF:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0xff800000, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:8388607
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294967295
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -138,6 +159,14 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_0x100000000(ptr inreg %sbase)
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_0x100000000:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NOECC-NEXT: s_add_co_i32 s3, s3, 1
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[2:3]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294967296
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -164,6 +193,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_0x100000001(ptr inreg %sbase)
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_0x100000001:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:1
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294967297
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -190,6 +228,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_0x100000FFF(ptr inreg %sbase)
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_0x100000FFF:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:4095
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294971391
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -216,6 +263,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_0x100001000(ptr inreg %sbase)
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_0x100001000:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:4096
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294971392
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -242,6 +298,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_neg0xFFFFFFFF(ptr inreg %sbase
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_neg0xFFFFFFFF:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0x800000, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:-8388607
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 -4294967295
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -267,6 +332,14 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_neg0x100000000(ptr inreg %sbas
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_neg0x100000000:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NOECC-NEXT: s_add_co_i32 s3, s3, -1
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[2:3]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 -4294967296
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -293,6 +366,15 @@ define amdgpu_ps float @flat_load_saddr_i8_offset_neg0x100000001(ptr inreg %sbas
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_offset_neg0x100000001:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, s0, 0, s2
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s0
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:-1
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 -4294967297
%load = load i8, ptr %gep0
%zext = zext i8 %load to i32
@@ -361,6 +443,18 @@ define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_offset_8388608(ptr inreg %s
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_zext_vgpr_offset_8388608:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_nc_u64_e32 v[0:1], s[2:3], v[0:1]
+; GFX1250-NOECC-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NOECC-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 8388608
@@ -517,6 +611,17 @@ define amdgpu_ps float @flat_load_saddr_uniform_ptr_in_vgprs(i32 %voffset) {
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_uniform_ptr_in_vgprs:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NOECC-NEXT: ds_load_b64 v[2:3], v1
+; GFX1250-NOECC-NEXT: s_wait_dscnt 0x0
+; GFX1250-NOECC-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1250-NOECC-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[0:1]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%sbase = load ptr, ptr addrspace(3) @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -550,6 +655,17 @@ define amdgpu_ps float @flat_load_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voff
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:42
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NOECC-NEXT: ds_load_b64 v[2:3], v1
+; GFX1250-NOECC-NEXT: s_wait_dscnt 0x0
+; GFX1250-NOECC-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1250-NOECC-NEXT: v_readfirstlane_b32 s1, v3
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[0:1] offset:42
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%sbase = load ptr, ptr addrspace(3) @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -577,6 +693,13 @@ define amdgpu_ps float @flat_load_saddr_i8_zext_uniform_offset(ptr inreg %sbase,
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_zext_uniform_offset:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, s4
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[2:3]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load i8, ptr %gep0
@@ -602,6 +725,13 @@ define amdgpu_ps float @flat_load_saddr_i8_zext_uniform_offset_immoffset(ptr inr
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[0:1] offset:-24
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_zext_uniform_offset_immoffset:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, s4
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-24
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 -24
@@ -628,6 +758,13 @@ define amdgpu_ps float @flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add(ptr in
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, s4
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[2:3]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%sbase.as.int = ptrtoint ptr %sbase to i64
%add = add i64 %zext.offset, %sbase.as.int
@@ -655,6 +792,13 @@ define amdgpu_ps float @flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_of
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[0:1] offset:128
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, s4
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v0, s[2:3] offset:128
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%sbase.as.int = ptrtoint ptr %sbase to i64
%add = add i64 %zext.offset, %sbase.as.int
@@ -688,6 +832,15 @@ define amdgpu_ps float @flat_load_i8_vgpr64_sgpr32(ptr %vbase, i32 inreg %soffse
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1]
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_i8_vgpr64_sgpr32:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: s_mov_b32 s3, 0
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NOECC-NEXT: v_add_nc_u64_e32 v[0:1], s[2:3], v[0:1]
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1]
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr %vbase, i64 %zext.offset
%load = load i8, ptr %gep0
@@ -718,6 +871,15 @@ define amdgpu_ps float @flat_load_i8_vgpr64_sgpr32_offset_8388607(ptr %vbase, i3
; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:8388607
; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-LABEL: flat_load_i8_vgpr64_sgpr32_offset_8388607:
+; GFX1250-NOECC: ; %bb.0:
+; GFX1250-NOECC-NEXT: s_mov_b32 s3, 0
+; GFX1250-NOECC-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NOECC-NEXT: v_add_nc_u64_e32 v[0:1], s[2:3], v[0:1]
+; GFX1250-NOECC-NEXT: flat_load_u8 v0, v[0:1] offset:8388607
+; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %soffset to i64
%gep0 = getelementptr inbounds i8, ptr %vbase, i64 %zext.offset
%gep1 = getelementptr inbounds i8, ptr %gep0, i64 8388607
@@ -818,11 +980,29 @@ define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_too_large(ptr inreg
; --------------------------------------------------------------------------------
define amdgpu_ps half @flat_load_saddr_i16(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-LABEL: flat_load_saddr_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3]
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-SDAG-LABEL: flat_load_saddr_i16:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3]
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: flat_load_saddr_i16:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3]
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-SDAG-TRUE16-LABEL: flat_load_saddr_i16:
+; GFX1250-NOECC-SDAG-TRUE16: ; %bb.0:
+; GFX1250-NOECC-SDAG-TRUE16-NEXT: flat_load_d16_b16 v0, v0, s[2:3]
+; GFX1250-NOECC-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-SDAG-FAKE16-LABEL: flat_load_saddr_i16:
+; GFX1250-NOECC-SDAG-FAKE16: ; %bb.0:
+; GFX1250-NOECC-SDAG-FAKE16-NEXT: flat_load_u16 v0, v0, s[2:3]
+; GFX1250-NOECC-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-SDAG-FAKE16-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%load = load i16, ptr %gep0
@@ -831,11 +1011,29 @@ define amdgpu_ps half @flat_load_saddr_i16(ptr inreg %sbase, i32 %voffset) {
}
define amdgpu_ps half @flat_load_saddr_i16_immneg128(ptr inreg %sbase, i32 %voffset) {
-; GFX1250-LABEL: flat_load_saddr_i16_immneg128:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250-SDAG-LABEL: flat_load_saddr_i16_immneg128:
+; GFX1250-SDAG: ; %bb.0:
+; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128
+; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-SDAG-NEXT: ; return to shader part epilog
+;
+; GFX1250-GISEL-LABEL: flat_load_saddr_i16_immneg128:
+; GFX1250-GISEL: ; %bb.0:
+; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128
+; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-GISEL-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-SDAG-TRUE16-LABEL: flat_load_saddr_i16_immneg128:
+; GFX1250-NOECC-SDAG-TRUE16: ; %bb.0:
+; GFX1250-NOECC-SDAG-TRUE16-NEXT: flat_load_d16_b16 v0, v0, s[2:3] offset:-128
+; GFX1250-NOECC-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-SDAG-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250-NOECC-SDAG-FAKE16-LABEL: flat_load_saddr_i16_immneg128:
+; GFX1250-NOECC-SDAG-FAKE16: ; %bb.0:
+; GFX1250-NOECC-SDAG-FAKE16-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128
+; GFX1250-NOECC-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NOECC-SDAG-FAKE16-NEXT: ; return to shader part epilog
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
%gep1 = g...
[truncated]
|
Sisyph
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is it only affecting gfx1250? I would expect we could do the same on gfx11 or gfx12.
We do the same on gfx1100/gfx1200, but we never select the saddr inst there, and thus never hit this issue, since the saddr inst requires FlatGVSMode.
|
|
||
| defvar Name16 = opName#"_t16"; | ||
| let OtherPredicates = [HasFlatGVSMode, HasTrue16BitInsts] in { | ||
| def _t16 : FLAT_Load_Pseudo<Name16, VGPROp_16>, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should not have HasFlatGVSMode; shouldn't only the SADDR form require it, just like above in FLAT_Flat_Load_Pseudo?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree
7a0bab7 to
d33b64c
Compare
rampitec
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
In true16 mode, D16 insts are first lowered to a pseudo t16 inst, and then lowered to the hi/lo inst during MC lowering using the D16T16 table.

However, the D16T16 table selects both `flat_load_d16_t16` and `flat_load_d16_t16_saddr` to `flat_load_d16_(hi)_b16`, which is wrong. The saddr pseudo inst `flat_load_d16_t16_saddr` should be selected to the saddr hi/lo inst.

The global/scratch variants are correct; flat seems to be the only one with this issue.