diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8ea64d17417f7..4b79ab3c3c1ba 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -262,8 +262,18 @@ multiclass FLAT_Flat_Load_Pseudo { defm "" : FLAT_Flat_Load_Pseudo; - let True16Predicate = UseRealTrue16Insts in - defm _t16 : FLAT_Flat_Load_Pseudo, True16D16Table; + + defvar Name16 = opName#"_t16"; + let True16Predicate = UseRealTrue16Insts in { + def _t16 : FLAT_Load_Pseudo, + GlobalSaddrTable<0, Name16>, + True16D16Table; + + let OtherPredicates = [HasFlatGVSMode] in + def _t16_SADDR : FLAT_Load_Pseudo, + GlobalSaddrTable<1, Name16>, + True16D16Table; + } } class FLAT_Store_Pseudo @atomic_flat_load_saddr_i64_immneg128(ptr inreg %sb ; -------------------------------------------------------------------------------- define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_undef_hi(ptr inreg %sbase, i32 %voffset) { -; GFX1250-LABEL: flat_load_saddr_i16_d16lo_undef_hi: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_undef_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_undef_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_undef_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_b16 v0, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i16, ptr %gep0 @@ -1637,11 +1883,23 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_undef_hi(ptr inreg %sbase } define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_undef_hi_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-LABEL: flat_load_saddr_i16_d16lo_undef_hi_immneg128: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_undef_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_undef_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_undef_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_b16 v0, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1652,12 +1910,27 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_undef_hi_immneg128(ptr in } define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zero_hi(ptr inreg %sbase, i32 %voffset) { -; GFX1250-LABEL: flat_load_saddr_i16_d16lo_zero_hi: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_zero_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_zero_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_zero_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NOECC-NEXT: flat_load_d16_b16 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i16, ptr %gep0 @@ -1667,12 +1940,27 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zero_hi(ptr inreg %sbase, } define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zero_hi_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-LABEL: flat_load_saddr_i16_d16lo_zero_hi_immneg128: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_zero_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_zero_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_zero_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NOECC-NEXT: flat_load_d16_b16 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1696,6 +1984,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_reg_hi(ptr inreg %sbase, ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_reg_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_b16 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i16, ptr %gep0 @@ -1718,6 +2013,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_reg_hi_immneg128(ptr inre ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_reg_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_b16 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1741,6 +2043,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zexti8_reg_hi(ptr inreg % ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_zexti8_reg_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_u8 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i8, ptr %gep0 @@ -1764,6 +2073,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zexti8_reg_hi_immneg128(p ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_zexti8_reg_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_u8 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1790,6 +2106,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_sexti8_reg_hi(ptr inreg % ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_sexti8_reg_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_i8 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i8, ptr %gep0 @@ -1815,6 +2138,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_sexti8_reg_hi_immneg128(p ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16lo_sexti8_reg_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_i8 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1830,12 +2160,25 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_sexti8_reg_hi_immneg128(p ; -------------------------------------------------------------------------------- define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_undef_hi(ptr inreg %sbase, i32 %voffset) { -; GFX1250-LABEL: flat_load_saddr_i16_d16hi_undef_hi: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_undef_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_undef_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_undef_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_b16 v0, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i16, ptr %gep0 @@ -1845,12 +2188,25 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_undef_hi(ptr inreg %sbase } define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_undef_hi_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-LABEL: flat_load_saddr_i16_d16hi_undef_hi_immneg128: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 -; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX1250-NEXT: ; return to shader part epilog +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_undef_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_undef_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_undef_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_b16 v0, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1874,6 +2230,22 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zero_hi(ptr inreg %sbase, ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-SDAG-TRUE16-LABEL: flat_load_saddr_i16_d16hi_zero_hi: +; GFX1250-NOECC-SDAG-TRUE16: ; %bb.0: +; GFX1250-NOECC-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: flat_load_d16_hi_b16 v1, v0, s[2:3] +; GFX1250-NOECC-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-SDAG-FAKE16-LABEL: flat_load_saddr_i16_d16hi_zero_hi: +; GFX1250-NOECC-SDAG-FAKE16: ; %bb.0: +; GFX1250-NOECC-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: flat_load_d16_hi_b16 v1, v0, s[2:3] +; GFX1250-NOECC-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i16, ptr %gep0 @@ -1896,6 +2268,22 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zero_hi_immneg128(ptr inr ; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-SDAG-TRUE16-LABEL: flat_load_saddr_i16_d16hi_zero_hi_immneg128: +; GFX1250-NOECC-SDAG-TRUE16: ; %bb.0: +; GFX1250-NOECC-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: flat_load_d16_hi_b16 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-SDAG-TRUE16-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-SDAG-FAKE16-LABEL: flat_load_saddr_i16_d16hi_zero_hi_immneg128: +; GFX1250-NOECC-SDAG-FAKE16: ; %bb.0: +; GFX1250-NOECC-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: flat_load_d16_hi_b16 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-SDAG-FAKE16-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1921,6 +2309,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_reg_hi(ptr inreg %sbase, ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_reg_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_b16 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i16, ptr %gep0 @@ -1945,6 +2340,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_reg_hi_immneg128(ptr inre ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_reg_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_b16 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1970,6 +2372,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zexti8_reg_hi(ptr inreg % ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_zexti8_reg_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_u8 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i8, ptr %gep0 @@ -1995,6 +2404,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zexti8_reg_hi_immneg128(p ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_zexti8_reg_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_u8 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -2022,6 +2438,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_sexti8_reg_hi(ptr inreg % ; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_sexti8_reg_hi: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_i8 v1, v0, s[2:3] +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i8, ptr %gep0 @@ -2048,6 +2471,13 @@ define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_sexti8_reg_hi_immneg128(p ; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 ; GFX1250-GISEL-NEXT: ; return to shader part epilog +; +; GFX1250-NOECC-LABEL: flat_load_saddr_i16_d16hi_sexti8_reg_hi_immneg128: +; GFX1250-NOECC: ; %bb.0: +; GFX1250-NOECC-NEXT: flat_load_d16_hi_i8 v1, v0, s[2:3] offset:-128 +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, v1 +; GFX1250-NOECC-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -2136,6 +2566,23 @@ define amdgpu_ps void @flat_addr_64bit_lsr_iv(ptr inreg %arg) { ; GFX1250-GISEL-NEXT: s_cbranch_vccz .LBB116_1 ; GFX1250-GISEL-NEXT: ; %bb.2: ; %bb2 ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX1250-NOECC-LABEL: flat_addr_64bit_lsr_iv: +; GFX1250-NOECC: ; %bb.0: ; %bb +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NOECC-NEXT: s_mov_b64 s[0:1], 0 +; GFX1250-NOECC-NEXT: .LBB116_1: ; %bb3 +; GFX1250-NOECC-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NOECC-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NOECC-NEXT: s_add_nc_u64 s[4:5], s[2:3], s[0:1] +; GFX1250-NOECC-NEXT: s_add_nc_u64 s[0:1], s[0:1], 4 +; GFX1250-NOECC-NEXT: s_wait_dscnt 0x0 +; GFX1250-NOECC-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS +; GFX1250-NOECC-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOECC-NEXT: s_cmp_eq_u32 s0, 0x400 +; GFX1250-NOECC-NEXT: s_cbranch_scc0 .LBB116_1 +; GFX1250-NOECC-NEXT: ; %bb.2: ; %bb2 +; GFX1250-NOECC-NEXT: s_endpgm bb: br label %bb3 @@ -2196,6 +2643,26 @@ define amdgpu_ps void @flat_addr_64bit_lsr_iv_multiload(ptr inreg %arg, ptr inre ; GFX1250-GISEL-NEXT: s_cbranch_vccz .LBB117_1 ; GFX1250-GISEL-NEXT: ; %bb.2: ; %bb2 ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX1250-NOECC-LABEL: flat_addr_64bit_lsr_iv_multiload: +; GFX1250-NOECC: ; %bb.0: ; %bb +; GFX1250-NOECC-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NOECC-NEXT: s_mov_b64 s[0:1], 0 +; GFX1250-NOECC-NEXT: .LBB117_1: ; %bb3 +; GFX1250-NOECC-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NOECC-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NOECC-NEXT: s_add_nc_u64 s[4:5], s[2:3], s[0:1] +; GFX1250-NOECC-NEXT: s_add_nc_u64 s[0:1], s[0:1], 4 +; GFX1250-NOECC-NEXT: s_wait_dscnt 0x0 +; GFX1250-NOECC-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS +; GFX1250-NOECC-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NOECC-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS +; GFX1250-NOECC-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NOECC-NEXT: s_cmp_eq_u32 s0, 0x400 +; GFX1250-NOECC-NEXT: ; kill: killed $sgpr4_sgpr5 +; GFX1250-NOECC-NEXT: s_cbranch_scc0 .LBB117_1 +; GFX1250-NOECC-NEXT: ; %bb.2: ; %bb2 +; GFX1250-NOECC-NEXT: s_endpgm bb: br label %bb3