diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index c84962b52befd..c8a4e22ed1dae 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1724,6 +1724,7 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi defm : FlatLoadPats ; defm : FlatLoadPats ; +defm : FlatLoadPats ; defm : FlatStorePats ; defm : FlatStorePats ; @@ -1735,7 +1736,7 @@ defm : FlatStorePats ; foreach vt = VReg_64.RegTypes in { defm : FlatStorePats ; -def : FlatLoadPat ; +defm : FlatLoadPats ; } defm : FlatStorePats ; @@ -1747,6 +1748,7 @@ defm : FlatStorePats ; defm : FlatStorePats ; defm : FlatStorePats ; +defm : FlatStorePats ; defm : FlatStorePats ; defm : FlatStorePats ; @@ -1792,6 +1794,9 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>; } // end foreach as +defm : FlatStorePats ; +defm : FlatStorePats ; + let SubtargetPredicate = isGFX12Plus in { defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; @@ -1806,19 +1811,19 @@ defm : FlatStorePats ; let OtherPredicates = [D16PreservesUnusedBits] in { // TODO: Handle atomic loads -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; -def : FlatLoadPat_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; +defm : FlatLoadPats_D16 ; } } // End OtherPredicates = [HasFlatAddressSpace] @@ -1890,6 +1895,7 @@ defm : GlobalFLATStorePats ; // appropriate waits. defm : GlobalFLATLoadPats ; defm : GlobalFLATLoadPats ; +defm : GlobalFLATLoadPats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; @@ -1929,6 +1935,7 @@ defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; defm : GlobalFLATStorePats ; +defm : GlobalFLATStorePats ; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir index cebdffc74847c..eba64b853ac05 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-flat.mir @@ -223,37 +223,37 @@ body: | ; GFX7-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX11-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX11: liveins: $vgpr0_vgpr1 ; GFX11-NEXT: {{ $}} - ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX12-LABEL: name: load_atomic_flat_v2s32_seq_cst ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} - ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load seq_cst (<2 x s32>)) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX12-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>)) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 0) $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir index eafc96dd32bdd..474f1308d8e24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir @@ -252,30 +252,30 @@ body: | ; GFX7-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX7-FLAT-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX7-FLAT: liveins: $vgpr0_vgpr1 ; GFX7-FLAT-NEXT: {{ $}} - ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] ; ; GFX9-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; ; GFX10-LABEL: name: load_atomic_global_v2s32_seq_cst ; GFX10: liveins: $vgpr0_vgpr1 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p1) :: (load seq_cst (<2 x s32>), addrspace 1) - ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst (<2 x s32>), addrspace 1) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load seq_cst (<2 x s32>), align 8, addrspace 1) $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir index 2675295ea98ed..ae010a872a41d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-atomic-flat.mir @@ -22,6 +22,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2 ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s32)) + ; ; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -51,6 +52,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p0) :: (store seq_cst (<2 x s16>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v2s16_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -80,6 +82,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p3), [[COPY1]](p0) :: (store seq_cst (p3)) + ; ; GFX9-LABEL: name: atomic_store_flat_p3_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -109,6 +112,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p0) :: (store seq_cst (p5)) + ; ; GFX9-LABEL: name: atomic_store_flat_p5_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -138,6 +142,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p6) = COPY $vgpr0 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr1_vgpr2 ; GFX7-NEXT: G_STORE [[COPY]](p6), [[COPY1]](p0) :: (store seq_cst (p6)) + ; ; GFX9-LABEL: name: atomic_store_flat_p6_seq_cst ; GFX9: liveins: $vgpr0, $vgpr1_vgpr2 ; GFX9-NEXT: {{ $}} @@ -167,6 +172,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (s64)) + ; ; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -193,15 +199,16 @@ body: | ; GFX7-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} - ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v2s32_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p0) :: (store seq_cst (<2 x s32>)) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst (<2 x s32>)) %0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %1:vgpr(p0) = COPY $vgpr2_vgpr3 G_STORE %0, %1 :: (store seq_cst (<2 x s32>), align 8, addrspace 0) @@ -225,6 +232,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p0) :: (store seq_cst (<4 x s16>)) + ; ; GFX9-LABEL: name: atomic_store_flat_v4s16_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -254,6 +262,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](p0), [[COPY1]](p0) :: (store seq_cst (p0)) + ; ; GFX9-LABEL: name: atomic_store_flat_p0_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} @@ -282,6 +291,7 @@ body: | ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(p0) = COPY $vgpr2_vgpr3 ; GFX7-NEXT: G_STORE [[COPY]](p1), [[COPY1]](p0) :: (store seq_cst (p1)) + ; ; GFX9-LABEL: name: atomic_store_flat_p1_seq_cst ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll index f0988a17b35f0..f54fbbaabe9f5 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll @@ -998,24 +998,11 @@ define amdgpu_ps <2 x half> @flat_load_saddr_p3_immneg128(ptr inreg %sbase, i32 } define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_f64: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_f64: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_f64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load double, ptr %gep0 @@ -1024,24 +1011,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset } define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_f64_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_f64_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_f64_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1051,24 +1025,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i3 } define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_i64: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_i64: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load i64, ptr %gep0 @@ -1077,24 +1038,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset } define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_i64_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_i64_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_i64_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1104,24 +1052,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i3 } define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <2 x float>, ptr %gep0 @@ -1129,24 +1064,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2f32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1155,24 +1077,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <2 x i32>, ptr %gep0 @@ -1181,24 +1090,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v2i32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1208,24 +1104,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <4 x i16>, ptr %gep0 @@ -1234,24 +1117,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4i16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1261,24 +1131,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4f16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load <4 x half>, ptr %gep0 @@ -1287,24 +1144,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffs } define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_v4f16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 @@ -1314,24 +1158,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase, } define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_p1: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_p1: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_p1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %load = load ptr, ptr %gep0 @@ -1341,24 +1172,11 @@ define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset) } define amdgpu_ps <2 x float> @flat_load_saddr_p1_immneg128(ptr inreg %sbase, i32 %voffset) { -; GFX1250-SDAG-LABEL: flat_load_saddr_p1_immneg128: -; GFX1250-SDAG: ; %bb.0: -; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] -; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-SDAG-NEXT: ; return to shader part epilog -; -; GFX1250-GISEL-LABEL: flat_load_saddr_p1_immneg128: -; GFX1250-GISEL: ; %bb.0: -; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 -; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo -; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 -; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GFX1250-GISEL-NEXT: ; return to shader part epilog +; GFX1250-LABEL: flat_load_saddr_p1_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll index 91a2a0b651132..81db7354757d9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll @@ -1,10 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; xUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=SDAG-REAL16 %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=SDAG-FAKE16 %s ; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GISEL-REAL16 %s ; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GISEL-FAKE16 %s -; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select. ; FIXME: GlobalISel does not work with bf16 declare float @llvm.amdgcn.tanh.f32(float) #0