134 changes: 65 additions & 69 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@
//
//===----------------------------------------------------------------------===//

def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<i64, 2, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
def FLATOffsetSigned : ComplexPattern<i64, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
Expand Down Expand Up @@ -140,10 +137,13 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
(outs regClass:$vdst),
!con(
!con(
!con((ins VReg_64:$vaddr),
!if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
!if(EnableSaddr,
(ins SReg_64:$saddr, VGPR_32:$vaddr),
(ins VReg_64:$vaddr)),
(ins flat_offset:$offset)),
// FIXME: Operands with default values do not work with following non-optional operands.
!if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
(ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
" $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let has_data = 0;
let mayLoad = 1;
Expand All @@ -161,9 +161,10 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
opName,
(outs),
!con(
!con((ins VReg_64:$vaddr, vdataClass:$vdata),
!if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
!if(EnableSaddr,
(ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64:$saddr),
(ins VReg_64:$vaddr, vdataClass:$vdata)),
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
let mayLoad = 0;
let mayStore = 1;
Expand All @@ -187,7 +188,7 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
!con((ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
!con((ins SReg_64:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
" $vdst, $saddr$offset$glc$slc$dlc"> {
let is_flat_global = 1;
Expand Down Expand Up @@ -310,7 +311,7 @@ multiclass FLAT_Atomic_Pseudo<
bit isFP = isFloatType<data_vt>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata$offset$slc">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
Expand All @@ -321,10 +322,10 @@ multiclass FLAT_Atomic_Pseudo<

def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
" $vdst, $vaddr, $vdata$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
(atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1>{
let FPAtomic = isFP;
Expand All @@ -343,7 +344,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<

def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata, off$offset$slc">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
Expand All @@ -354,7 +355,7 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<

def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
(ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
" $vaddr, $vdata, $saddr$offset$slc">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
Expand All @@ -376,10 +377,10 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<

def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
(ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
" $vdst, $vaddr, $vdata, off$offset glc$slc",
[(set vt:$vdst,
(atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
(atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
AtomicNoRet <opName, 1> {
let has_saddr = 1;
Expand All @@ -388,7 +389,7 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<

def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_rc:$vdst),
(ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
(ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
" $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
Expand Down Expand Up @@ -728,69 +729,64 @@ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <

// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
(inst $vaddr, $offset, 0, 0, $slc)
(vt (node (FLATOffset i64:$vaddr, i16:$offset))),
(inst $vaddr, $offset)
>;

class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
(inst $vaddr, $offset, 0, 0, $slc, $in)
(node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, $in)
>;

class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
(inst $vaddr, $offset, 0, 0, $slc, $in)
>;

class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
(inst $vaddr, $offset, 0, 0, $slc)
(node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, $in)
>;

class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
(inst $vaddr, $offset, 0, 0, $slc)
(vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset))),
(inst $vaddr, $offset)
>;

class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
(node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
(inst $vaddr, rc:$data, $offset, 0, 0, $slc)
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node vt:$data, (FLATOffset i64:$vaddr, i16:$offset)),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
(node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
(inst $vaddr, rc:$data, $offset, 0, 0, $slc)
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset)),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
(inst $vaddr, rc:$data, $offset, 0, 0, $slc)
(node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
(node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
(inst $vaddr, rc:$data, $offset, 0, 0, $slc)
(node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : GCNPat <
(vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
(inst $vaddr, $data, $offset, $slc)
(vt (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data)),
(inst $vaddr, $data, $offset)
>;

class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
(inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset, $slc)
(node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
(inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
ValueType data_vt = vt> : GCNPat <
(vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
(inst $vaddr, $data, $offset, $slc)
(vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
(inst $vaddr, $data, $offset)
>;

let OtherPredicates = [HasFlatAddressSpace] in {
Expand All @@ -807,8 +803,8 @@ def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
Expand All @@ -819,19 +815,19 @@ def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;

def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
Expand Down Expand Up @@ -900,29 +896,29 @@ def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;

foreach vt = Reg32Types.types in {
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16>;
def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32>;

let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
Expand All @@ -944,7 +940,7 @@ def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}

def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>;
def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;

def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
Expand Down
239 changes: 0 additions & 239 deletions llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp

This file was deleted.

6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1084,8 +1084,14 @@ def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def DLC_0 : NamedOperandBit_0<"DLC", NamedMatchClass<"DLC">>;

def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>;

def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>;

def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,17 +389,7 @@ body: |
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
Expand Down Expand Up @@ -468,3 +458,81 @@ body: |
...

---

name: load_atomic_global_s64_seq_cst_gep_m2048
legalized: true
regBankSelected: true
tracksRegLiveness: true

body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX6-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %14, %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX7-FLAT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7-FLAT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7-FLAT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
%2:vgpr(p1) = G_PTR_ADD %0, %1
%3:vgpr(s64) = G_LOAD %2 :: (load seq_cst 8, align 8, addrspace 1)
$vgpr0_vgpr1 = COPY %3
...
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr -stop-after=si-form-memory-clauses < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -stop-after=si-form-memory-clauses < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}name:{{[ ]*}}vector_clause
; GCN: BUNDLE
Expand Down
28 changes: 16 additions & 12 deletions llvm/test/CodeGen/AMDGPU/ds_write2.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
Expand All @@ -9,7 +9,8 @@
; GFX9-NOT: m0

; GCN-DAG: {{buffer|flat|global}}_load_dword [[VAL:v[0-9]+]]
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 [[VBASE:v[0-9]+]], 2, v{{[0-9]+}}
; GCN-DAG: v_add_{{[ui]}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}lds@abs32@lo, [[VBASE]]
; GCN: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
Expand All @@ -31,10 +32,11 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo
; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} offset:4
; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4

; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 [[VBASE:v[0-9]+]], 2, v{{[0-9]+}}
; GCN-DAG: v_add_{{[ui]}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}lds@abs32@lo, [[VBASE]]
; GCN: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
Expand Down Expand Up @@ -194,10 +196,12 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)*
; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} offset:4
; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4

; GCN-DAG: v_lshlrev_b32_e32 [[VBASE:v[0-9]+]], 2, v{{[0-9]+}}
; GCN-DAG: v_add_{{[ui]}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}lds@abs32@lo, [[VBASE]]

; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
; GCN: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
Expand Down Expand Up @@ -379,11 +383,11 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace
; CI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8

; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} offset:8

; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8

; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
; GCN-DAG: v_lshlrev_b32_e32 [[VBASE:v[0-9]+]], 3, v{{[0-9]+}}
; GCN-DAG: v_add_{{[ui]}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}lds.f64@abs32@lo, [[VBASE]]
; GCN: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; GCN: s_endpgm
define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

@lds = addrspace(3) global [512 x float] undef, align 4

; GCN-LABEL: @simple_write2st64_one_val_f32_0_1
; CI-DAG: s_mov_b32 m0
; GFX9-NOT: m0n
; GFX9-NOT: m0

; GCN-DAG: {{buffer|global}}_load_dword [[VAL:v[0-9]+]]
; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Expand All @@ -30,8 +30,8 @@ define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(float addrspace(1)*
; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} offset:4
; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4{{$}}


; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
Expand Down Expand Up @@ -59,8 +59,8 @@ define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(float addrspace(1)*
; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4

; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} offset:4
; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4

; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v{{[0-9]+}}
; GCN-DAG: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]]
Expand All @@ -87,8 +87,8 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(float addrsp
; CI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8

; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} offset:8
; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}}
; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8

; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 3, v{{[0-9]+}}
; GCN-DAG: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]]
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/fp-atomic-to-s_denormmode.mir
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ body: |
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
body: |
bb.0:
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
GLOBAL_ATOMIC_FCMPSWAP_SADDR undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

Expand All @@ -360,7 +360,7 @@ body: |
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vreg_64 = GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

Expand All @@ -374,7 +374,7 @@ body: |
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vgpr_32 = GLOBAL_ATOMIC_FMAX_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

Expand All @@ -388,7 +388,7 @@ body: |
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vreg_64 = GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

Expand All @@ -402,7 +402,7 @@ body: |
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vreg_64, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vgpr_32 = GLOBAL_ATOMIC_FMIN_SADDR_RTN undef %0:vgpr_32, undef %1:vgpr_32, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

Expand All @@ -416,7 +416,7 @@ body: |
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
body: |
bb.0:
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vreg_64, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
%2:vreg_64 = GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN undef %0:vgpr_32, undef %1:vreg_64, undef %3:sgpr_64, 0, 0, implicit $exec :: (volatile load store seq_cst seq_cst 4 on `float addrspace(1)* undef`)
S_DENORM_MODE 0, implicit-def $mode, implicit $mode
...

Expand Down
249 changes: 0 additions & 249 deletions llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir

This file was deleted.

103 changes: 0 additions & 103 deletions llvm/test/CodeGen/AMDGPU/global-saddr.ll

This file was deleted.

16 changes: 16 additions & 0 deletions llvm/test/CodeGen/AMDGPU/global_atomics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,22 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_negoffset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:-512 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_negoffset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
%gep = getelementptr i32, i32 addrspace(1)* %in, i64 -128
%val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
store i32 %val, i32 addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,26 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_load_i64_neg_offset:
; CI: v_mov_b32_e32 v[[LO:[0-9]+]], 0xffffffe0
; CI: v_mov_b32_e32 v[[HI:[0-9]+]], -1
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffffffe0
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:-32 glc{{$}}
define amdgpu_kernel void @atomic_load_i64_neg_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
entry:
%gep = getelementptr i64, i64 addrspace(1)* %in, i64 -4
%val = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
store i64 %val, i64 addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_i64:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global -denormal-fp-math=preserve-sign -amdgpu-enable-global-sgpr-addr -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global -denormal-fp-math=preserve-sign -enable-misched=false < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/madak.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s

declare i32 @llvm.amdgcn.workitem.id.x()

Expand Down Expand Up @@ -494,8 +494,8 @@ entry:

; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX10: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}}
; GFX10: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_global_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s
; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GFX10CU %s

declare i32 @llvm.amdgcn.workitem.id.x()

Expand Down Expand Up @@ -363,8 +363,8 @@ entry:

; GCN-LABEL: {{^}}nontemporal_global_1:
; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] slc{{$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
; GFX10: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off slc{{$}}
; GFX10: .amdhsa_kernel nontemporal_global_1
; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0
; GFX10CU: .amdhsa_workgroup_processor_mode 0
Expand Down
34 changes: 20 additions & 14 deletions llvm/test/CodeGen/AMDGPU/memory_clause.ll
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) {
; GCN-LABEL: vector_clause:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
; GCN-NEXT: v_mov_b32_e32 v17, 0
; GCN-NEXT: v_lshlrev_b32_e32 v16, 4, v0
; GCN-NEXT: v_lshlrev_b32_e32 v18, 4, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_load_dwordx4 v[0:3], v[16:17], s[2:3]
; GCN-NEXT: global_load_dwordx4 v[4:7], v[16:17], s[2:3] offset:16
; GCN-NEXT: global_load_dwordx4 v[8:11], v[16:17], s[2:3] offset:32
; GCN-NEXT: global_load_dwordx4 v[12:15], v[16:17], s[2:3] offset:48
; GCN-NEXT: s_nop 0
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_add_co_u32_e32 v16, vcc, s2, v18
; GCN-NEXT: v_addc_co_u32_e32 v17, vcc, 0, v0, vcc
; GCN-NEXT: global_load_dwordx4 v[0:3], v[16:17], off
; GCN-NEXT: global_load_dwordx4 v[4:7], v[16:17], off offset:16
; GCN-NEXT: global_load_dwordx4 v[8:11], v[16:17], off offset:32
; GCN-NEXT: global_load_dwordx4 v[12:15], v[16:17], off offset:48
; GCN-NEXT: v_mov_b32_e32 v17, s5
; GCN-NEXT: v_add_co_u32_e32 v16, vcc, s4, v18
; GCN-NEXT: v_addc_co_u32_e32 v17, vcc, 0, v17, vcc
; GCN-NEXT: s_waitcnt vmcnt(3)
; GCN-NEXT: global_store_dwordx4 v[16:17], v[0:3], s[4:5]
; GCN-NEXT: global_store_dwordx4 v[16:17], v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(3)
; GCN-NEXT: global_store_dwordx4 v[16:17], v[4:7], s[4:5] offset:16
; GCN-NEXT: global_store_dwordx4 v[16:17], v[4:7], off offset:16
; GCN-NEXT: s_waitcnt vmcnt(3)
; GCN-NEXT: global_store_dwordx4 v[16:17], v[8:11], s[4:5] offset:32
; GCN-NEXT: global_store_dwordx4 v[16:17], v[8:11], off offset:32
; GCN-NEXT: s_waitcnt vmcnt(3)
; GCN-NEXT: global_store_dwordx4 v[16:17], v[12:15], s[4:5] offset:48
; GCN-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:48
; GCN-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -189,10 +193,12 @@ define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias noca
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x34
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_load_dwordx2 v[8:9], v[0:1], s[2:3]
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_co_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: global_load_dwordx2 v[8:9], v[0:1], off
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_load_dwordx4 v[0:3], v[8:9], off
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/sdwa-ops.mir
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
%161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
%164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
%162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
%171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec
%174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
%172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...

Expand Down Expand Up @@ -77,13 +77,13 @@ body: |
%64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
%161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
%163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
%164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
%162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...

Expand Down Expand Up @@ -113,7 +113,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...

Expand Down Expand Up @@ -143,7 +143,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
Expand Down Expand Up @@ -172,7 +172,7 @@ body: |
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
Expand Down Expand Up @@ -201,7 +201,7 @@ body: |
%30:vreg_64 = COPY $sgpr0_sgpr1
%63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
Expand Down Expand Up @@ -232,7 +232,7 @@ body: |
%30:vreg_64 = COPY $sgpr0_sgpr1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
Expand Down Expand Up @@ -263,7 +263,7 @@ body: |
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%31:vreg_64 = COPY $vcc
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...
Expand Down Expand Up @@ -294,7 +294,7 @@ body: |
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%31:vreg_64 = COPY $vcc
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...

Expand Down Expand Up @@ -325,7 +325,7 @@ body: |
%32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...

Expand Down Expand Up @@ -356,7 +356,7 @@ body: |
%32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
...

Expand Down Expand Up @@ -386,5 +386,5 @@ body: |
%31:vreg_64 = COPY killed $vcc
%64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
%62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
@stored_constant_ptr = addrspace(3) global i32 addrspace(4)* undef, align 8
Expand Down Expand Up @@ -257,14 +257,14 @@ define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %o
; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:36{{$}}
; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}

; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:12
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:44
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:12
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:28
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:44

; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:20
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:36
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:52
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off{{$}}
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:20
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:36
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:52

define amdgpu_kernel void @reorder_global_offsets_addr64_soffset0(i32 addrspace(1)* noalias nocapture %ptr.base) #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/MC/AMDGPU/flat-global.s
Original file line number Diff line number Diff line change
Expand Up @@ -179,42 +179,42 @@ global_store_dwordx4 v[3:4], v[1:4], off dlc
global_store_dword v[3:4], v1, off offset:12
// GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
// GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00]
// VI-ERR: :36: error: not a valid operand
// VI-ERR: [[@LINE-3]]:36: error: not a valid operand

global_load_dword v1, v[3:4], s[2:3]
global_load_dword v1, v3, s[2:3]
// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
// GFX9: global_load_dword v1, v[3:4], s[2:3] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: instruction not supported on this GPU
// GFX9: global_load_dword v1, v3, s[2:3] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: [[@LINE-3]]:1: error: instruction not supported on this GPU

global_load_dword v1, v[3:4], s[2:3] offset:24
global_load_dword v1, v3, s[2:3] offset:24
// GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
// GFX9: global_load_dword v1, v[3:4], s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: :38: error: not a valid operand.
// GFX9: global_load_dword v1, v3, s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: [[@LINE-3]]:34: error: not a valid operand.

global_load_dword v1, v[3:4], s[2:3] offset:-8
global_load_dword v1, v3, s[2:3] offset:-8
// GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01]
// GFX9: global_load_dword v1, v[3:4], s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: :38: error: not a valid operand.
// GFX9: global_load_dword v1, v3, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01]
// VI-ERR: [[@LINE-3]]:34: error: not a valid operand.

global_store_dword v[3:4], v1, s[2:3]
global_store_dword v3, v1, s[2:3]
// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v[3:4], v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v3, v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: instruction not supported on this GPU

global_store_dword v[3:4], v1, s[2:3] offset:24
global_store_dword v3, v1, s[2:3] offset:24
// GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v[3:4], v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :39: error: not a valid operand.
// GFX9: global_store_dword v3, v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: [[@LINE-3]]:35: error: not a valid operand.

global_store_dword v[3:4], v1, s[2:3] offset:-8
global_store_dword v3, v1, s[2:3] offset:-8
// GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00]
// GFX9: global_store_dword v[3:4], v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :39: error: not a valid operand.
// GFX9: global_store_dword v3, v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00]
// VI-ERR: :35: error: not a valid operand.

// XXX: Is this valid?
global_store_dword v[3:4], v1, exec
global_store_dword v3, v1, exec
// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
// GFX9: global_store_dword v[3:4], v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
// GFX9: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
// VI-ERR: instruction not supported on this GPU

global_load_dword v1, v[3:4], s2
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/MC/AMDGPU/gfx1030_new.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ global_load_dword_addtid v1, s[2:3] offset:16 glc slc dlc
global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00]

global_store_dword v[254:255], v1, s[2:3] offset:16
global_store_dword v254, v1, s[2:3] offset:16
// GFX10: encoding: [0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00]

global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
Expand All @@ -19,10 +19,10 @@ global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
global_atomic_csub v2, v[0:1], v2, off
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02]

global_atomic_csub v2, v[0:1], v2, s[2:3]
global_atomic_csub v2, v0, v2, s[2:3]
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02]

global_atomic_csub v2, v[0:1], v2, s[2:3] offset:100 glc slc
global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02]

buffer_atomic_csub v5, off, s[8:11], s3
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/MC/Disassembler/AMDGPU/flat_gfx9.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,34 +42,34 @@
# CHECK: flat_store_byte v[0:1], v0 offset:4095 glc ; encoding: [0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00]
0xff,0x0f,0x61,0xdc,0x00,0x00,0x00,0x00

# CHECK: global_atomic_add v[2:3], v4, s[0:1] ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v2, v4, s[0:1] ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00]
0x00,0x80,0x08,0xdd,0x02,0x04,0x00,0x00

# CHECK: global_atomic_add v[2:3], v4, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v2, v4, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00]
0xff,0x8f,0x08,0xdd,0x02,0x04,0x00,0x00

# CHECK: global_atomic_add v[2:3], v4, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v2, v4, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00]
0x00,0x90,0x08,0xdd,0x02,0x04,0x00,0x00

# CHECK: global_atomic_add v0, v[2:3], v4, s[0:1] offset:-1 glc ; encoding: [0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00]
# CHECK: global_atomic_add v0, v2, v4, s[0:1] offset:-1 glc ; encoding: [0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00]
0xff,0x9f,0x09,0xdd,0x02,0x04,0x00,0x00

# CHECK: global_load_sbyte v0, v[2:3], s[0:1] ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0x00,0x00]
# CHECK: global_load_sbyte v0, v2, s[0:1] ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0x00,0x00]
0x00,0x80,0x44,0xdc,0x02,0x00,0x00,0x00

# CHECK: global_load_sbyte v0, v[2:3], s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x44,0xdc,0x02,0x00,0x00,0x00]
# CHECK: global_load_sbyte v0, v2, s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x44,0xdc,0x02,0x00,0x00,0x00]
0xff,0x8f,0x44,0xdc,0x02,0x00,0x00,0x00

# CHECK: global_load_sbyte v0, v[2:3], s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x44,0xdc,0x02,0x00,0x00,0x00]
# CHECK: global_load_sbyte v0, v2, s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x44,0xdc,0x02,0x00,0x00,0x00]
0x00,0x90,0x44,0xdc,0x02,0x00,0x00,0x00

# CHECK: global_store_dwordx2 v[2:3], v[4:5], s[0:1] ; encoding: [0x00,0x80,0x74,0xdc,0x02,0x04,0x00,0x00]
# CHECK: global_store_dwordx2 v2, v[4:5], s[0:1] ; encoding: [0x00,0x80,0x74,0xdc,0x02,0x04,0x00,0x00]
0x00,0x80,0x74,0xdc,0x02,0x04,0x00,0x00

# CHECK: global_store_dwordx2 v[2:3], v[4:5], s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x74,0xdc,0x02,0x04,0x00,0x00]
# CHECK: global_store_dwordx2 v2, v[4:5], s[0:1] offset:4095 ; encoding: [0xff,0x8f,0x74,0xdc,0x02,0x04,0x00,0x00]
0xff,0x8f,0x74,0xdc,0x02,0x04,0x00,0x00

# CHECK: global_store_dwordx2 v[2:3], v[4:5], s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x74,0xdc,0x02,0x04,0x00,0x00]
# CHECK: global_store_dwordx2 v2, v[4:5], s[0:1] offset:-4096 ; encoding: [0x00,0x90,0x74,0xdc,0x02,0x04,0x00,0x00]
0x00,0x90,0x74,0xdc,0x02,0x04,0x00,0x00

# CHECK: scratch_load_dword v0, v0, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x7f,0x00]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1030_dasm_new.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# GFX10: global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00

# GFX10: global_store_dword v[254:255], v1, s[2:3] offset:16
# GFX10: global_store_dword v254, v1, s[2:3] offset:16
0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00

# GFX10: global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
Expand All @@ -19,10 +19,10 @@
# GFX10: global_atomic_csub v2, v[0:1], v2, off glc
0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02

# GFX10: global_atomic_csub v2, v[0:1], v2, s[2:3] glc
# GFX10: global_atomic_csub v2, v0, v2, s[2:3] glc
0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02

# GFX10: global_atomic_csub v2, v[0:1], v2, s[2:3] offset:100 glc slc
# GFX10: global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02

# GFX10: buffer_atomic_csub v5, off, s[8:11], s3
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/MC/Disassembler/AMDGPU/gfx10_dasm_all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8237,13 +8237,13 @@
# GFX10: global_load_dword v1, v[3:4], off offset:2047 ; encoding: [0xff,0x87,0x30,0xdc,0x03,0x00,0x7d,0x01]
0xff,0x87,0x30,0xdc,0x03,0x00,0x7d,0x01

# GFX10: global_load_dword v1, v[3:4], s[2:3] ; encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
# GFX10: global_load_dword v1, v3, s[2:3] ; encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01

# GFX10: global_load_dword v1, v[3:4], s[2:3] offset:2040 ; encoding: [0xf8,0x87,0x30,0xdc,0x03,0x00,0x02,0x01]
# GFX10: global_load_dword v1, v3, s[2:3] offset:2040 ; encoding: [0xf8,0x87,0x30,0xdc,0x03,0x00,0x02,0x01]
0xf8,0x87,0x30,0xdc,0x03,0x00,0x02,0x01

# GFX10: global_load_dword v1, v[3:4], s[2:3] offset:24 ; encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
# GFX10: global_load_dword v1, v3, s[2:3] offset:24 ; encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01]
0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01

# GFX10: global_load_dwordx2 v[1:2], v[3:4], off ; encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01]
Expand Down Expand Up @@ -8315,7 +8315,7 @@
# GFX10: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00]
0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00

# GFX10: global_store_dword v[3:4], v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
# GFX10: global_store_dword v3, v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00]
0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00

# GFX10: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
Expand All @@ -8327,13 +8327,13 @@
# GFX10: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00]
0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00

# GFX10: global_store_dword v[3:4], v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
# GFX10: global_store_dword v3, v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00

# GFX10: global_store_dword v[3:4], v1, s[2:3] offset:2040 ; encoding: [0xf8,0x87,0x70,0xdc,0x03,0x01,0x02,0x00]
# GFX10: global_store_dword v3, v1, s[2:3] offset:2040 ; encoding: [0xf8,0x87,0x70,0xdc,0x03,0x01,0x02,0x00]
0xf8,0x87,0x70,0xdc,0x03,0x01,0x02,0x00

# GFX10: global_store_dword v[3:4], v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
# GFX10: global_store_dword v3, v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00]
0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00

# GFX10: global_store_dwordx2 v[3:4], v[1:2], off ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00]
Expand Down