Skip to content

Commit

Permalink
Fixed/Recommitted r267733 "[AMDGPU][llvm-mc] Add support of TTMP quad…
Browse files Browse the repository at this point in the history
…s. Rework M0 exclusion for SMRD."

Previously reverted by r267752.

r267733 review:
Differential Revision: http://reviews.llvm.org/D19342

llvm-svn: 268066
  • Loading branch information
atamazov committed Apr 29, 2016
1 parent e801f6a commit 38e496b
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 19 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Expand Up @@ -644,13 +644,14 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
default: return -1;
case 1: return AMDGPU::TTMP_32RegClassID;
case 2: return AMDGPU::TTMP_64RegClassID;
case 4: return AMDGPU::TTMP_128RegClassID;
}
} else if (Is == IS_SGPR) {
switch (RegWidth) {
default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
case 4: return AMDGPU::SReg_128RegClassID;
case 4: return AMDGPU::SGPR_128RegClassID;
case 8: return AMDGPU::SReg_256RegClassID;
case 16: return AMDGPU::SReg_512RegClassID;
}
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Expand Up @@ -66,8 +66,8 @@ DECODE_OPERAND(VReg_64)
DECODE_OPERAND(VReg_96)
DECODE_OPERAND(VReg_128)

DECODE_OPERAND(SGPR_32)
DECODE_OPERAND(SReg_32)
DECODE_OPERAND(SReg_32_XM0)
DECODE_OPERAND(SReg_64)
DECODE_OPERAND(SReg_128)
DECODE_OPERAND(SReg_256)
Expand Down Expand Up @@ -237,17 +237,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SGPR_32(unsigned Val) const {
return createSRegOperand(AMDGPU::SGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
// table-gen generated disassembler doesn't care about operand types
// leaving only registry class so SSrc_32 operand turns into SReg_32
// and therefore we accept immediates and literals here as well
return decodeSrcOp(OP32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
// SReg_32_XM0 is SReg_32 without M0
return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
// see decodeOperand_SReg_32 comment
return decodeSrcOp(OP64, Val);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
Expand Up @@ -62,8 +62,8 @@ namespace llvm {
MCOperand decodeOperand_VReg_96(unsigned Val) const;
MCOperand decodeOperand_VReg_128(unsigned Val) const;

MCOperand decodeOperand_SGPR_32(unsigned Val) const;
MCOperand decodeOperand_SReg_32(unsigned Val) const;
MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
MCOperand decodeOperand_SReg_64(unsigned Val) const;
MCOperand decodeOperand_SReg_128(unsigned Val) const;
MCOperand decodeOperand_SReg_256(unsigned Val) const;
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
Expand Up @@ -250,9 +250,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
} else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
Type = "v";
NumRegs = 4;
} else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
} else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) {
Type = "s";
NumRegs = 4;
} else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) {
Type = "ttmp";
NumRegs = 4;
} else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
Type = "v";
NumRegs = 3;
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -60,17 +60,17 @@ defm EXP : EXP_m;
// SMRD Instructions
//===----------------------------------------------------------------------===//

// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;

defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
>;

defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
Expand Down Expand Up @@ -2087,9 +2087,9 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
}

// It's unclear whether you can use M0 as the output of v_readlane_b32
// instructions, so use SGPR_32 register class for spills to prevent
// instructions, so use SReg_32_XM0 register class for spills to prevent
// this from happening.
defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
Expand Down Expand Up @@ -3431,7 +3431,7 @@ def : ZExt_i64_i1_Pat<anyext>;
def : Pat <
(i64 (sext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0,
(i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SGPR_32)), sub1)
(i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
>;

def : Pat <
Expand Down
23 changes: 19 additions & 4 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Expand Up @@ -132,7 +132,7 @@ def SGPR_64Regs : RegisterTuples<[sub0, sub1],
(add (decimate (shl SGPR_32, 1), 2))]>;

// SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
Expand Down Expand Up @@ -255,6 +255,13 @@ def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
>;

// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
>;

def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;

def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
Expand All @@ -265,11 +272,19 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
(add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)
>;

def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
// Requires 2 s_mov_b64 to copy
let CopyCost = 2;
// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {

def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)>;

def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
let isAllocatable = 0;
}

def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)>;

} // End CopyCost = 2

def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/MC/AMDGPU/mubuf.s
Expand Up @@ -18,6 +18,10 @@ buffer_load_dword v1, off, s[4:7], s1
// SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]

buffer_load_dword v1, off, ttmp[4:7], s1
// SICI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x01]
// VI: buffer_load_dword v1, off, ttmp[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x01]

buffer_load_dword v1, off, s[4:7], s1 offset:4
// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
Expand All @@ -42,6 +46,10 @@ buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe
// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]

buffer_load_dword v1, off, ttmp[4:7], s1 offset:4 glc slc tfe
// SICI: buffer_load_dword v1, off, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xdd,0x01]
// VI: buffer_load_dword v1, off, ttmp[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x9d,0x01]

//===----------------------------------------------------------------------===//
// load - vgpr offset
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -74,6 +82,10 @@ buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
// VI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x81,0x01]

buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xdd,0x01]
// VI: buffer_load_dword v1, v2, ttmp[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x52,0xe0,0x02,0x01,0x9d,0x01]

//===----------------------------------------------------------------------===//
// load - vgpr index
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -106,6 +118,10 @@ buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
// VI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x81,0x01]

buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xdd,0x01]
// VI: buffer_load_dword v1, v2, ttmp[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x52,0xe0,0x02,0x01,0x9d,0x01]

//===----------------------------------------------------------------------===//
// load - vgpr index and offset
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -138,6 +154,10 @@ buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
// VI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x81,0x01]

buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xdd,0x71]
// VI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x9d,0x71]

//===----------------------------------------------------------------------===//
// load - addr64
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -170,6 +190,10 @@ buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
// NOVI: error: instruction not supported on this GPU

buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
// SICI: buffer_load_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xdd,0x71]
// NOVI: error: instruction not supported on this GPU

//===----------------------------------------------------------------------===//
// store - immediate offset only
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -202,6 +226,10 @@ buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe
// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
// VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]

buffer_store_dword v1, off, ttmp[4:7], ttmp1 offset:4 glc slc tfe
// SICI: buffer_store_dword v1, off, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xdd,0x71]
// VI: buffer_store_dword v1, off, ttmp[4:7], ttmp1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x9d,0x71]

//===----------------------------------------------------------------------===//
// store - vgpr offset
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -234,6 +262,10 @@ buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
// VI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x81,0x01]

buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xdd,0x71]
// VI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x72,0xe0,0x02,0x01,0x9d,0x71]

//===----------------------------------------------------------------------===//
// store - vgpr index
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -266,6 +298,10 @@ buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
// VI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x81,0x01]

buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xdd,0x71]
// VI: buffer_store_dword v1, v2, ttmp[4:7], ttmp1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x72,0xe0,0x02,0x01,0x9d,0x71]

//===----------------------------------------------------------------------===//
// store - vgpr index and offset
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -298,6 +334,10 @@ buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
// VI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x81,0x01]

buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xdd,0x71]
// VI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x9d,0x71]

//===----------------------------------------------------------------------===//
// store - addr64
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -330,6 +370,10 @@ buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
// NOVI: error: instruction not supported on this GPU

buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe
// SICI: buffer_store_dword v1, v[2:3], ttmp[4:7], ttmp1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xdd,0x71]
// NOVI: error: instruction not supported on this GPU

//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -366,10 +410,18 @@ buffer_store_format_xyzw v[1:4], off, s[4:7], s1
// SICI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]

buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1
// SICI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]
// VI: buffer_store_format_xyzw v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x1d,0x71]

buffer_load_ubyte v1, off, s[4:7], s1
// SICI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]

buffer_load_ubyte v1, off, ttmp[4:7], ttmp1
// SICI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x1d,0x71]
// VI: buffer_load_ubyte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x1d,0x71]

buffer_load_sbyte v1, off, s[4:7], s1
// SICI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
Expand All @@ -386,6 +438,10 @@ buffer_load_dword v1, off, s[4:7], s1
// SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]

buffer_load_dword v1, off, ttmp[4:7], ttmp1
// SICI: buffer_load_dword v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x1d,0x71]
// VI: buffer_load_dword v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x1d,0x71]

buffer_load_dwordx2 v[1:2], off, s[4:7], s1
// SICI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
Expand All @@ -394,10 +450,18 @@ buffer_load_dwordx4 v[1:4], off, s[4:7], s1
// SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]

buffer_load_dwordx4 v[1:4], off, ttmp[4:7], ttmp1
// SICI: buffer_load_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x1d,0x71]
// VI: buffer_load_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x1d,0x71]

buffer_store_byte v1, off, s[4:7], s1
// SICI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]

buffer_store_byte v1, off, ttmp[4:7], ttmp1
// SICI: buffer_store_byte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]
// VI: buffer_store_byte v1, off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x1d,0x71]

buffer_store_short v1, off, s[4:7], s1
// SICI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
Expand All @@ -414,6 +478,10 @@ buffer_store_dwordx4 v[1:4], off, s[4:7], s1
// SICI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
// VI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]

buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1
// SICI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x1d,0x71]
// VI: buffer_store_dwordx4 v[1:4], off, ttmp[4:7], ttmp1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x1d,0x71]

//===----------------------------------------------------------------------===//
// Cache invalidation
//===----------------------------------------------------------------------===//
Expand Down

0 comments on commit 38e496b

Please sign in to comment.