Commit eed768b

[NVPTX] [TableGen] Use new features of TableGen to simplify and clarify.

Differential Revision: https://reviews.llvm.org/D90861

Paul C. Anagnostopoulos committed Nov 6, 2020
1 parent 7914e4f commit eed768b
Showing 4 changed files with 60 additions and 59 deletions.
32 changes: 16 additions & 16 deletions llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -31,28 +31,28 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>

// TSFlagFields
bits<4> VecInstType = VecNOP.Value;
- bit IsSimpleMove = 0;
- bit IsLoad = 0;
- bit IsStore = 0;
+ bit IsSimpleMove = false;
+ bit IsLoad = false;
+ bit IsStore = false;

- bit IsTex = 0;
- bit IsSust = 0;
- bit IsSurfTexQuery = 0;
- bit IsTexModeUnified = 0;
+ bit IsTex = false;
+ bit IsSust = false;
+ bit IsSurfTexQuery = false;
+ bit IsTexModeUnified = false;

// The following field is encoded as log2 of the vector size minus one,
// with 0 meaning the operation is not a surface instruction. For example,
// if IsSuld == 2, then the instruction is a suld instruction with vector size
// 2**(2-1) = 2.
bits<2> IsSuld = 0;

- let TSFlags{3-0} = VecInstType;
- let TSFlags{4-4} = IsSimpleMove;
- let TSFlags{5-5} = IsLoad;
- let TSFlags{6-6} = IsStore;
- let TSFlags{7} = IsTex;
- let TSFlags{9-8} = IsSuld;
- let TSFlags{10} = IsSust;
- let TSFlags{11} = IsSurfTexQuery;
- let TSFlags{12} = IsTexModeUnified;
+ let TSFlags{3...0} = VecInstType;
+ let TSFlags{4...4} = IsSimpleMove;
+ let TSFlags{5...5} = IsLoad;
+ let TSFlags{6...6} = IsStore;
+ let TSFlags{7} = IsTex;
+ let TSFlags{9...8} = IsSuld;
+ let TSFlags{10} = IsSust;
+ let TSFlags{11} = IsSurfTexQuery;
+ let TSFlags{12} = IsTexModeUnified;
}
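The two TableGen features at work in this file are boolean literals for bit fields and the x...y punctuation for inclusive bit-slice ranges. A minimal sketch of both, using hypothetical classes rather than code from this commit:

    // Minimal sketch (hypothetical classes, not from this commit):
    // bit fields accept true/false, and bit slices accept x...y ranges.
    class FlagsBase {
      bits<16> TSFlags = 0;
    }
    class ExampleInst : FlagsBase {
      bits<4> VecInstType = 0;
      bit IsLoad = false;                 // previously spelled: bit IsLoad = 0;
      let TSFlags{3...0} = VecInstType;   // previously: let TSFlags{3-0} = ...;
      let TSFlags{4} = IsLoad;
    }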
26 changes: 13 additions & 13 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -13,7 +13,7 @@
include "NVPTXInstrFormats.td"

// A NOP instruction
- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def NOP : NVPTXInst<(outs), (ins), "", []>;
}

@@ -407,7 +407,7 @@ multiclass F2<string OpcStr, SDNode OpNode> {
// Type Conversion
//-----------------------------------

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
// Generate a cvt to the given type from all possible types. Each instance
// takes a CvtMode immediate that defines the conversion mode to use. It can
// be CvtNONE to omit a conversion mode.
@@ -1367,7 +1367,7 @@ multiclass BFE<string TyStr, RegisterClass RC> {
!strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
}

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
defm BFE_S32 : BFE<"s32", Int32Regs>;
defm BFE_U32 : BFE<"u32", Int32Regs>;
defm BFE_S64 : BFE<"s64", Int64Regs>;
@@ -1381,7 +1381,7 @@ let hasSideEffects = 0 in {
// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1427,7 +1427,7 @@ def SETP_f16x2rr :
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
// reg, either u32, s32, or f32. Anyway these aren't used at the moment.

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1462,7 +1462,7 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>;

// selp instructions that don't have any pattern matches; we explicitly use
// them within this file.
- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
@@ -1572,7 +1572,7 @@ def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
[(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Get pointer to local stack.
- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
"mov.u32 \t$d, __local_depot$num;", []>;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
@@ -1988,7 +1988,7 @@ def ProxyReg :
SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

- let mayLoad = 1 in {
+ let mayLoad = true in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
@@ -2013,7 +2013,7 @@ class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
!strconcat("mov", opstr, " \t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

- let mayStore = 1 in {
+ let mayStore = true in {
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
!strconcat("st.param", opstr, " \t[param$a+$b], $val;"),
@@ -2823,7 +2823,7 @@ def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;


- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
// pack a set of smaller int registers to a larger int register
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
@@ -2856,7 +2856,7 @@ let hasSideEffects = 0 in {

}

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
// Extract element of f16x2 register. PTX does not provide any way
// to access elements of f16x2 vector directly, so we need to
// extract it using a temporary register.
@@ -2899,7 +2899,7 @@ let hasSideEffects = 0 in {
}

// Count leading zeros
- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"clz.b32 \t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
@@ -2937,7 +2937,7 @@ def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))),
(SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;

// Population count
- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"popc.b32 \t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
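Throughout NVPTXInstrInfo.td the change is mechanical: hasSideEffects = 0, mayLoad = 1, and mayStore = 1 become false/true. As a reminder of what these blocks do, a sketch with a hypothetical instruction (a top-level let ... in { ... } applies its field values to every record defined inside the braces):

    // Sketch (hypothetical def, not from this commit): every record inside
    // the braces gets hasSideEffects = false and mayLoad = true.
    let hasSideEffects = false, mayLoad = true in {
      def EXAMPLE_LD : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                                 "ld.example \t$d, [$a];", []>;
    }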
57 changes: 29 additions & 28 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -51,19 +51,19 @@ def ptx : PTX;
// Generates list of n sequential register names.
// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
- list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
- [prefix # !add(n, -1)]),
+ list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
+ [prefix # !sub(n, 1)]),
[]);
}

class THREADMASK_INFO<bit sync> {
- list<bit> ret = !if(sync, [0,1], [0]);
+ list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
- let isConvergent = 1 in {
+ let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
"bar.sync \t0;",
[(int_nvvm_barrier0)]>;
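The RegSeq change earlier in this hunk swaps !add(n, -1) for the clearer !sub(n, 1); the recursion itself is unchanged. A worked expansion under the definition above:

    // Worked expansion of RegSeq<3, "r">.ret (a trace, not code):
    //   RegSeq<3, "r">.ret
    //     = !listconcat(RegSeq<2, "r">.ret, ["r2"])
    //     = !listconcat(!listconcat(RegSeq<1, "r">.ret, ["r1"]), ["r2"])
    //     = !listconcat(!listconcat(!listconcat([], ["r0"]), ["r1"]), ["r2"])
    //     = ["r0", "r1", "r2"]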
@@ -173,12 +173,12 @@ class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
)];
}

- foreach sync = [0, 1] in {
+ foreach sync = [false, true] in {
foreach mode = ["up", "down", "bfly", "idx"] in {
foreach regclass = ["i32", "f32"] in {
- foreach return_pred = [0, 1] in {
- foreach offset_imm = [0, 1] in {
- foreach mask_imm = [0, 1] in {
+ foreach return_pred = [false, true] in {
+ foreach offset_imm = [false, true] in {
+ foreach mask_imm = [false, true] in {
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
def : SHFL_INSTR<sync, mode, regclass, return_pred,
offset_imm, mask_imm, threadmask_imm>,
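The loop nest above now iterates over [false, true] instead of [0, 1], so each generated SHFL record sees its flags as proper booleans. A reduced sketch of the same pattern, with a hypothetical record:

    // Reduced sketch (hypothetical record): foreach accepts boolean lists,
    // and string paste can pick a name suffix from the loop variable.
    foreach with_pred = [false, true] in {
      def EXAMPLE_SHFL#!if(with_pred, "_PRED", "")
          : NVPTXInst<(outs), (ins), "", []>;
    }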
@@ -274,7 +274,7 @@ defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_s
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
i64imm>;

- } // isConvergent = 1
+ } // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
@@ -1548,7 +1548,7 @@ multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
!cast<Intrinsic>(
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
- # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+ # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
@@ -1562,7 +1562,7 @@ multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
!cast<Intrinsic>(
"int_nvvm_atomic_" # OpStr
# "_" # SpaceStr # "_" # IntTypeStr
- # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+ # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
regclass, ImmType, Imm, ImmTy, Preds>;
}
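In both multiclasses, !empty(ScopeStr) replaces !eq(ScopeStr, ""); the two are equivalent for strings, but !empty states the intent. A small sketch with hypothetical names:

    // Small sketch (hypothetical class): !empty(s) is the clearer spelling
    // of !eq(s, "") when testing for an empty string.
    class ScopedName<string OpStr, string ScopeStr> {
      string ret = "int_nvvm_atomic_" # OpStr
                   # !if(!empty(ScopeStr), "", "_" # ScopeStr);
    }
    def name_gen : ScopedName<"add", "">;      // ret = "int_nvvm_atomic_add"
    def name_cta : ScopedName<"add", "cta">;   // ret = "int_nvvm_atomic_add_cta"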

@@ -2131,7 +2131,7 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
(ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
Requires<[noHWROT32]> ;

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
!strconcat("{{\n\t",
".reg .b32 %dummy;\n\t",
@@ -2147,7 +2147,7 @@ let hasSideEffects = 0 in {
[]> ;
}

- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def PACK_TWO_INT32
: NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
"mov.b64 \t$dst, {{$lo, $hi}};", []> ;
Expand All @@ -2159,7 +2159,7 @@ def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),

// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
- let hasSideEffects = 0 in {
+ let hasSideEffects = false in {
def SHF_L_WRAP_B32_IMM
: NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
@@ -2242,7 +2242,7 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
- let IsTex = 1, IsTexModeUnified = 0 in {
+ let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
def TEX_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
@@ -2925,7 +2925,7 @@ def TLD4_A_2D_U32_F32


// texmode_unified
- let IsTex = 1, IsTexModeUnified = 1 in {
+ let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles
def TEX_UNIFIED_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
@@ -3610,7 +3610,7 @@ def TLD4_UNIFIED_A_2D_U32_F32

//=== Surface load instructions
// .clamp variant
- let IsSuld = 1 in {
+ let IsSuld = true in {
def SULD_1D_I8_CLAMP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -3922,7 +3922,7 @@ def SULD_3D_V4I32_CLAMP


// .trap variant
- let IsSuld = 1 in {
+ let IsSuld = true in {
def SULD_1D_I8_TRAP
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -4233,7 +4233,7 @@ def SULD_3D_V4I32_TRAP
}

// .zero variant
- let IsSuld = 1 in {
+ let IsSuld = true in {
def SULD_1D_I8_ZERO
: NVPTXInst<(outs Int16Regs:$r),
(ins Int64Regs:$s, Int32Regs:$x),
@@ -4547,7 +4547,7 @@ def SULD_3D_V4I32_ZERO
// Texture Query Intrinsics
//-----------------------------------

- let IsSurfTexQuery = 1 in {
+ let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_order.b32 \t$d, [$a];",
@@ -4604,7 +4604,7 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
// Surface Query Intrinsics
//-----------------------------------

- let IsSurfTexQuery = 1 in {
+ let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_order.b32 \t$d, [$a];",
@@ -4663,7 +4663,7 @@ def ISTYPEP_TEXTURE

//===- Surface Stores -----------------------------------------------------===//

- let IsSust = 1 in {
+ let IsSust = true in {
// Unformatted
// .clamp variant
def SUST_B_1D_B8_CLAMP
@@ -7361,7 +7361,7 @@ class WMMA_REGINFO<WMMA_REGS r>
!eq(ptx_elt_type, "b1") : Int32Regs);

// Instruction input/output arguments for the fragment.
- list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
+ list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

// List of register names for the fragment -- ["ra0", "ra1",...]
list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
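The !foreach over regs only ever produced copies of regclass, so the patch switches to !listsplat, which builds a list of n copies directly. A sketch with hypothetical values:

    // Sketch (hypothetical values): !listsplat(v, n) yields n copies of v.
    class SplatDemo {
      list<int> regs = [10, 20, 30];               // stand-in for Frag.regs
      list<string> ptx_regs = !listsplat("Int32Regs", !size(regs));
      // ptx_regs = ["Int32Regs", "Int32Regs", "Int32Regs"]
    }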
Expand Down Expand Up @@ -7450,12 +7450,13 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
// To match the right intrinsic, we need to build AS-constrained PatFrag.
// Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
+ dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<PFOperands,
- !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
+ PFOperandsIntr,
!cond(!eq(Space, ".shared"): AS_match.shared,
!eq(Space, ".global"): AS_match.global,
- 1: AS_match.generic)>;
+ true: AS_match.generic)>;
// Build AS-constrained pattern.
let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

@@ -7490,14 +7491,14 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
// To match the right intrinsic, we need to build AS-constrained PatFrag.
// Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
dag PFOperands = !con((ops node:$dst),
- !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
+ !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
!if(WithStride, (ops node:$ldm), (ops)));
// Build PatFrag that only matches particular address space.
PatFrag IntrFrag = PatFrag<PFOperands,
!foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
!cond(!eq(Space, ".shared"): AS_match.shared,
!eq(Space, ".global"): AS_match.global,
- 1: AS_match.generic)>;
+ true: AS_match.generic)>;
// Build AS-constrained pattern.
let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

@@ -7518,7 +7519,7 @@ class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
// Create all load/store variants
defset list<WMMA_INSTR> MMA_LDSTs = {
foreach layout = ["row", "col"] in {
- foreach stride = [0, 1] in {
+ foreach stride = [false, true] in {
foreach space = [".global", ".shared", ""] in {
foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
foreach frag = NVVM_MMA_OPS.all_ld_ops in
4 changes: 2 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -30,7 +30,7 @@ def VRDepot : NVPTXReg<"%Depot">;

// We use virtual registers, but define a few physical registers here to keep
// SDAG and the MachineInstr layers happy.
- foreach i = 0-4 in {
+ foreach i = 0...4 in {
def P#i : NVPTXReg<"%p"#i>; // Predicate
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
@@ -47,7 +47,7 @@ foreach i = 0-4 in {
def da#i : NVPTXReg<"%da"#i>;
}

- foreach i = 0-31 in {
+ foreach i = 0...31 in {
def ENVREG#i : NVPTXReg<"%envreg"#i>;
}
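The register loops now use 0...4 and 0...31, TableGen's newer inclusive-range punctuation, in place of the older 0-4/0-31 spelling. A sketch with hypothetical registers:

    // Sketch (hypothetical registers): x...y is an inclusive integer range.
    foreach i = 0...3 in {
      def SCRATCH#i : NVPTXReg<"%scratch"#i>;
    }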
