Skip to content

Commit

Permalink
Use a smaller expansion of GT_INDEX in MinOpts.
Browse files Browse the repository at this point in the history
We currently expand `GT_INDEX` nodes during morph into an explicit
bounds check followed by a load. For example, this tree:

```
[000059] ------------       /--*  LCL_VAR   int    V09 loc6
[000060] R--XG-------    /--*  INDEX     ref
[000058] ------------    |  \--*  LCL_VAR   ref    V00 arg0
[000062] -A-XG-------    *  ASG       ref
[000061] D------N----    \--*  LCL_VAR   ref    V10 loc7
```

is expanded into this tree:

```
[000060] R--XG+------       /--*  IND       ref
[000491] -----+------       |  |  /--*  CNS_INT   long   16 Fseq[#FirstElem]
[000492] -----+------       |  \--*  ADD       byref
[000488] -----+-N----       |     |     /--*  CNS_INT   long   3
[000489] -----+------       |     |  /--*  LSH       long
[000487] -----+------       |     |  |  \--*  CAST      long <- int
[000484] i----+------       |     |  |     \--*  LCL_VAR   int    V09 loc6
[000490] -----+------       |     \--*  ADD       byref
[000483] -----+------       |        \--*  LCL_VAR   ref    V00 arg0
[000493] ---XG+------    /--*  COMMA     ref
[000486] ---X-+------    |  \--*  ARR_BOUNDS_CHECK_Rng void
[000059] -----+------    |     +--*  LCL_VAR   int    V09 loc6
[000485] ---X-+------    |     \--*  ARR_LENGTH int
[000058] -----+------    |        \--*  LCL_VAR   ref    V00 arg0
[000062] -A-XG+------    *  ASG       ref
[000061] D----+-N----    \--*  LCL_VAR   ref    V10 loc7

```

Even in this simple case where both the array object and the index are
lclVars, this represents a rather large increase in the size of the IR.
In the worst case, the JIT introduces and additional lclVar for both the
array object and the index, adding several additional nodes to the tree.
When optimizing, exposing the structure of the array access may be
helpful, as it may allow the compiler to better analyze the program.
When we are not optimizing, however, the expansion serves little purpose
besides constraining the IR shapes that must be handled by the backend.
Due to its need for lclVars in the worst case, this expansion may even
bloat the size of the generated code, as all lclVar references are
generated as loads/stores from/to the stack when we are not optimizing.
In the case above, the expanded tree generates the following x64
assembly:

```
IN0018: 000092 mov      rdi, gword ptr [V00 rbp-10H]
IN0019: 000096 mov      edi, dword ptr [rdi+8]
IN001a: 000099 cmp      dword ptr [V09 rbp-48H], edi
IN001b: 00009C jae      G_M5106_IG38
IN001c: 0000A2 mov      rdi, gword ptr [V00 rbp-10H]
IN001d: 0000A6 mov      esi, dword ptr [V09 rbp-48H]
IN001e: 0000A9 movsxd   rsi, esi
IN001f: 0000AC mov      rdi, gword ptr [rdi+8*rsi+16]
IN0020: 0000B1 mov      gword ptr [V10 rbp-50H], rdi
```

Inspired by other recent experiments (e.g. dotnet#13188), this change
introduces a new node that replaces the above expansion in MinOpts. This
node, `GT_INDEX_ADDR`, represents the bounds check and address
computation involved in an array access, and returns the address of the
element that is to be loaded or stored. Using this node, the example
tree given above expands to the following:

```
[000489] a--XG+------    /--*  IND       ref
[000059] -----+------    |  |  /--*  LCL_VAR   int    V09 loc6
[000060] R--XG+--R---    |  \--*  INDEX_ADDR byref
[000058] -----+------    |     \--*  LCL_VAR   ref    V00 arg0
[000062] -A-XG+------    *  ASG       ref
[000061] D----+-N----    \--*  LCL_VAR   ref    V10 loc7
```

This expansion requires only the addition of the `GT_IND` node that
represents the memory access itself. This savings in IR size translates
to about a 2% decrease in instructions retired during non-optimizing
compilation. Furthermore, this expansion tends to generate smaller
code; for example, the tree given above is generated in 29 rather than
35 bytes:

```
IN0018: 000092 mov      edi, dword ptr [V09 rbp-48H]
IN0019: 000095 mov      rsi, gword ptr [V00 rbp-10H]
IN001a: 000099 cmp      rdi, qword ptr [rsi+8]
IN001b: 00009D jae      G_M5106_IG38
IN001c: 0000A3 lea      rsi, bword ptr [rsi+8*rdi+16]
IN001d: 0000A8 mov      rdi, gword ptr [rsi]
IN001e: 0000AB mov      gword ptr [V10 rbp-50H], rdi
```
  • Loading branch information
pgavlin committed Aug 15, 2017
1 parent ea1a4a5 commit d0483a3
Show file tree
Hide file tree
Showing 12 changed files with 373 additions and 11 deletions.
101 changes: 101 additions & 0 deletions src/jit/codegenarmarch.cpp
Expand Up @@ -1538,6 +1538,107 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
genProduceReg(tree);
}

//------------------------------------------------------------------------
// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
//
// Arguments:
// tree - the GT_INDEX_ADDR node
//
void CodeGen::genCodeForIndexAddr(GenTreeIndex* node)
{
assert(node->OperIs(GT_INDEX_ADDR));

ssize_t lenOffs;
ssize_t elemOffs;
if (node->gtFlags & GTF_INX_STRING_LAYOUT)
{
lenOffs = offsetof(CORINFO_String, stringLen);
elemOffs = offsetof(CORINFO_String, chars);
}
else if (node->gtFlags & GTF_INX_REFARR_LAYOUT)
{
lenOffs = offsetof(CORINFO_RefArray, length);
elemOffs = compiler->eeGetEEInfo()->offsetOfObjArrayData;
}
else // We have a standard array
{
lenOffs = offsetof(CORINFO_Array, length);
elemOffs = offsetof(CORINFO_Array, u1Elems);
}

GenTree* const base = node->Arr();
GenTree* const index = node->Index();

genConsumeReg(base);
genConsumeReg(index);

const regNumber tmpReg = node->GetSingleTempReg();

// Generate the bounds check if necessary.
if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
{
// Create a GT_IND(GT_LEA)) tree for the array length access and load the length into a register.
GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, static_cast<unsigned>(lenOffs));
arrLenAddr.gtRegNum = REG_NA;
arrLenAddr.SetContained();
arrLenAddr.gtNext = (GenTree*)(-1);

GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);
arrLen.gtRegNum = tmpReg;
arrLen.ClearContained();

getEmitter()->emitInsLoadStoreOp(ins_Load(TYP_INT), emitTypeSize(TYP_INT), arrLen.gtRegNum, &arrLen);

#ifdef _TARGET_64BIT_
// The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index
// is a native int on a 64-bit platform, we will need to widen the array length and the compare.
if (index->TypeGet() == TYP_I_IMPL)
{
// Extend the array length as needed.
getEmitter()->emitIns_R_R(ins_Move_Extend(TYP_INT, true), EA_8BYTE, arrLen.gtRegNum, arrLen.gtRegNum);
}
#endif

// Generate the range check.
getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_I_IMPL), index, &arrLen);
genJumpToThrowHlpBlk(genJumpKindForOper(GT_GE, CK_UNSIGNED), SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
}

// Compute the address of the array element.
switch (node->gtIndElemSize)
{
case 1:
case 2:
case 4:
case 8:
case 16:
{
DWORD lsl;
BitScanForward(&lsl, node->gtIndElemSize);

// dest = base + index * scale
genScaledAdd(emitTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum, lsl);
break;
}

default:
{
// tmp = scale
CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtIndElemSize, TYP_INT);

// dest = base + index * tmp
getEmitter()->emitIns_R_R_R_R(INS_MULADD, emitTypeSize(node), node->gtRegNum, node->gtRegNum, index->gtRegNum, tmpReg);
break;
}
}

// dest = dest + elemOffs
getEmitter()->emitIns_R_R_I(INS_add, emitTypeSize(node), node->gtRegNum, node->gtRegNum, elemOffs);

genProduceReg(node);
}


//------------------------------------------------------------------------
// genCodeForIndir: Produce code for a GT_IND node.
//
Expand Down
1 change: 1 addition & 0 deletions src/jit/codegenlinear.h
Expand Up @@ -167,6 +167,7 @@ void genCodeForShiftRMW(GenTreeStoreInd* storeInd);

void genCodeForCast(GenTreeOp* tree);
void genCodeForLclAddr(GenTree* tree);
void genCodeForIndexAddr(GenTreeIndex* tree);
void genCodeForIndir(GenTreeIndir* tree);
void genCodeForNegNot(GenTree* tree);
void genCodeForLclVar(GenTreeLclVar* tree);
Expand Down
103 changes: 103 additions & 0 deletions src/jit/codegenxarch.cpp
Expand Up @@ -1787,6 +1787,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genLeaInstruction(treeNode->AsAddrMode());
break;

case GT_INDEX_ADDR:
genCodeForIndexAddr(treeNode->AsIndex());
break;

case GT_IND:
genCodeForIndir(treeNode->AsIndir());
break;
Expand Down Expand Up @@ -4493,6 +4497,105 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree)
}
}

//------------------------------------------------------------------------
// genCodeForIndexAddr: Produce code for a GT_INDEX_ADDR node.
//
// Arguments:
// tree - the GT_INDEX_ADDR node
//
void CodeGen::genCodeForIndexAddr(GenTreeIndex* node)
{
assert(node->OperIs(GT_INDEX_ADDR));

ssize_t lenOffs;
ssize_t elemOffs;
if (node->gtFlags & GTF_INX_STRING_LAYOUT)
{
lenOffs = offsetof(CORINFO_String, stringLen);
elemOffs = offsetof(CORINFO_String, chars);
}
else if (node->gtFlags & GTF_INX_REFARR_LAYOUT)
{
lenOffs = offsetof(CORINFO_RefArray, length);
elemOffs = compiler->eeGetEEInfo()->offsetOfObjArrayData;
}
else // We have a standard array
{
lenOffs = offsetof(CORINFO_Array, length);
elemOffs = offsetof(CORINFO_Array, u1Elems);
}

GenTree* const base = node->Arr();
GenTree* const index = node->Index();

genConsumeReg(base);
genConsumeReg(index);

regNumber tmpReg = REG_NA;

// Generate the bounds check if necessary.
if ((node->gtFlags & GTF_INX_RNGCHK) != 0)
{
// Create a GT_IND(GT_LEA)) tree for the array length access.
GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, static_cast<unsigned>(lenOffs));
arrLenAddr.gtRegNum = REG_NA;
arrLenAddr.SetContained();
arrLenAddr.gtNext = (GenTree*)(-1);

GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr);

#ifdef _TARGET_64BIT_
// The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index
// is a native int on a 64-bit platform, we will need to widen the array length and the compare.
if (index->TypeGet() == TYP_I_IMPL)
{
// Load the array length into a register.
tmpReg = node->GetSingleTempReg();
arrLen.gtRegNum = tmpReg;
arrLen.ClearContained();
getEmitter()->emitInsLoadInd(ins_Load(TYP_INT), EA_4BYTE, arrLen.gtRegNum, &arrLen);
}
else
#endif
{
assert(varTypeIsIntegral(index->TypeGet()));

arrLen.gtRegNum = REG_NA;
arrLen.SetContained();
arrLen.gtNext = (GenTree*)(-1);
}

// Generate the range check.
getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_I_IMPL), index, &arrLen);
genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL, node->gtIndRngFailBB);
}

// Compute the address of the array element.
switch (node->gtIndElemSize)
{
case 1:
case 2:
case 4:
case 8:
getEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node), node->gtRegNum, base->gtRegNum, index->gtRegNum, node->gtIndElemSize, static_cast<int>(elemOffs));
break;

default:
{
// Multiply the index by the element size.
//
// TODO-CQ: this should really just use `imul index, index, #gtIndElemSize`
tmpReg = (tmpReg == REG_NA) ? node->GetSingleTempReg() : tmpReg;
CodeGen::genSetRegToIcon(tmpReg, (ssize_t)node->gtIndElemSize, TYP_INT);
inst_RV_RV(INS_imul, tmpReg, index->gtRegNum);
getEmitter()->emitIns_R_ARX(INS_lea, emitTypeSize(node), node->gtRegNum, base->gtRegNum, tmpReg, 1, static_cast<int>(elemOffs));
break;
}
}

genProduceReg(node);
}

//------------------------------------------------------------------------
// genCodeForIndir: Produce code for a GT_IND node.
//
Expand Down
7 changes: 7 additions & 0 deletions src/jit/flowgraph.cpp
Expand Up @@ -18338,6 +18338,13 @@ void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
noway_assert(!"DYN_BLK nodes should be sequenced as a special case");
break;

case GT_INDEX_ADDR:
// Evaluate the index first, then the array address
assert((tree->gtFlags & GTF_REVERSE_OPS) != 0);
fgSetTreeSeqHelper(tree->gtIndex.Index(), isLIR);
fgSetTreeSeqHelper(tree->gtIndex.Arr(), isLIR);
break;

default:
#ifdef DEBUG
gtDispTree(tree);
Expand Down
57 changes: 53 additions & 4 deletions src/jit/gentree.cpp
Expand Up @@ -299,6 +299,7 @@ void GenTree::InitNodeSize()
GenTree::s_gtNodeSizes[GT_FTN_ADDR] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_BOX] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_INDEX] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_INDEX_ADDR] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_ARR_BOUNDS_CHECK] = TREE_NODE_SZ_LARGE;
#ifdef FEATURE_SIMD
GenTree::s_gtNodeSizes[GT_SIMD_CHK] = TREE_NODE_SZ_LARGE;
Expand Down Expand Up @@ -1287,6 +1288,7 @@ bool GenTree::Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK)
}
break;
case GT_INDEX:
case GT_INDEX_ADDR:
if (op1->gtIndex.gtIndElemSize != op2->gtIndex.gtIndElemSize)
{
return false;
Expand Down Expand Up @@ -1865,6 +1867,7 @@ unsigned Compiler::gtHashValue(GenTree* tree)
hash ^= tree->gtCast.gtCastType;
break;
case GT_INDEX:
case GT_INDEX_ADDR:
hash += tree->gtIndex.gtIndElemSize;
break;
case GT_ALLOCOBJ:
Expand Down Expand Up @@ -1929,6 +1932,7 @@ unsigned Compiler::gtHashValue(GenTree* tree)
case GT_ARR_INDEX:
case GT_QMARK:
case GT_INDEX:
case GT_INDEX_ADDR:
break;

#ifdef FEATURE_SIMD
Expand Down Expand Up @@ -4767,6 +4771,23 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
}
break;

case GT_INDEX_ADDR:
costEx = 6; // cmp reg,reg; jae throw; mov reg, [addrmode] (not taken)
costSz = 9; // jump to cold section

level = gtSetEvalOrder(tree->gtIndex.Index());
costEx += tree->gtIndex.Index()->gtCostEx;
costSz += tree->gtIndex.Index()->gtCostSz;

lvl2 = gtSetEvalOrder(tree->gtIndex.Arr());
if (level < lvl2)
{
level = lvl2;
}
costEx += tree->gtIndex.Arr()->gtCostEx;
costSz += tree->gtIndex.Arr()->gtCostSz;
break;

default:
#ifdef DEBUG
if (verbose)
Expand Down Expand Up @@ -5799,6 +5820,7 @@ bool GenTree::OperMayThrow()
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
case GT_INDEX_ADDR:
return true;
default:
break;
Expand Down Expand Up @@ -5863,6 +5885,15 @@ GenTree::VtablePtr GenTree::GetVtableForOper(genTreeOps oper)
}
break;

case GT_INDEX_ADDR:
{
GenTreeIntCon dummyOp0(TYP_I_IMPL, 0);
GenTreeIntCon dummyOp1(TYP_I_IMPL, 0);
GenTreeIndex index(TYP_REF, &dummyOp0, &dummyOp1, 0);
res = *reinterpret_cast<VtablePtr*>(&index);
}
break;

default:
{
// Should be unary or binary op.
Expand Down Expand Up @@ -7405,6 +7436,15 @@ GenTreePtr Compiler::gtCloneExpr(
}
break;

case GT_INDEX_ADDR:
{
GenTreeIndex* asInd = tree->AsIndex();
copy = new (this, GT_INDEX_ADDR)
GenTreeIndex(asInd->TypeGet(), asInd->Arr(), asInd->Index(), asInd->gtIndElemSize);
copy->AsIndex()->gtIndRngFailBB = asInd->gtIndRngFailBB;
}
break;

case GT_ALLOCOBJ:
{
GenTreeAllocObj* asAllocObj = tree->AsAllocObj();
Expand Down Expand Up @@ -9573,6 +9613,7 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
__fallthrough;

case GT_INDEX:
case GT_INDEX_ADDR:

if ((tree->gtFlags & (GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0) // We prefer printing V or U over R
{
Expand Down Expand Up @@ -15697,6 +15738,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
structHnd = tree->gtArgPlace.gtArgPlaceClsHnd;
break;
case GT_INDEX:
case GT_INDEX_ADDR:
structHnd = tree->gtIndex.gtStructElemClass;
break;
case GT_FIELD:
Expand Down Expand Up @@ -15729,10 +15771,17 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
#endif
if (tree->gtFlags & GTF_IND_ARR_INDEX)
{
ArrayInfo arrInfo;
bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
assert(b);
structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
if (tree->AsIndir()->Addr()->OperIs(GT_INDEX_ADDR))
{
structHnd = tree->AsIndir()->Addr()->AsIndex()->gtStructElemClass;
}
else
{
ArrayInfo arrInfo;
bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
assert(b);
structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
}
}
break;
#ifdef FEATURE_SIMD
Expand Down
1 change: 1 addition & 0 deletions src/jit/gentree.h
Expand Up @@ -4180,6 +4180,7 @@ struct GenTreeIndex : public GenTreeOp

unsigned gtIndElemSize; // size of elements in the array
CORINFO_CLASS_HANDLE gtStructElemClass; // If the element type is a struct, this is the struct type.
GenTreePtr gtIndRngFailBB; // Label to jump to for array-index-out-of-range

GenTreeIndex(var_types type, GenTreePtr arr, GenTreePtr ind, unsigned indElemSize)
: GenTreeOp(GT_INDEX, type, arr, ind)
Expand Down
1 change: 1 addition & 0 deletions src/jit/gtlist.h
Expand Up @@ -163,6 +163,7 @@ GTNODE(QMARK , GenTreeQmark ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)
GTNODE(COLON , GenTreeColon ,0,GTK_BINOP|GTK_NOTLIR)

GTNODE(INDEX , GenTreeIndex ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element
GTNODE(INDEX_ADDR , GenTreeIndex ,0,GTK_BINOP|GTK_EXOP) // addr of SZ-array-element

GTNODE(MKREFANY , GenTreeOp ,0,GTK_BINOP)

Expand Down

0 comments on commit d0483a3

Please sign in to comment.