Commit e9ed4af

[TOSA] Add legalization for aten.index_select (#3760)
- Add Torch to TOSA legalization for aten.index_select
- Fix createOneDimTfIndices function in TosaLegalizeCommon.cpp to correctly
  convert Torch indices to TF-style indices, which is used in convertGatherNdOp
- Update e2e tests in xfail_sets.py
- Update basic.mlir with new LIT test for aten.index_select

Signed-off-by: Justin Ngo <justin.ngo@arm.com>
Change-Id: I52519246183949353a3cf22f0a685fe3df8ec8ff
1 parent 2374b9e commit e9ed4af
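
The conversion named in the commit message rests on one idea: PyTorch/ONNX
Gather takes a `dim` plus per-element index values, while TF-style gather_nd
indices spell out the full coordinates of every element to fetch. A minimal
NumPy model of the two halves (an illustrative sketch, not code from this
commit; both function names are hypothetical):

import numpy as np

def torch_to_tf_indices(idx, dim):
    # Start from each element's own coordinates, then overwrite the `dim`
    # coordinate with the Torch index value.
    coords = np.stack(np.meshgrid(*[np.arange(s) for s in idx.shape],
                                  indexing="ij"), axis=-1)
    coords[..., dim] = idx
    return coords

def gather_nd(params, indices):
    # TF-style gather: each trailing vector of `indices` is one full coordinate.
    return params[tuple(np.moveaxis(indices, -1, 0))]

For a [1,4,2] Torch index tensor, torch_to_tf_indices produces a [1,4,2,3]
coordinate tensor, the shape transition noted in the lowering below.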

File tree

4 files changed: +230 -57 lines changed


lib/Conversion/TorchToTosa/TorchToTosa.cpp

Lines changed: 119 additions & 0 deletions
@@ -3821,6 +3821,124 @@ LogicalResult ConvertAtenOp<AtenGatherOp>::matchAndRewrite(
   return success();
 }

+template <>
+LogicalResult ConvertAtenOp<AtenIndexSelectOp>::matchAndRewrite(
+    AtenIndexSelectOp op, OpAdaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+  // Not a tensor type.
+  auto input = adaptor.getSelf();
+  auto inputType = dyn_cast<RankedTensorType>(input.getType());
+  if (!inputType)
+    return rewriter.notifyMatchFailure(
+        op, "Only RankedTensorType inputs are currently supported");
+
+  auto index = adaptor.getIndex();
+  auto indexType = dyn_cast<RankedTensorType>(index.getType());
+
+  if (!indexType)
+    return rewriter.notifyMatchFailure(
+        op, "Only RankedTensorType indices are currently supported");
+
+  auto inputShape = inputType.getShape();
+  int inputRank = inputType.getRank();
+
+  if (indexType.getRank() == 0)
+    return rewriter.notifyMatchFailure(
+        op, "Rank 0 index tensor is currently not supported");
+
+  // Dynamic shape check
+  if (!inputType.hasStaticShape() || !indexType.hasStaticShape())
+    return rewriter.notifyMatchFailure(
+        op, "AtenIndexSelectOp: support for dynamic input "
+            "shape not implemented");
+
+  // Cast index from i64 to i32 for TOSA compatibility
+  if (indexType.getElementType() != rewriter.getIntegerType(32)) {
+    index = rewriter.create<tosa::CastOp>(
+        op->getLoc(),
+        RankedTensorType::get(indexType.getShape(),
+                              rewriter.getIntegerType(32)),
+        index);
+  }
+
+  // Get positive dim
+  int64_t dim;
+  if (!matchPattern(op.getDim(), m_TorchConstantInt(&dim)))
+    return rewriter.notifyMatchFailure(
+        op, "Value `dim` should be a torch constant int");
+  dim = toPositiveDim(dim, inputRank);
+  if (!isValidDim(dim, inputRank))
+    return rewriter.notifyMatchFailure(op, "Value `dim` is invalid");
+
+  // Get the output type
+  auto outType = getTypeConverter()->convertType(op.getType());
+
+  // Reshape and expand the index tensor to have the same rank and the same
+  // dimensions (except for the targeted dim) as the input
+  //
+  // For example:
+  // Input shape = (4, 5, 6)
+  // Index vector shape = (2)
+  // Targeted dim = 1
+  // Reshaped and expanded index vector shape = (4, 2, 6)
+  //
+  // By reshaping and expanding the index vector, we can supply it into the
+  // gather op to mimic the functionality of aten.index_select
+  SmallVector<int64_t> indicesInputRankShape;
+  for (int64_t i = 0; i < inputRank; i++) {
+    if (i == dim) {
+      indicesInputRankShape.push_back(indexType.getShape()[0]);
+    } else {
+      indicesInputRankShape.push_back(1);
+    }
+  }
+
+  auto indicesInputRankType =
+      RankedTensorType::get(makeShapeLLVMCompatible(indicesInputRankShape),
+                            rewriter.getIntegerType(32));
+
+  auto reshapedIndices = rewriter.create<tosa::ReshapeOp>(
+      op->getLoc(), indicesInputRankType, index,
+      rewriter.getDenseI64ArrayAttr(indicesInputRankShape));
+
+  SmallVector<int64_t> tileShape(indicesInputRankShape);
+  SmallVector<int64_t> expandedIndicesShape(indicesInputRankShape);
+  for (int64_t i = 0; i < inputRank; i++) {
+    if (tileShape[i] == 1 && i != dim) {
+      tileShape[i] = inputShape[i];
+      expandedIndicesShape[i] = inputShape[i];
+    } else {
+      tileShape[i] = 1;
+    }
+  }
+
+  auto tileType =
+      RankedTensorType::get(makeShapeLLVMCompatible(expandedIndicesShape),
+                            rewriter.getIntegerType(32));
+
+  auto expandedIndices = rewriter.create<tosa::TileOp>(
+      op->getLoc(), tileType, reshapedIndices.getResult(),
+      rewriter.getDenseI64ArrayAttr(tileShape));
+
+  // Convert the Torch-style index and dim into TF-style indices
+  // tensor<[1,4,2],si64> -> tensor<[1,4,2,3],si64>
+  auto indicesTf = tosa::convertTorchIndexToTfIndices(
+      rewriter, op, input, expandedIndices.getResult(), dim);
+  if (!indicesTf)
+    return rewriter.notifyMatchFailure(
+        op, "Convert TorchIndex To TfIndices failed");
+
+  // Run the TF gather_nd algorithm with the TF-style indices as input
+  auto result =
+      tosa::convertGatherNdOp(rewriter, op, outType, input, indicesTf.value());
+
+  if (!result) {
+    return rewriter.notifyMatchFailure(op, "Convert GatherNdOp failed");
+  }
+  rewriter.replaceOp(op, {result.value()});
+  return success();
+}
+
 template <>
 LogicalResult ConvertAtenOp<AtenIndexPutHackedTwinOp>::matchAndRewrite(
     AtenIndexPutHackedTwinOp op, OpAdaptor adaptor,
@@ -6240,6 +6358,7 @@ class ConvertTorchToTosa : public ConvertTorchToTosaBase<ConvertTorchToTosa> {
     INSERT_ATENOP_PATTERN(Aten__InterpolateSizeListScaleListOp);
     INSERT_ATENOP_PATTERN(AtenTrilOp);
     INSERT_ATENOP_PATTERN(AtenDiagonalOp);
+    INSERT_ATENOP_PATTERN(AtenIndexSelectOp);
 #undef INSERT_ATENOP_PATTERN

 #define INSERT_CLONE_ATENOP_PATTERN(AtenOp) \
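
As a sanity check, the reshape-and-tile trick in the pattern above can be
modeled in a few lines of NumPy (an illustrative sketch, not code from the
commit; the function name is hypothetical):

import numpy as np

def index_select_via_tiled_gather(x, dim, index):
    # Reshape the 1-D index vector to the input's rank: 1 everywhere except
    # `dim`, which keeps the index length (mirrors tosa.reshape).
    shape = [1] * x.ndim
    shape[dim] = index.shape[0]
    idx = index.reshape(shape)
    # Tile along every other dimension (mirrors tosa.tile), e.g. input
    # (4, 5, 6) with 2 indices on dim 1 gives an expanded index of (4, 2, 6).
    reps = list(x.shape)
    reps[dim] = 1
    idx = np.tile(idx, reps)
    # Gather with the expanded indices; this plays the role of the gather_nd
    # that convertGatherNdOp emits.
    return np.take_along_axis(x, idx, axis=dim)

x = np.arange(4 * 5 * 6).reshape(4, 5, 6)
assert np.array_equal(index_select_via_tiled_gather(x, 1, np.array([0, 3])),
                      np.take(x, np.array([0, 3]), axis=1))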

lib/Conversion/TorchToTosa/TosaLegalizeCommon.cpp

Lines changed: 48 additions & 33 deletions
@@ -23,56 +23,71 @@ namespace tosa {

 using namespace mlir::torch::Torch;

+// This function is a helper for `convertTorchIndexToTfIndices`.
+//
+// We convert a PyTorch index to TensorFlow-style indices so that we can use
+// the `convertGatherNdOp` and `convertScatterNdOp` functions, which lower
+// Gather and Scatter operators to TOSA using TensorFlow-style indices.
+// The difference between PyTorch/ONNX Gather/Scatter and TensorFlow
+// Gather/Scatter ops is that PyTorch/ONNX take in the dimension along which
+// you want to gather/scatter elements, while in TensorFlow the indices point
+// directly to the positions to gather/scatter elements at.
 std::optional<Value>
 createOneDimTfIndices(PatternRewriter &rewriter, Operation *op,
                       SmallVector<int64_t> indicesOneDimShape, int32_t dim,
                       ArrayRef<int64_t> indexShape) {
   unsigned indexRank = indexShape.size();
   SmallVector<int32_t> indicesVec;         // input vec to create tosaConstant
   SmallVector<int32_t> indicesMetaElement; // torch.meshgrid inputs
-  int indicesMetaElementRepeatTimes{1};    // For torch.stack(torch.meshgrid)

   // Create torch.meshgrid inputs
   // Example: indexShape=[1,4,2]
   // dim0: indicesMetaElement = torch.arange(0, 1) = [0]
   // dim1: indicesMetaElement = torch.arange(0, 4) = [0,1,2,3]
   // dim2: indicesMetaElement = torch.arange(0, 2) = [0,1]
-  for (int i = 0; i < indexShape[dim]; i++) {
+  for (int i = 0; i < indexShape[dim]; i++)
     indicesMetaElement.push_back(i);
-  }
-
-  // Compute total number of meta element repeat times:
-  // = product(indexShape[0:dim]) x product(indexShape[dim+1:-1]), skip dim
-  // dim0: indicesMetaElementRepeatTimes = 1 x 4*2 = 8
-  // dim1: indicesMetaElementRepeatTimes = 1 *1 x 2 = 2
-  // dim2: indicesMetaElementRepeatTimes = 1 *1*4 = 4
-  for (int i = 0; i < static_cast<int>(indexRank); i++) {
-    if (i == dim) {
-      continue;
-    } else {
-      indicesMetaElementRepeatTimes *= indexShape[i];
-    }
-  }

-  if (dim != static_cast<int>(indexShape.size()) - 1) {
-    // Create one dim indices for index except for last dim
-    // Create indices raw vector.
-    // torch.stack(torch.meshgrid)
-    // dim0: indicesVec = [0 0 0 0 0 0 0 0]
-    // dim1: indicesVec = [0 0 1 1 2 2 3 3]
+  int preDimMetaElementRepeatTimes = 1;
+  int postDimMetaElementRepeatTimes = 1;
+
+  // Compute the number of times the meta element range should repeat
+  // = product(indexShape[0:dim])
+  // dim0: preDimMetaElementRepeatTimes = 1
+  // dim1: preDimMetaElementRepeatTimes = 1
+  // dim2: preDimMetaElementRepeatTimes = 1 x 4 = 4
+  for (int i = 0; i < dim; i++)
+    preDimMetaElementRepeatTimes *= indexShape[i];
+
+  // Compute the number of times each meta element should repeat
+  // = product(indexShape[dim+1:indexRank])
+  // dim0: postDimMetaElementRepeatTimes = 4 x 2 = 8
+  // dim1: postDimMetaElementRepeatTimes = 2
+  // dim2: postDimMetaElementRepeatTimes = 1
+  for (int i = dim + 1; i < static_cast<int>(indexRank); i++)
+    postDimMetaElementRepeatTimes *= indexShape[i];
+
+  // Example using dim1:
+  // preDimMetaElementRepeatTimes = 1
+  // postDimMetaElementRepeatTimes = 2
+  // Using postDimMetaElementRepeatTimes, we get the meta element range:
+  // [0 0 1 1 2 2 3 3]
+  // Using preDimMetaElementRepeatTimes, we get the full one dim indices:
+  // [0 0 1 1 2 2 3 3]
+  //
+  // Let's use a clearer example:
+  // indexShape = [3, 3, 2]
+  // Target dim = 1
+  // => preDimMetaElementRepeatTimes = 3
+  //    postDimMetaElementRepeatTimes = 2
+  // Using postDimMetaElementRepeatTimes, we get the meta element range:
+  // [0 0 1 1 2 2]
+  // Using preDimMetaElementRepeatTimes, we get the full one dim indices:
+  // [0 0 1 1 2 2 0 0 1 1 2 2 0 0 1 1 2 2]
+  for (int i = 0; i < preDimMetaElementRepeatTimes; i++) {
     for (size_t elementId = 0; elementId < indicesMetaElement.size();
          elementId++) {
-      for (int i = 0; i < indicesMetaElementRepeatTimes; i++) {
-        indicesVec.push_back(indicesMetaElement[elementId]);
-      }
-    }
-  } else { // Create the one dim indices for last dim of index
-    // Create indices raw vector
-    // dim2: indicesVec= [0 1 0 1 0 1 0 1]
-    // Caution: indicesVec != [0 0 0 0 1 1 1 1]
-    for (int i = 0; i < indicesMetaElementRepeatTimes; i++) {
-      for (size_t elementId = 0; elementId < indicesMetaElement.size();
-           elementId++) {
+      for (int j = 0; j < postDimMetaElementRepeatTimes; j++) {
         indicesVec.push_back(indicesMetaElement[elementId]);
       }
     }
   }
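
The repeat logic this fix introduces is easy to sanity-check in Python (an
illustrative model of the comments above, not code from the commit; the
helper name is hypothetical):

import math

def one_dim_tf_indices(index_shape, dim):
    # Each element of arange(index_shape[dim]) repeats `post` times, and the
    # whole range then repeats `pre` times, exactly as in the fixed loops.
    pre = math.prod(index_shape[:dim])
    post = math.prod(index_shape[dim + 1:])
    meta = list(range(index_shape[dim]))
    return [e for _ in range(pre) for e in meta for _ in range(post)]

# indexShape = [1, 4, 2] from the comments above:
assert one_dim_tf_indices([1, 4, 2], 0) == [0] * 8              # pre=1, post=8
assert one_dim_tf_indices([1, 4, 2], 1) == [0, 0, 1, 1, 2, 2, 3, 3]
assert one_dim_tf_indices([1, 4, 2], 2) == [0, 1] * 4           # pre=4, post=1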

projects/pt1/e2e_testing/xfail_sets.py

Lines changed: 31 additions & 24 deletions
@@ -1663,6 +1663,17 @@
 # Write the TOSA set as a "passing" set as it is very early in development
 # and very few tests work yet.
 TOSA_PASS_SET = {
+    "AtenLinalgCrossBroadcast_basic",
+    "AtenLinalgCrossCustomDim_basic",
+    "AtenLinalgCrossFloat_basic",
+    "AtenLinalgCrossInt_basic",
+    "AtenLinalgCrossNegativeDim_basic",
+    "BinaryCrossEntropyWithLogitsStaticModule_basic",
+    "IndexSelectNegativeDimModule_basic",
+    "IndexSelectSingleIdxModule_basic",
+    "IndexSelectTwoIdxModule_basic",
+    "IndexSelectWholeDimensionModule_basic",
+    "IndexSelectWholeTensorModule_basic",
     "DiagonalWithStaticShapeModule_basic",
     "EinsumStaticDiagonalDimensionModule_basic",
     "ElementwiseAtenFloorDivideBroadcastModule_basic",
@@ -2342,6 +2353,13 @@
 }
 ) - {
     ### Test failing in make_fx_tosa but not in tosa
+    "ChunkListUnpackUneven_Module_basic",
+    "ChunkListUnpack_Module_basic",
+    "SplitTensorGetItem_Module_basic",
+    "SplitTensorLastSmallerModule_basic",
+    "SplitTensorListUnpackModule_basic",
+    "SplitTensorNegativeDimModule_basic",
+    "SplitWithSizesListUnpackModule_basic",
     # Dynamic shape, has extra unsupported broadcast ops
     "Matmul_3d",
     "MatmulStaticBroadcast_basic",
@@ -3205,6 +3223,17 @@
 }

 FX_IMPORTER_TOSA_XFAIL_SET = {
+    "ChunkListUnpackDynamic_Module_basic",
+    "ChunkListUnpackUnevenDynamic_Module_basic",
+    "ChunkListUnpackUneven_Module_basic",
+    "ChunkListUnpack_Module_basic",
+    "SplitTensorGetItem_Module_basic",
+    "SplitTensorLastSmallerModule_basic",
+    "SplitTensorListUnpackModule_basic",
+    "SplitTensorNegativeDimModule_basic",
+    "SplitWithSizesListUnpackModule_basic",
+    "SplitWithSizes_Module_basic",
+    "ElementwiseCreateComplexModule_basic",
     "AdaptiveMaxPool1dDimOneStatic_basic",
     "AtenPolarDoubleModule_basic",
     "AtenPolarFloatModule_basic",
@@ -3302,12 +3331,6 @@
     "AtenIntTensorCharDtypeModule_basic",
     "AtenItemFpOpModule_basic",
     "AtenItemIntOpModule_basic",
-    "AtenLinalgCrossBroadcast_basic",
-    "AtenLinalgCrossCustomDim_basic",
-    "AtenLinalgCrossDynamic_basic",
-    "AtenLinalgCrossFloat_basic",
-    "AtenLinalgCrossInt_basic",
-    "AtenLinalgCrossNegativeDim_basic",
     "AtenMatmulQMixedSigni8Transpose_basic",
     "AtenMatmulQMixedSigni8_basic",
     "AtenMatmulQint8MV_basic",
@@ -3551,15 +3574,7 @@
     "IndexPutImpl3DFloatAccumulateModule_basic",
     "IndexPutImpl3DFloatNonAccumulateModule_basic",
     "IndexPutImplIndexWithNoneModule_basic",
-    "IndexSelectDynamicIndexSizeModule_basic",
-    "IndexSelectDynamicInputSizeModule_basic",
-    "IndexSelectDynamicModulebasic",
-    "IndexSelectNegativeDimModule_basic",
     "IndexSelectRank0IdxModule_basic",
-    "IndexSelectSingleIdxModule_basic",
-    "IndexSelectTwoIdxModule_basic",
-    "IndexSelectWholeDimensionModule_basic",
-    "IndexSelectWholeTensorModule_basic",
     "IndexTensorNegativeIndexModule_basic",
     "InterpolateDynamicModule_sizes_bilinear",
     "InterpolateDynamicModule_sizes_nearest",
@@ -3848,6 +3863,8 @@
 }

 ONNX_TOSA_XFAIL_SET = {
+    "ElementwiseCreateComplexModule_basic",
+    "ReduceAllDimFloatModule_basic",
     "AdaptiveMaxPool1dDimOneStatic_basic",
     "ScaledDotProductAttentionDifferentCausalModule_basic",
     "HstackBasicComplexModule_basic",
@@ -4269,7 +4286,6 @@
     "ElementwiseWhereSelfModule_basic",
     "EmbeddingModule1DIndices_basic",
     "EmbeddingModuleF16_basic",
-    "EmbeddingModuleI32Static_basic",
     "EmbeddingModuleI32_basic",
     "EmbeddingModuleI64_basic",
     "EmptyLikeMemoryFormatModule_basic",
@@ -4363,12 +4379,6 @@
     "IndexSelectDynamicIndexSizeModule_basic",
     "IndexSelectDynamicInputSizeModule_basic",
     "IndexSelectDynamicModulebasic",
-    "IndexSelectNegativeDimModule_basic",
-    "IndexSelectRank0IdxModule_basic",
-    "IndexSelectSingleIdxModule_basic",
-    "IndexSelectTwoIdxModule_basic",
-    "IndexSelectWholeDimensionModule_basic",
-    "IndexSelectWholeTensorModule_basic",
     "IndexTensorDyanmicInputContiguousWithNoneModule_basic",
     "IndexTensorDyanmicInputNonContiguousWithNoneModule_basic",
     "IndexTensorHackedTwinModule3dInput_basic",
@@ -4386,10 +4396,8 @@
     "IndexTensorMultiInputOneDim_basic",
     "IndexTensorMultiInputThreeIndexers_basic",
     "IndexTensorMultiInput_basic",
-    "IndexTensorNegativeIndexModule_basic",
     "IndexTensorSelectDimModule_basic",
     "IndexTensorStaticContiguousWithNoneModule_basic",
-    "IndexTensorStaticModule_basic",
     "IndexTensorStaticNonContiguousWithNoneModule_basic",
     "InterpolateDynamicModule_sizes_bilinear",
     "InterpolateDynamicModule_sizes_nearest",
@@ -4688,7 +4696,6 @@
     "ScatterValueFloatModule_basic",
     "ScatterValueIntModule_basic",
     "SelectIntModule_basic",
-    "SelectIntNegativeDimAndIndexStaticModule_basic",
     "SelectScattertModule_basic",
     "SelectScattertStaticModule_basic",
     "SignAndLogarithmOfDeterminantModule_F32",
