[mlir][Tensor] Move ParallelInsertSlice to the tensor dialect

This is mostly NFC and will allow tensor.parallel_insert_slice to gain rank-reducing semantics by reusing the vast majority of the tensor.insert_slice impl. Depends on D128857. Differential Revision: https://reviews.llvm.org/D128920
llvm · Jul 4, 2022 · 7fbf55c · 7fbf55c
1 parent f0089fa
commit 7fbf55c
Show file tree

Hide file tree

Showing 17 changed files with 553 additions and 552 deletions.
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td
@@ -503,115 +503,6 @@ def PerformConcurrentlyOp : SCF_Op<"foreach_thread.perform_concurrently", [
   }];
 }
 
-//===----------------------------------------------------------------------===//
-// ParallelInsertSliceOp
-//===----------------------------------------------------------------------===//
-
-// TODO: Implement PerformConcurrentlyOpInterface.
-def ParallelInsertSliceOp : SCF_Op<"foreach_thread.parallel_insert_slice", [
-       AttrSizedOperandSegments,
-       OffsetSizeAndStrideOpInterface,
-       // TODO: Cannot use an interface here atm, verify this manually for now.
-       // HasParent<"ParallelCombiningOpInterface">
-  ]> {
-  let summary = [{
-    Specify the tensor slice update of a single thread within the terminator of
-    an `scf.foreach_thread`.
-  }];
-  let description = [{
-    The parent `scf.foreach_thread` returns values that are formed by aggregating
-    the actions of all the ops contained within the `perform_concurrently`
-    terminator of all the threads, in some unspecified order.
-    The `scf.foreach_thread.parallel_insert_slice` is one such op allowed in
-    the `scf.foreach_thread.perform_concurrently` terminator.
-
-    Conflicting writes result in undefined semantics, in that the indices written
-    to by multiple parallel updates might contain data from any of the updates, or
-    even a malformed bit pattern.
-
-    If an index is updated exactly once, the value contained at that index
-    in the resulting tensor will be equal to the value at a corresponding index of a
-    slice that was used for the updated. If an index is not updated at all, its value
-    will be equal to the one in the original tensor.
-
-    This op does not create a new value, which allows maintaining a clean
-    separation between the subset and full tensor.
-    Note that we cannot mark this operation as pure (NoSideEffects), even
-    though it has no side effects, because it will get DCEd during
-    canonicalization.
-  }];
-
-  let arguments = (ins
-    AnyRankedTensor:$source,
-    AnyRankedTensor:$dest,
-    Variadic<Index>:$offsets,
-    Variadic<Index>:$sizes,
-    Variadic<Index>:$strides,
-    I64ArrayAttr:$static_offsets,
-    I64ArrayAttr:$static_sizes,
-    I64ArrayAttr:$static_strides
-  );
-  let assemblyFormat = [{
-    $source `into` $dest ``
-    custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
-    custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
-    custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
-    attr-dict `:` type($source) `into` type($dest)
-  }];
-
-  let extraClassDeclaration = [{
-    ::mlir::Operation::operand_range offsets() { return getOffsets(); }
-    ::mlir::Operation::operand_range sizes() { return getSizes(); }
-    ::mlir::Operation::operand_range strides() { return getStrides(); }
-    ::mlir::ArrayAttr static_offsets() { return getStaticOffsets(); }
-    ::mlir::ArrayAttr static_sizes() { return getStaticSizes(); }
-    ::mlir::ArrayAttr static_strides() { return getStaticStrides(); }
-
-    Type yieldedType() { return getDest().getType(); }
-
-    RankedTensorType getSourceType() {
-      return getSource().getType().cast<RankedTensorType>();
-    }
-
-    ParallelCombiningOpInterface getParallelCombiningParent() {
-      return dyn_cast<ParallelCombiningOpInterface>(
-        getOperation()->getParentOp());
-    }
-
-    /// Return the expected rank of each of the `static_offsets`, `static_sizes`
-    /// and `static_strides` attributes.
-    std::array<unsigned, 3> getArrayAttrMaxRanks() {
-      unsigned rank = getSourceType().getRank();
-      return {rank, rank, rank};
-    }
-
-    /// Return the number of leading operands before `offsets`, `sizes` and
-    /// `strides` operands.
-    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
-
-    /// Return the OpResult of the enclosing ForeachThreadOp that is
-    /// corresponding to this ParallelInsertSliceOp.
-    OpResult getTiedOpResult();
-  }];
-
-  let builders = [
-    // Build a ParallelInsertSliceOp with mixed static and dynamic entries.
-    OpBuilder<(ins "Value":$source, "Value":$dest,
-      "ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
-      "ArrayRef<OpFoldResult>":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-
-    // Build a ParallelInsertSliceOp with dynamic entries.
-    OpBuilder<(ins "Value":$source, "Value":$dest,
-      "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
-  ];
-
-  let hasCanonicalizer = 1;
-  let hasFolder = 1;
-  let hasVerifier = 1;
-}
-
 //===----------------------------------------------------------------------===//
 // IfOp
 //===----------------------------------------------------------------------===//

diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
@@ -17,6 +17,7 @@
 #include "mlir/Interfaces/CastInterfaces.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
+#include "mlir/Interfaces/ParallelCombiningOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Interfaces/TilingInterface.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"

diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -13,6 +13,7 @@ include "mlir/Dialect/Tensor/IR/TensorBase.td"
 include "mlir/Interfaces/CastInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/ParallelCombiningOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
@@ -1051,6 +1052,110 @@ def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect,
   let hasRegionVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// ParallelInsertSliceOp
+//===----------------------------------------------------------------------===//
+
+// TODO: Implement PerformConcurrentlyOpInterface.
+def Tensor_ParallelInsertSliceOp : Tensor_Op<"parallel_insert_slice", [
+       AttrSizedOperandSegments,
+       OffsetSizeAndStrideOpInterface,
+       // TODO: Cannot use an interface here atm, verify this manually for now.
+       // HasParent<"ParallelCombiningOpInterface">
+  ]> {
+  let summary = [{
+    Specify the tensor slice update of a single thread of a parent
+    ParallelCombiningOpInterface op.
+  }];
+  let description = [{
+    The `parallel_insert_slice` yields a subset tensor value to its parent
+    ParallelCombiningOpInterface. These subset tensor values are aggregated
+    in some unspecified order into a full tensor value returned by the parent
+    parallel iterating op.
+    The `parallel_insert_slice` is one such op allowed in the
+    ParallelCombiningOpInterface op.
+
+    Conflicting writes result in undefined semantics, in that the indices written
+    to by multiple parallel updates might contain data from any of the updates,
+    or even a malformed bit pattern.
+
+    If an index is updated exactly once, the value contained at that index
+    in the resulting tensor will be equal to the value at a corresponding index
+    of a slice that was used for the update. If an index is not updated at all,
+    its value will be equal to the one in the original tensor.
+
+    This op does not create a new value, which allows maintaining a clean
+    separation between the subset and full tensor.
+
+    Note that we cannot mark this operation as pure (NoSideEffect), even
+    though it has no side effects, because it will get DCEd during
+    canonicalization.
+  }];
+
+  let arguments = (ins
+    AnyRankedTensor:$source,
+    AnyRankedTensor:$dest,
+    Variadic<Index>:$offsets,
+    Variadic<Index>:$sizes,
+    Variadic<Index>:$strides,
+    I64ArrayAttr:$static_offsets,
+    I64ArrayAttr:$static_sizes,
+    I64ArrayAttr:$static_strides
+  );
+  let assemblyFormat = [{
+    $source `into` $dest ``
+    custom<OperandsOrIntegersOffsetsOrStridesList>($offsets, $static_offsets)
+    custom<OperandsOrIntegersSizesList>($sizes, $static_sizes)
+    custom<OperandsOrIntegersOffsetsOrStridesList>($strides, $static_strides)
+    attr-dict `:` type($source) `into` type($dest)
+  }];
+
+  let extraClassDeclaration = [{
+    Type yieldedType() { return getDest().getType(); }
+
+    RankedTensorType getSourceType() {
+      return getSource().getType().cast<RankedTensorType>();
+    }
+
+    ParallelCombiningOpInterface getParallelCombiningParent() {
+      return dyn_cast<ParallelCombiningOpInterface>(
+        getOperation()->getParentOp());
+    }
+
+    /// Return the expected rank of each of the `static_offsets`, `static_sizes`
+    /// and `static_strides` attributes.
+    std::array<unsigned, 3> getArrayAttrMaxRanks() {
+      unsigned rank = getSourceType().getRank();
+      return {rank, rank, rank};
+    }
+
+    /// Return the number of leading operands before `offsets`, `sizes` and
+    /// `strides` operands.
+    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
+
+    /// Return the OpResult of the enclosing ForeachThreadOp that
+    /// corresponds to this ParallelInsertSliceOp.
+    OpResult getTiedOpResult();
+  }];
+
+  let builders = [
+    // Build a ParallelInsertSliceOp with mixed static and dynamic entries.
+    OpBuilder<(ins "Value":$source, "Value":$dest,
+      "ArrayRef<OpFoldResult>":$offsets, "ArrayRef<OpFoldResult>":$sizes,
+      "ArrayRef<OpFoldResult>":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+
+    // Build a ParallelInsertSliceOp with dynamic entries.
+    OpBuilder<(ins "Value":$source, "Value":$dest,
+      "ValueRange":$offsets, "ValueRange":$sizes, "ValueRange":$strides,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>
+  ];
+
+  let hasCanonicalizer = 1;
+  let hasFolder = 1;
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // SplatOp
 //===----------------------------------------------------------------------===//

diff --git a/mlir/lib/Dialect/SCF/IR/CMakeLists.txt b/mlir/lib/Dialect/SCF/IR/CMakeLists.txt
@@ -13,7 +13,6 @@ add_mlir_dialect_library(MLIRSCFDialect
   MLIRControlFlowDialect
   MLIRIR
   MLIRLoopLikeInterface
-  MLIRParallelCombiningOpInterface
   MLIRSideEffectInterfaces
   )