Commit 129d6e5

[mlir] Move std.tensor_cast -> tensor.cast.
This is almost entirely mechanical.

Differential Revision: https://reviews.llvm.org/D93357
1 parent: a555ca8
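
For illustration, the rename is a one-token change in the IR; a minimal before/after (using the shapes from the op documentation below), with syntax and semantics otherwise unchanged:

```mlir
// Before this commit: the cast lived in the standard dialect.
%1 = tensor_cast %0 : tensor<8x16xf32> to tensor<?x?xf32>

// After this commit: the same cast, now in the tensor dialect.
%1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
```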

39 files changed: +500 -471 lines


mlir/include/mlir/Dialect/Linalg/Utils/Utils.h

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ namespace linalg {
 class LinalgDependenceGraph;
 
 /// A struct containing the Linalg producer before and after fusion.
-/// When operating on tensors, `fusedProducer` may feed into a `tensor_cast` op
+/// When operating on tensors, `fusedProducer` may feed into a `tensor.cast` op
 /// before the consumer Linalg op, until enough canonicalizations have applied.
 struct FusionInfo {
   LinalgOp originalProducer;

mlir/include/mlir/Dialect/StandardOps/IR/Ops.h

Lines changed: 0 additions & 25 deletions
@@ -354,31 +354,6 @@ computeRankReductionMask(ArrayRef<int64_t> originalShape,
 /// ```
 bool canFoldIntoConsumerOp(MemRefCastOp castOp);
 
-/// Counterpart of `canFoldIntoConsumerOp(MemRefCastOp castOp)` for tensors.
-/// Determines whether TensorCastOp casts to a more dynamic version of the
-/// source tensor. This is useful to fold a tensor_cast into a consuming op and
-/// implement canonicalization patterns for ops in different dialects that may
-/// consume the results of tensor_cast operations. Such foldable tensor_cast
-/// operations are typically inserted as `subtensor` ops and are canonicalized,
-/// to preserve the type compatibility of their uses.
-///
-/// Returns true when all conditions are met:
-///   1. source and result are ranked tensors with same element type and rank.
-///   2. the tensor type has more static information than the result
-///
-/// Example:
-/// ```mlir
-///   %1 = tensor_cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
-///   %2 = consumer %1 ... : tensor<?x?xf32> ...
-/// ```
-///
-/// folds into:
-///
-/// ```mlir
-///   %2 = consumer %0 ... : tensor<8x16xf32> ...
-/// ```
-bool canFoldIntoConsumerOp(TensorCastOp castOp);
-
 /// Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer
 /// comparison predicates.
 bool applyCmpPredicate(CmpIPredicate predicate, const APInt &lhs,

mlir/include/mlir/Dialect/StandardOps/IR/Ops.td

Lines changed: 1 addition & 51 deletions
@@ -62,7 +62,7 @@ class CastOp<string mnemonic, list<OpTrait> traits = []> :
   let printer = [{
     return printStandardCastOp(this->getOperation(), p);
   }];
-  let verifier = [{ return ::verifyCastOp(*this); }];
+  let verifier = [{ return impl::verifyCastOp(*this, areCastCompatible); }];
 
   let hasFolder = 1;
 }
@@ -3428,56 +3428,6 @@ def TanhOp : FloatUnaryOp<"tanh"> {
   }];
 }
 
-//===----------------------------------------------------------------------===//
-// TensorCastOp
-//===----------------------------------------------------------------------===//
-
-def TensorCastOp : CastOp<"tensor_cast"> {
-  let summary = "tensor cast operation";
-  let description = [{
-    Syntax:
-
-    ```
-    operation ::= ssa-id `=` `std.tensor_cast` ssa-use `:` type `to` type
-    ```
-
-    Convert a tensor from one type to an equivalent type without changing any
-    data elements. The source and destination types must both be tensor types
-    with the same element type. If both are ranked, then the rank should be the
-    same and static dimensions should match. The operation is invalid if
-    converting to a mismatching constant dimension.
-
-    Example:
-
-    ```mlir
-    // Convert from unknown rank to rank 2 with unknown dimension sizes.
-    %2 = "std.tensor_cast"(%1) : (tensor<*xf32>) -> tensor<?x?xf32>
-    %2 = tensor_cast %1 : tensor<*xf32> to tensor<?x?xf32>
-
-    // Convert to a type with more known dimensions.
-    %3 = "std.tensor_cast"(%2) : (tensor<?x?xf32>) -> tensor<4x?xf32>
-
-    // Discard static dimension and rank information.
-    %4 = "std.tensor_cast"(%3) : (tensor<4x?xf32>) -> tensor<?x?xf32>
-    %5 = "std.tensor_cast"(%4) : (tensor<?x?xf32>) -> tensor<*xf32>
-    ```
-  }];
-
-  let arguments = (ins AnyTensor:$source);
-  let results = (outs AnyTensor);
-
-  let extraClassDeclaration = [{
-    /// Return true if `a` and `b` are valid operand and result pairs for
-    /// the operation.
-    static bool areCastCompatible(Type a, Type b);
-
-    /// The result of a tensor_cast is always a tensor.
-    TensorType getType() { return getResult().getType().cast<TensorType>(); }
-  }];
-
-  let hasCanonicalizer = 1;
-}
-
 //===----------------------------------------------------------------------===//
 // TensorLoadOp
 //===----------------------------------------------------------------------===//

mlir/include/mlir/Dialect/Tensor/IR/Tensor.h

Lines changed: 34 additions & 0 deletions
@@ -28,4 +28,38 @@
 #define GET_OP_CLASSES
 #include "mlir/Dialect/Tensor/IR/TensorOps.h.inc"
 
+//===----------------------------------------------------------------------===//
+// Tensor Dialect Helpers
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+namespace tensor {
+
+/// Determines whether tensor::CastOp casts to a more dynamic version of the
+/// source tensor. This is useful to fold a tensor.cast into a consuming op and
+/// implement canonicalization patterns for ops in different dialects that may
+/// consume the results of tensor.cast operations. Such foldable tensor.cast
+/// operations are typically inserted as `subtensor` ops and are canonicalized,
+/// to preserve the type compatibility of their uses.
+///
+/// Returns true when all conditions are met:
+///   1. source and result are ranked tensors with same element type and rank.
+///   2. the tensor type has more static information than the result
+///
+/// Example:
+/// ```mlir
+///   %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
+///   %2 = consumer %1 ... : tensor<?x?xf32> ...
+/// ```
+///
+/// folds into:
+///
+/// ```mlir
+///   %2 = consumer %0 ... : tensor<8x16xf32> ...
+/// ```
+bool canFoldIntoConsumerOp(CastOp castOp);
+
+} // namespace tensor
+} // namespace mlir
+
 #endif // MLIR_DIALECT_TENSOR_IR_TENSOR_H_

mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td

Lines changed: 46 additions & 0 deletions
@@ -19,6 +19,52 @@ class Tensor_Op<string mnemonic, list<OpTrait> traits = []>
   let parser = [{ return ::parse$cppClass(parser, result); }];
 }
 
+//===----------------------------------------------------------------------===//
+// CastOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_CastOp : Tensor_Op<"cast", [NoSideEffect]> {
+  let summary = "tensor cast operation";
+  let description = [{
+    Convert a tensor from one type to an equivalent type without changing any
+    data elements. The source and destination types must both be tensor types
+    with the same element type. If both are ranked, then the rank should be the
+    same and static dimensions should match. The operation is invalid if
+    converting to a mismatching constant dimension.
+
+    Example:
+
+    ```mlir
+    // Convert from unknown rank to rank 2 with unknown dimension sizes.
+    %2 = tensor.cast %1 : tensor<*xf32> to tensor<?x?xf32>
+
+    // Convert to a type with more known dimensions.
+    %3 = tensor.cast %2 : tensor<?x?xf32> to tensor<4x?xf32>
+
+    // Discard static dimension and rank information.
+    %4 = tensor.cast %3 : tensor<4x?xf32> to tensor<?x?xf32>
+    %5 = tensor.cast %4 : tensor<?x?xf32> to tensor<*xf32>
+    ```
+  }];
+
+  let arguments = (ins AnyTensor:$source);
+  let results = (outs AnyTensor:$dest);
+  let assemblyFormat = "$source attr-dict `:` type($source) `to` type($dest)";
+  let verifier = "return impl::verifyCastOp(*this, areCastCompatible);";
+
+  let extraClassDeclaration = [{
+    /// Return true if `a` and `b` are valid operand and result pairs for
+    /// the operation.
+    static bool areCastCompatible(Type a, Type b);
+
+    /// The result of a tensor.cast is always a tensor.
+    TensorType getType() { return getResult().getType().cast<TensorType>(); }
+  }];
+
+  let hasFolder = 1;
+  let hasCanonicalizer = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // ExtractOp
 //===----------------------------------------------------------------------===//
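
The ODS above declares `areCastCompatible`, but its C++ body lives elsewhere in the commit and is not part of this excerpt. As a rough sketch, a compatibility check following the documented rules (same element type, matching static dimensions, unranked casts allowed) could look like the following; the function name is hypothetical and this is not necessarily the commit's exact implementation:

```cpp
#include "mlir/IR/BuiltinTypes.h" // assumed header for TensorType et al.
#include "llvm/ADT/STLExtras.h"   // for llvm::zip

using namespace mlir;

// Sketch only: returns true if casting between tensor types `a` and `b`
// obeys the rules in the tensor.cast description above.
static bool areCastCompatibleSketch(Type a, Type b) {
  auto aT = a.dyn_cast<TensorType>();
  auto bT = b.dyn_cast<TensorType>();
  // Both types must be tensors with the same element type.
  if (!aT || !bT || aT.getElementType() != bT.getElementType())
    return false;
  auto aRanked = aT.dyn_cast<RankedTensorType>();
  auto bRanked = bT.dyn_cast<RankedTensorType>();
  // Casting to or from an unranked tensor is always allowed.
  if (!aRanked || !bRanked)
    return true;
  // Ranked-to-ranked: ranks must agree...
  if (aRanked.getRank() != bRanked.getRank())
    return false;
  // ...and static dimensions must match; a dynamic (?) dimension is
  // compatible with anything.
  for (auto dims : llvm::zip(aRanked.getShape(), bRanked.getShape())) {
    int64_t aDim = std::get<0>(dims), bDim = std::get<1>(dims);
    if (aDim != ShapedType::kDynamicSize &&
        bDim != ShapedType::kDynamicSize && aDim != bDim)
      return false;
  }
  return true;
}
```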

mlir/include/mlir/IR/OpDefinition.h

Lines changed: 7 additions & 0 deletions
@@ -1775,11 +1775,18 @@ void printOneResultOp(Operation *op, OpAsmPrinter &p);
 // These functions are out-of-line implementations of the methods in CastOp,
 // which avoids them being template instantiated/duplicated.
 namespace impl {
+// TODO: Remove the parse/print/build here (new ODS functionality obsoletes the
+// need for them, but some older ODS code in `std` still depends on them).
 void buildCastOp(OpBuilder &builder, OperationState &result, Value source,
                  Type destType);
 ParseResult parseCastOp(OpAsmParser &parser, OperationState &result);
 void printCastOp(Operation *op, OpAsmPrinter &p);
+// TODO: Create a CastOpInterface with a method areCastCompatible.
+// Also, consider adding functionality to CastOpInterface to be able to perform
+// the ChainedTensorCast canonicalization generically.
 Value foldCastOp(Operation *op);
+LogicalResult verifyCastOp(Operation *op,
+                           function_ref<bool(Type, Type)> areCastCompatible);
 } // namespace impl
 } // end namespace mlir
 
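
The new `verifyCastOp` declaration takes the op plus an `areCastCompatible` callback, so each cast-like op supplies its own type rule while sharing one verifier. Its definition is elsewhere in the commit; as a minimal sketch of what such a shared verifier plausibly does for a single-operand, single-result cast op (an illustration, not the verbatim implementation):

```cpp
#include "mlir/IR/Operation.h"

using namespace mlir;

// Sketch only: shared verifier for cast-like ops with one operand and one
// result; the op-specific compatibility rule is passed in as a callback.
static LogicalResult
verifyCastOpSketch(Operation *op,
                   function_ref<bool(Type, Type)> areCastCompatible) {
  Type opType = op->getOperand(0).getType();
  Type resType = op->getResult(0).getType();
  if (!areCastCompatible(opType, resType))
    return op->emitError("operand type ")
           << opType << " and result type " << resType
           << " are cast incompatible";
  return success();
}
```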

mlir/integration_test/Dialect/Linalg/CPU/test-elementwise.mlir

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-elementwise-to-linalg -std-bufferize -tensor-constant-bufferize -linalg-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
+// RUN: mlir-opt %s -convert-elementwise-to-linalg -std-bufferize -tensor-constant-bufferize -linalg-bufferize -tensor-bufferize -func-bufferize -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
 // RUN: | FileCheck %s
@@ -8,7 +8,7 @@ func @main() {
   %b = constant dense<[10.0, 20.0, 30.0]> : tensor<3xf32>
 
   %addf = addf %a, %b : tensor<3xf32>
-  %addf_unranked = tensor_cast %addf : tensor<3xf32> to tensor<*xf32>
+  %addf_unranked = tensor.cast %addf : tensor<3xf32> to tensor<*xf32>
   call @print_memref_f32(%addf_unranked) : (tensor<*xf32>) -> ()
   // CHECK: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [3] strides = [1] data =
   // CHECK-NEXT: [11, 22, 33]

mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir

Lines changed: 5 additions & 3 deletions
@@ -1,4 +1,6 @@
-// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -tensor-constant-bufferize -func-bufferize \
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize \
+// RUN: -tensor-constant-bufferize -tensor-bufferize -func-bufferize \
+// RUN: -finalizing-bufferize \
 // RUN: -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
@@ -15,14 +17,14 @@ func @main() {
   %inserted_at_position_0 = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
   %inserted_at_position_1 = subtensor_insert %insert_val into %const[1][1][1] : tensor<1xf32> into tensor<2xf32>
 
-  %unranked_at_position_0 = tensor_cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32>
+  %unranked_at_position_0 = tensor.cast %inserted_at_position_0 : tensor<2xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked_at_position_0) : (tensor<*xf32>) -> ()
 
   // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
   // CHECK-SAME: rank = 1 offset = 0 sizes = [2] strides = [1] data =
   // CHECK-NEXT: [20, 10]
 
-  %unranked_at_position_1 = tensor_cast %inserted_at_position_1 : tensor<2xf32> to tensor<*xf32>
+  %unranked_at_position_1 = tensor.cast %inserted_at_position_1 : tensor<2xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked_at_position_1) : (tensor<*xf32>) -> ()
 
   // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}

mlir/integration_test/Dialect/Linalg/CPU/test-subtensor-insert.mlir

Lines changed: 4 additions & 2 deletions
@@ -1,4 +1,6 @@
-// RUN: mlir-opt %s -linalg-bufferize -std-bufferize -tensor-constant-bufferize -func-bufferize \
+// RUN: mlir-opt %s -linalg-bufferize -std-bufferize \
+// RUN: -tensor-constant-bufferize -tensor-bufferize -func-bufferize \
+// RUN: -finalizing-bufferize \
 // RUN: -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
@@ -9,7 +11,7 @@ func @main() {
   %insert_val = constant dense<20.0> : tensor<1xf32>
   %inserted = subtensor_insert %insert_val into %const[0][1][1] : tensor<1xf32> into tensor<2xf32>
 
-  %unranked = tensor_cast %inserted : tensor<2xf32> to tensor<*xf32>
+  %unranked = tensor.cast %inserted : tensor<2xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
 
   // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}

mlir/integration_test/Dialect/Linalg/CPU/test-tensor-e2e.mlir

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -tensor-constant-bufferize -std-bufferize -linalg-bufferize \
-// RUN: -func-bufferize -finalizing-bufferize -convert-linalg-to-loops \
+// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -convert-linalg-to-loops \
 // RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
@@ -19,7 +19,7 @@ func @main() {
   // Note that this is skipping a step and we would need at least some function
   // attribute to declare that this conversion is valid (e.g. when we statically
   // know that things will play nicely at the C ABI boundary).
-  %unranked = tensor_cast %0 : tensor<4xf32> to tensor<*xf32>
+  %unranked = tensor.cast %0 : tensor<4xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
 
   // CHECK: Unranked Memref base@ = {{0x[-9a-f]*}}
