Commit 08545e8

[MLIR] Add a new Mesh dialect (#68007)
This is the first PR of the [Mesh sharding RFC](https://discourse.llvm.org/t/open-mlir-meeting-9-28-2023-rfc-sharding-framework-design-for-device-mesh/73695). It includes:

- the `mesh.cluster` op
- the `mesh.shard` op (the `mesh.annotate` op in the RFC slides; the name was adjusted on @stellaraccident's advice, as it is a bit more concise)
- the `MeshSharding` attribute
1 parent ebea930 commit 08545e8

File tree

14 files changed: +686, −1 lines changed


mlir/include/mlir/Dialect/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ add_subdirectory(Linalg)
 add_subdirectory(LLVMIR)
 add_subdirectory(Math)
 add_subdirectory(MemRef)
+add_subdirectory(Mesh)
 add_subdirectory(MLProgram)
 add_subdirectory(NVGPU)
 add_subdirectory(OpenACC)
mlir/include/mlir/Dialect/Mesh/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
add_subdirectory(IR)
mlir/include/mlir/Dialect/Mesh/IR/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
add_mlir_dialect(MeshOps mesh)
add_mlir_doc(MeshOps MeshOps Dialects/ -gen-op-doc)

set(LLVM_TARGET_DEFINITIONS MeshBase.td)
mlir_tablegen(MeshOpsAttributes.h.inc -gen-attrdef-decls)
mlir_tablegen(MeshOpsAttributes.cpp.inc -gen-attrdef-defs)
add_public_tablegen_target(MLIRMeshOpsAttrIncGen)
add_mlir_doc(MeshOps MeshAttributes Dialects/ -gen-attrdef-doc)

set(LLVM_TARGET_DEFINITIONS MeshBase.td)
mlir_tablegen(MeshOpsEnums.h.inc -gen-enum-decls)
mlir_tablegen(MeshOpsEnums.cpp.inc -gen-enum-defs)
add_public_tablegen_target(MLIRMeshOpsEnumsIncGen)
mlir/include/mlir/Dialect/Mesh/IR/MeshBase.td

Lines changed: 128 additions & 0 deletions
@@ -0,0 +1,128 @@
//===- MeshBase.td - Mesh Dialect --------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_MESH_IR_MESHBASE_TD
#define MLIR_DIALECT_MESH_IR_MESHBASE_TD

include "mlir/IR/OpBase.td"
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/BuiltinTypeInterfaces.td"
include "mlir/IR/EnumAttr.td"

//===----------------------------------------------------------------------===//
// Mesh Dialect
//===----------------------------------------------------------------------===//

def Mesh_Dialect : Dialect {
  let name = "mesh";
  let cppNamespace = "::mlir::mesh";

  let description = [{
    The `mesh` dialect contains a set of attributes, operations, interfaces that
    are useful for representing sharding and communication on device mesh
    cluster.
  }];

  let dependentDialects = [
    "arith::ArithDialect" // For materializeConstant()
  ];

  let useDefaultAttributePrinterParser = 1;
  let hasConstantMaterializer = 1;
}
//===----------------------------------------------------------------------===//
// Mesh Enums.
//===----------------------------------------------------------------------===//

def Mesh_Partial : I32EnumAttr<"Partial", "partial type of a distributed tensor", [
  I32EnumAttrCase<"Sum", 1, "sum">,
  I32EnumAttrCase<"Max", 2, "max">,
  I32EnumAttrCase<"Min", 3, "min">,
  I32EnumAttrCase<"Generic", 100, "generic">
]> {
  let genSpecializedAttr = 0;
  let cppNamespace = "::mlir::mesh";
}

//===----------------------------------------------------------------------===//
// Mesh Attribute
//===----------------------------------------------------------------------===//

def MeshSharding : AttrDef<Mesh_Dialect, "MeshSharding"> {
  let mnemonic = "shard";

  let parameters = (ins
    AttrParameter<"::mlir::SymbolRefAttr", "cluster placed">:$cluster,
    ArrayRefParameter<"::mlir::DenseI8ArrayAttr">:$split_axes,
    OptionalArrayRefParameter<"int8_t">:$partial_axes,
    OptionalParameter<"::mlir::mesh::Partial">:$partial_type
  );

  let summary = "Attribute that extends tensor type to distributed tensor type.";

  let description = [{
    The MeshSharding attribute could be used in the encoding of a
    `RankedTensorType` or the mesh.shard op. it contains three sub-attributes:

    1. `cluster`: this attribute is a SymbolRefAttr that refers to the mesh
    cluster where the distributed tensor is placed. The symbol must resolve to a
    `mesh.cluster` operation.

    2. `split_axes`: is an array composed of int64_t sub-arrays. The outer array's
    maximum size is the `rank` of the related tensor. For the i-th sub-array, if
    its value is [x, y], it indicates that the tensor's i-th dimension is splitted
    along the x and y axes of the device mesh.

    3. `partial_axes`: if not empty, this signifies that the tensor is partial
    one along the specified mesh axes. An all-reduce should be applied to obtain
    the complete tensor, with reduction type being specified by `partial_type`.

    4. `partial_type`: indicates the reduction type of the possible all-reduce
    op. It has 4 possible values:
    - `partial_sum`: denotes it's an all-reduce-sum
    - `partial_max`: denotes it's an all-reduce-max
    - `partial_min`: denotes it's an all-reduce-min
    - `partial_generic`: denotes that the all-reduce type is complex and cannot
    be represented merely by a simple sum, max, or min. The exact reduction
    computation may be derived from the semantics of the corresponding operation
    or from the reduction computation IR

    Example:

    ```
    mesh.cluster @mesh0(rank = 3, dim_sizes = [2, 2, 4])

    // The tensor is fully replicated on @mesh0.
    // Currently, there must be at least one sub-array present in axes, even
    // if it's empty. Otherwise, a parsing error will occur.
    tensor<4x8xf32, #mesh.shard<@mesh0, [[]]>>

    // The tensor is sharded on the first dimension along axis 0 of @mesh0
    tensor<4x8xf32, #mesh.shard<@mesh0, [[0]]>

    // The tensor is sharded on the first dimension along axis 0 of @mesh0 and
    // it is also a partial_sum along mesh axis 1.
    tensor<4x8xf32, #mesh.shard<@mesh0, [[0], [], [1]]>

    // The tensor is sharded on the first dimension along axis 0 of @mesh0 and
    // it is also a partial_max along mesh axis 1.
    tensor<4x8xf32, #mesh.shard<@mesh0, [[0]], partial = max[1]>

    // Could be used in the attribute of mesh.shard op
    %0 = mesh.shard %arg0 to <@mesh0, [[0]]> : tensor<4x8xf32>
    ```
  }];
  let assemblyFormat = [{
    `<` $cluster `,` `[` $split_axes `]` (`,` `partial` `=` $partial_type `[`
        $partial_axes^ `]`)? `>`
  }];

  let genVerifyDecl = 1;
}

#endif // MLIR_DIALECT_MESH_IR_MESHBASE_TD
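For readers wiring this attribute up from C++, below is a minimal sketch of how the tablegen-generated `MeshShardingAttr` might be built to match the textual form `#mesh.shard<@mesh0, [[0]], partial = max[1]>` from the example above. It assumes the default generated `get` builder takes the parameters in the order declared in `MeshBase.td`; the exact generated signature is not shown in this commit, so verify against `MeshOpsAttributes.h.inc`.

```
#include "mlir/Dialect/Mesh/IR/MeshOps.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Hypothetical helper: builds #mesh.shard<@mesh0, [[0]], partial = max[1]>.
// Assumes the generated MeshShardingAttr::get(ctx, cluster, split_axes,
// partial_axes, partial_type) builder; check the generated header.
static mesh::MeshShardingAttr makeSharding(MLIRContext *ctx) {
  // Refers to a `mesh.cluster @mesh0` symbol defined elsewhere in the module.
  auto cluster = SymbolRefAttr::get(ctx, "mesh0");
  // Tensor dim 0 is split along mesh axis 0; remaining dims are replicated.
  SmallVector<DenseI8ArrayAttr> splitAxes = {DenseI8ArrayAttr::get(ctx, {0})};
  // The value is additionally partial (here: max-reduced) along mesh axis 1.
  SmallVector<int8_t> partialAxes = {1};
  return mesh::MeshShardingAttr::get(ctx, cluster, splitAxes, partialAxes,
                                     mesh::Partial::Max);
}
```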
mlir/include/mlir/Dialect/Mesh/IR/MeshOps.h

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
//===- MeshOps.h - Mesh Dialect Operations ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_MESH_IR_MESHOPS_H
#define MLIR_DIALECT_MESH_IR_MESHOPS_H

#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

#include "mlir/Dialect/Mesh/IR/MeshOpsDialect.h.inc"

#include "mlir/Dialect/Mesh/IR/MeshOpsEnums.h.inc"

#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/Mesh/IR/MeshOpsAttributes.h.inc"

#define GET_OP_CLASSES
#include "mlir/Dialect/Mesh/IR/MeshOps.h.inc"

#endif // MLIR_DIALECT_MESH_IR_MESHOPS_H
mlir/include/mlir/Dialect/Mesh/IR/MeshOps.td

Lines changed: 174 additions & 0 deletions
@@ -0,0 +1,174 @@
//===-- MeshOps.td - Mesh dialect operation definitions ----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_MESH_IR_MESHOPS_TD
#define MLIR_DIALECT_MESH_IR_MESHOPS_TD

include "mlir/Dialect/Mesh/IR/MeshBase.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/BuiltinTypes.td"
include "mlir/IR/SymbolInterfaces.td"

//===----------------------------------------------------------------------===//
// Mesh Dialect operations.
//===----------------------------------------------------------------------===//

class Mesh_Op<string mnemonic, list<Trait> traits = []> :
    Op<Mesh_Dialect, mnemonic, traits> {
}

def Mesh_ClusterOp : Mesh_Op<"cluster", [Symbol]> {
  let summary = "representing a mesh cluster";
  let description = [{
    The mesh.cluster operation is a symbol operation that identifies a specific
    mesh cluster. The operation has three attributes:

    1. `sym_name`: This attribute uniquely identifies the name of the mesh
    cluster. This name serves as a symbolic reference to the cluster throughout
    the MLIR module, allowing for consistent referencing and easier debugging.

    2. `rank`: This attribute specifies the number of axes of the cluster. The
    rank indicates the dimensionality of the mesh cluster and can be used to
    determine the layout and the addressing space of the computation distributed
    across the mesh.

    3. `dim_sizes`: This attribute represents the device assignment along the
    axes of the cluster. Each integer in the array corresponds to the number of
    devices along a specific axis. If an integer value is 0, it implies that the
    number of devices along that axis is unknown. This flexibility allows for
    dynamic device assignment or configurations where the exact number of
    devices might not be determined during compile time.

    Example:
    ```
    // A device mesh cluster with 3 axes, the total device number is 4 * 8 * 12
    // The dimension sizes are 4, 8, 12
    mesh.cluster @mesh0(rank = 3, dim_sizes = [4, 8, 12])

    // A device mesh cluster with 2 axes, the total device number is unknown
    // The first dimension size is 4 and the second is unknown
    mesh.cluster @mesh1(rank = 2, dim_sizes = [4])

    // A device mesh cluster with 2 axes, the total device number is unknown
    // The first dimension size is unknown and the second is 4
    mesh.cluster @mesh2(rank = 2, dim_sizes = [0, 4])

    // A device mesh cluster with 2 axes, the number of devices along both axes
    // is unknown
    mesh.cluster @mesh3(rank = 2)

    // Used in the mesh sharding attribute to extend the standard tensor to
    // distributed
    tensor<4x8xf32, #mesh.shard<@mesh0, [[0]]>>
    ```
  }];
  let arguments = (ins
    SymbolNameAttr:$sym_name,
    I8Attr:$rank,
    DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$dim_sizes
  );
  let assemblyFormat = [{
    $sym_name `(` `rank` `=` $rank (`,` `dim_sizes` `=` $dim_sizes^)? `)`
    attr-dict
  }];
  let hasVerifier = 1;
}

def Mesh_ShardOp : Mesh_Op<"shard", [Pure, SameOperandsAndResultType]> {
  let summary = "Annotate on how a tensor is sharded across a mesh cluster.";
  let description = [{
    The mesh.shard operation is designed to specify and guide the sharding
    behavior of a tensor value across a mesh topology. This operation has one
    operand and two attributes:

    1. `input`: This operand represents the tensor value that needs to be
    annotated for sharding.

    2. `shard`: This attribute is type of `MeshSharding`, which is the core data
    structure to represent distributed tensor in mesh cluster.

    3. `annotate_for_users`: A unit attribute addressing the scenario when a
    tensor's sharding annotation differs based on its context of use (either as
    a result or an operand). If specified, the sharding pertains to specific
    users of the tensor value, indicating how it should be considered when used
    as an operand in subsequent operations. If not, the sharding applies to the
    operation that defines the tensor value.

    Example:
    ```
    func.func @only_result_annotated(%arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> : tensor<4x8xf32>
      ...
    }

    func.func @only_operand_annotated(%arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> annotate_for_users : tensor<4x8xf32>
      ...
    }

    // The first mesh.shard op applies to %arg0, the second mesh.shard op
    // applies for the operand of op0, the third mesh.shard op applies for the
    // operand of op2
    func.func @both_result_and_multi_operands_annotated(
        %arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> : tensor<4x8xf32>
      %1 = mesh.shard %0 to <@mesh0, [[1]]> annotate_for_users : tensor<4x8xf32>
      %2 = mesh.shard %0 to <@mesh0, [[2]]> annotate_for_users : tensor<4x8xf32>
      "op0"(%1) : ...
      "op1"(%2) : ...
      ...
    }
    ```

    The following usages are undefined:
    ```
    func.func @annotate_on_same_result_with_different_sharding(
        %arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> : tensor<4x8xf32>
      %1 = mesh.shard %0 to <@mesh0, [[1]]> : tensor<4x8xf32>
      ...
    }

    func.func @annotate_on_same_result_same_value_with_different_sharding(
        %arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> : tensor<4x8xf32>
      %1 = mesh.shard %arg0 to <@mesh0, [[1]]> : tensor<4x8xf32>
      ...
    }

    func.func @annotate_on_same_operand_with_different_sharding(
        %arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> annotate_for_users : tensor<4x8xf32>
      %1 = mesh.shard %0 to <@mesh0, [[1]]> annotate_for_users : tensor<4x8xf32>
      ...
    }

    func.func @result_annotated_after_operand(
        %arg0 : tensor<4x8xf32>) -> () {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> annotate_for_users : tensor<4x8xf32>
      %1 = mesh.shard %0 to <@mesh0, [[1]]> : tensor<4x8xf32>
      ...
    }
    ```
  }];
  let arguments = (ins
    Builtin_RankedTensor:$src,
    MeshSharding:$shard,
    UnitAttr:$annotate_for_users
  );
  let results = (outs
    Builtin_RankedTensor:$result
  );
  let assemblyFormat = [{
    $src `to` $shard (`annotate_for_users` $annotate_for_users^)? attr-dict `:`
    type($result)
  }];
}

#endif // MLIR_DIALECT_MESH_IR_MESHOPS_TD
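As a quick smoke test of the two new ops from C++, one plausible approach is to parse a small module and print it back through the custom assembly formats defined above. This is a minimal sketch, not part of this commit: the textual IR simply mirrors the examples in the op descriptions, `parseSourceString` is the standard MLIR parser entry point, and error handling is elided.

```
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Mesh/IR/MeshOps.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Parser/Parser.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  mlir::MLIRContext ctx;
  // Load the dialects the snippet uses: mesh for the new ops, func for func.func.
  ctx.loadDialect<mlir::mesh::MeshDialect, mlir::func::FuncDialect>();

  const char *ir = R"mlir(
    mesh.cluster @mesh0(rank = 3, dim_sizes = [2, 2, 4])
    func.func @f(%arg0 : tensor<4x8xf32>) -> tensor<4x8xf32> {
      %0 = mesh.shard %arg0 to <@mesh0, [[0]]> : tensor<4x8xf32>
      return %0 : tensor<4x8xf32>
    }
  )mlir";

  // Round-trip: parse the module, then print it back using the declarative
  // assembly formats from MeshOps.td.
  mlir::OwningOpRef<mlir::ModuleOp> module =
      mlir::parseSourceString<mlir::ModuleOp>(ir, &ctx);
  if (module)
    module->print(llvm::outs());
  return 0;
}
```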

mlir/include/mlir/IR/OpImplementation.h

Lines changed: 13 additions & 1 deletion
@@ -350,7 +350,8 @@ template <typename AsmPrinterT, typename T,
           !std::is_convertible<T &, Attribute &>::value &&
           !std::is_convertible<T &, ValueRange>::value &&
           !std::is_convertible<T &, APFloat &>::value &&
-          !llvm::is_one_of<T, bool, float, double>::value,
+          !llvm::is_one_of<T, bool, int8_t, uint8_t, float,
+                           double>::value,
           T> * = nullptr>
 inline std::enable_if_t<std::is_base_of<AsmPrinter, AsmPrinterT>::value,
                         AsmPrinterT &>
@@ -366,6 +367,17 @@ operator<<(AsmPrinterT &p, bool value) {
   return p << (value ? StringRef("true") : "false");
 }
 
+/// Specialization for 8-bit integers to ensure values are printed as integers
+// and not characters.
+template <
+    typename AsmPrinterT, typename T,
+    std::enable_if_t<llvm::is_one_of<T, int8_t, uint8_t>::value, T> * = nullptr>
+inline std::enable_if_t<std::is_base_of<AsmPrinter, AsmPrinterT>::value,
+                        AsmPrinterT &>
+operator<<(AsmPrinterT &p, T value) {
+  return p << static_cast<int16_t>(value);
+}
+
 template <typename AsmPrinterT, typename ValueRangeT>
 inline std::enable_if_t<std::is_base_of<AsmPrinter, AsmPrinterT>::value,
                         AsmPrinterT &>

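The cast through `int16_t` matters because streaming an `int8_t`/`uint8_t` into a character-oriented output otherwise prints the byte as a character rather than a number. A standalone C++ illustration of that pitfall, using plain `std::ostream` rather than the AsmPrinter itself:

```
#include <cstdint>
#include <iostream>

int main() {
  int8_t axis = 65;
  // Without a cast, the 8-bit value is treated as a char and prints "A".
  std::cout << axis << "\n";
  // Widening first prints the numeric value "65", which is what the
  // AsmPrinter specialization above achieves with static_cast<int16_t>.
  std::cout << static_cast<int16_t>(axis) << "\n";
  return 0;
}
```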
mlir/include/mlir/InitAllDialects.h

Lines changed: 2 additions & 0 deletions
@@ -55,6 +55,7 @@
 #include "mlir/Dialect/MemRef/Transforms/AllocationOpInterfaceImpl.h"
 #include "mlir/Dialect/MemRef/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/MemRef/Transforms/RuntimeOpVerification.h"
+#include "mlir/Dialect/Mesh/IR/MeshOps.h"
 #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
@@ -117,6 +118,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
                   LLVM::LLVMDialect,
                   math::MathDialect,
                   memref::MemRefDialect,
+                  mesh::MeshDialect,
                   ml_program::MLProgramDialect,
                   nvgpu::NVGPUDialect,
                   NVVM::NVVMDialect,
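Downstream tools that do not call `registerAllDialects` can register the new dialect directly. A minimal sketch (tool boilerplate omitted), using the same `mesh::MeshDialect` insertion that `registerAllDialects` now performs:

```
#include "mlir/Dialect/Mesh/IR/MeshOps.h"
#include "mlir/IR/DialectRegistry.h"
#include "mlir/IR/MLIRContext.h"

int main() {
  mlir::DialectRegistry registry;
  // Make the mesh dialect available to contexts built from this registry.
  registry.insert<mlir::mesh::MeshDialect>();

  mlir::MLIRContext context(registry);
  // Loading the dialect makes mesh ops and #mesh.shard parsing usable now.
  context.loadDialect<mlir::mesh::MeshDialect>();
  return 0;
}
```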
