72 changes: 61 additions & 11 deletions mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir
@@ -1,25 +1,35 @@
// RUN: mlir-opt -convert-elementwise-to-linalg -split-input-file %s | FileCheck %s

// In-depth checking of the linalg.generic op for a very trivial case.
// CHECK: #map = affine_map<() -> ()>
// CHECK-LABEL: func @addf_rank0
// CHECK: #[[$MAP:.*]] = affine_map<() -> ()>
// CHECK-LABEL: func @addf_rank0
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<f32>
func @addf_rank0(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
// CHECK: %{{.*}} = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%{{.*}}, %{{.*}} : tensor<f32>, tensor<f32>) {
// CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32):
// CHECK: %[[YIELD:.*]] = addf %[[LHS]], %[[RHS]] : f32
// CHECK: linalg.yield %[[YIELD]] : f32
// CHECK: } -> tensor<f32>
// CHECK: %{{.*}} = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]], #[[$MAP]]]
// CHECK-SAME: iterator_types = []
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[ARG0]]
// CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32, %{{.*}}: f32):
// CHECK: %[[YIELD:.*]] = addf %[[LHS]], %[[RHS]] : f32
// CHECK: linalg.yield %[[YIELD]] : f32
// CHECK: } -> tensor<f32>
%0 = addf %arg0, %arg1 : tensor<f32>
return %0 : tensor<f32>
}
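
// For orientation, the converted IR described by the CHECK lines above
// should look roughly as follows. This is a sketch assembled from the CHECK
// directives, not verbatim pass output, and the SSA names are illustrative:
//
//   #map = affine_map<() -> ()>
//   func @addf_rank0(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<f32> {
//     %0 = linalg.generic {indexing_maps = [#map, #map, #map],
//                          iterator_types = []}
//         ins(%arg0, %arg1 : tensor<f32>, tensor<f32>)
//         outs(%arg0 : tensor<f32>) {
//     ^bb0(%lhs: f32, %rhs: f32, %out: f32):
//       %1 = addf %lhs, %rhs : f32
//       linalg.yield %1 : f32
//     } -> tensor<f32>
//     return %0 : tensor<f32>
//   }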

// -----

// Check indexing maps and iterator types for the rank > 0 case.
// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @addf_rank1
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?xf32>
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<?xf32>
func @addf_rank1(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: linalg.generic{{.*}}indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]
// CHECK: linalg.generic
// CHECK-SAME: iterator_types = ["parallel"]
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[ARG0]]
%0 = addf %arg0, %arg1 : tensor<?xf32>
return %0 : tensor<?xf32>
}
@@ -28,9 +38,12 @@ func @addf_rank1(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> tensor<?xf32> {

// Check a unary op.
// CHECK-LABEL: func @exp
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<f32>
func @exp(%arg0: tensor<f32>) -> tensor<f32> {
// CHECK: linalg.generic
// CHECK: ^bb0(%[[SCALAR:.*]]: f32):
// CHECK-SAME: ins(%[[ARG0]]
// CHECK-SAME: outs(%[[ARG0]]
// CHECK: ^bb0(%[[SCALAR:.*]]: f32, %{{.*}}: f32):
// CHECK: %[[YIELD:.*]] = exp %[[SCALAR]] : f32
// CHECK: linalg.yield %[[YIELD]] : f32
%0 = exp %arg0 : tensor<f32>
@@ -41,9 +54,14 @@ func @exp(%arg0: tensor<f32>) -> tensor<f32> {

// Check a case with varying operand types.
// CHECK-LABEL: func @select
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<i1>
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<i32>
// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<i32>
func @select(%arg0: tensor<i1>, %arg1: tensor<i32>, %arg2: tensor<i32>) -> tensor<i32> {
// CHECK: linalg.generic
// CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32):
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]]
// CHECK-SAME: outs(%[[ARG1]]
// CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32, %{{.*}}: i32):
// CHECK: select %[[PRED]], %[[TRUE_VAL]], %[[FALSE_VAL]] : i32
%0 = select %arg0, %arg1, %arg2 : tensor<i1>, tensor<i32>
return %0 : tensor<i32>
@@ -52,9 +70,41 @@ func @select(%arg0: tensor<i1>, %arg1: tensor<i32>, %arg2: tensor<i32>) -> tenso
// -----

// Spot-check an op that requires copying attributes properly to the created scalar op.
// Also checks proper init_tensor usage.
// CHECK-LABEL: func @cmpf(
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<f32>
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<f32>
func @cmpf(%arg0: tensor<f32>, %arg1: tensor<f32>) -> tensor<i1> {
// CHECK: %[[INIT:.*]] = linalg.init_tensor [] : tensor<i1>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[INIT]]
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1):
// CHECK: cmpf "olt", %{{.*}}, %{{.*}} : f32
%0 = cmpf "olt", %arg0, %arg1 : tensor<f32>
return %0 : tensor<i1>
}
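
// Note: cmpf produces an i1 element type that differs from its f32 operands,
// so unlike the addf tests above the pattern cannot reuse an input tensor as
// the output; it materializes the result tensor with linalg.init_tensor
// instead, as the %[[INIT]] checks above verify.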

// -----

// Check proper init_tensor usage in a mixed case.
// CHECK-LABEL: func @cmpf(
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<4x?x?x8x2x?xf32>
// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<4x?x?x8x2x?xf32>
func @cmpf(%arg0: tensor<4x?x?x8x2x?xf32>, %arg1: tensor<4x?x?x8x2x?xf32>) -> tensor<4x?x?x8x2x?xi1> {
// CHECK: %[[C1:.*]] = constant 1 : index
// CHECK: %[[D1:.*]] = dim %[[ARG0]], %[[C1]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[C2:.*]] = constant 2 : index
// CHECK: %[[D2:.*]] = dim %[[ARG0]], %[[C2]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[C5:.*]] = constant 5 : index
// CHECK: %[[D5:.*]] = dim %[[ARG0]], %[[C5]] : tensor<4x?x?x8x2x?xf32>
// CHECK: %[[INIT:.*]] = linalg.init_tensor [4, %[[D1]], %[[D2]], 8, 2, %[[D5]]] : tensor<4x?x?x8x2x?xi1>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]]
// CHECK-SAME: outs(%[[INIT]]
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1):
// CHECK: cmpf "olt", %{{.*}}, %{{.*}} : f32
%0 = cmpf "olt", %arg0, %arg1 : tensor<4x?x?x8x2x?xf32>
return %0 : tensor<4x?x?x8x2x?xi1>
}

61 changes: 33 additions & 28 deletions mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -linalg-fold-unit-extent-dims -split-input-file | FileCheck %s
// RUN: mlir-opt %s -split-input-file -linalg-fold-unit-extent-dims | FileCheck %s

#accesses = [
affine_map<(i, j, k, l, m) -> (i, k, m)>,
@@ -11,12 +11,12 @@
library_call = "some_external_func"
}

func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>) -> tensor<?x1x?x1x?xf32>
{
func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>, %shape: tensor<?x1x?x1x?xf32>) -> tensor<?x1x?x1x?xf32> {
%0 = linalg.generic #trait
ins(%arg0 : tensor<?x1x?xf32>) {
^bb0(%arg1 : f32) :
linalg.yield %arg1 : f32
ins(%arg0 : tensor<?x1x?xf32>)
outs(%shape : tensor<?x1x?x1x?xf32>) {
^bb0(%arg2 : f32, %arg3 : f32) :
linalg.yield %arg2 : f32
} -> tensor<?x1x?x1x?xf32>
return %0 : tensor<?x1x?x1x?xf32>
}
@@ -48,12 +48,13 @@ func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>) -> tensor<?x1x?x1x?xf32>
}

func @drop_one_trip_loops_indexed_generic
(%arg0 : tensor<?x1x?xi32>) -> tensor<?x1x?x1x?xi32>
(%arg0 : tensor<?x1x?xi32>, %shape: tensor<?x1x?x1x?xi32>) -> tensor<?x1x?x1x?xi32>
{
%0 = linalg.indexed_generic #trait
ins(%arg0 : tensor<?x1x?xi32>) {
ins(%arg0 : tensor<?x1x?xi32>)
outs(%shape: tensor<?x1x?x1x?xi32>) {
^bb0(%arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
%arg5 : index, %arg6 : i32) :
%arg5 : index, %arg6 : i32, %arg7 : i32) :
%1 = addi %arg1, %arg2 : index
%2 = addi %1, %arg3 : index
%3 = addi %2, %arg4 : index
@@ -68,7 +69,7 @@ func @drop_one_trip_loops_indexed_generic
// CHECK: linalg.indexed_generic
// CHECK: ^{{.+}}(
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index, %[[ARG4:[a-zA-Z0-9]+]]: i32)
// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index, %[[ARG4:[a-zA-Z0-9]+]]: i32, %{{.*}}: i32)
// CHECK: %[[T3:.+]] = addi %[[ARG1]], %[[ARG2]]
// CHECK: %[[T4:.+]] = addi %[[T3]], %[[ARG3]]
// CHECK: %[[T5:.+]] = index_cast %[[T4]] : index to i32
@@ -88,8 +89,9 @@ func @drop_one_trip_loops_indexed_generic
func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32>
{
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x1xf32>) {
^bb0(%arg1: f32) :
ins(%arg0 : tensor<1x1xf32>)
outs(%arg0 : tensor<1x1xf32>) {
^bb0(%arg1: f32, %arg2: f32) :
linalg.yield %arg1 : f32
} -> tensor<1x1xf32>
return %0 : tensor<1x1xf32>
@@ -112,11 +114,11 @@
}

func @drop_all_loops_indexed_generic
(%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>
{
(%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{
%0 = linalg.indexed_generic #trait
ins(%arg0 : tensor<1x1xi32>) {
^bb0(%arg1 : index, %arg2 : index, %arg3: i32) :
ins(%arg0 : tensor<1x1xi32>)
outs(%arg0 : tensor<1x1xi32>) {
^bb0(%arg1 : index, %arg2 : index, %arg3: i32, %arg4: i32) :
%1 = addi %arg1, %arg2 : index
%2 = index_cast %1 : index to i32
%3 = addi %2, %arg3 : i32
@@ -127,7 +129,7 @@ func @drop_all_loops_indexed_generic

// CHECK-LABEL: func @drop_all_loops_indexed_generic
// CHECK: linalg.indexed_generic
// CHECK: ^{{.+}}(%[[ARG1:.+]]: i32)
// CHECK: ^{{.+}}(%[[ARG1:.+]]: i32, %[[ARG2:.+]]: i32)
// CHECK: linalg.yield %[[ARG1]] : i32

// -----
@@ -143,10 +145,11 @@ func @drop_all_loops_indexed_generic
library_call = "some_external_fn"
}

func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> {
func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> {
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x5xf32>) {
^bb0(%arg2: f32): // no predecessors
ins(%arg0 : tensor<1x5xf32>)
outs(%shape : tensor<5xf32>) {
^bb0(%arg2: f32, %arg3: f32): // no predecessors
linalg.yield %arg2 : f32
} -> tensor<5xf32>
return %0 : tensor<5xf32>
@@ -172,16 +175,17 @@ func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> {
library_call = "some_external_fn"
}

func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<5x5xf32>
func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>, %shape : tensor<5x5xf32>) -> tensor<5x5xf32>
{
%0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1) -> (d0, d1)>] :
tensor<5xf32> into tensor<1x5xf32>
%1 = linalg.tensor_reshape %arg1 [affine_map<(d0, d1) -> (d0, d1)>] :
tensor<5xf32> into tensor<5x1xf32>
%2 = linalg.generic #trait
ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) {
^bb0(%arg2: f32, %arg3: f32):
%3 = addf %arg2, %arg3 : f32
ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>)
outs(%shape : tensor<5x5xf32>) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%3 = addf %arg3, %arg4 : f32
linalg.yield %3 : f32
} -> tensor<5x5xf32>
return %2 : tensor<5x5xf32>
@@ -209,12 +213,13 @@ func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) -> tensor<5x5
library_call = "some_external_fn"
}

func @broadcast_scalar(%arg0 : tensor<1x1xf32>) -> tensor<?x?xf32>
func @broadcast_scalar(%arg0 : tensor<1x1xf32>, %shape : tensor<?x?xf32>) -> tensor<?x?xf32>
{
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x1xf32>) {
^bb0(%arg1 : f32):
linalg.yield %arg1 : f32
ins(%arg0 : tensor<1x1xf32>)
outs(%shape : tensor<?x?xf32>) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
21 changes: 12 additions & 9 deletions mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -linalg-fold-unit-extent-dims="fold-one-trip-loops-only" -split-input-file | FileCheck %s
// RUN: mlir-opt %s -split-input-file -linalg-fold-unit-extent-dims="fold-one-trip-loops-only" | FileCheck %s

#accesses = [
affine_map<(i, j, k, l, m) -> (i, k, m)>,
@@ -11,11 +11,12 @@
library_call = "some_external_func"
}

func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>) -> tensor<?x1x?x1x?xf32>
func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>, %shape: tensor<?x1x?x1x?xf32>) -> tensor<?x1x?x1x?xf32>
{
%0 = linalg.generic #trait
ins(%arg0 : tensor<?x1x?xf32>) {
^bb0(%arg1 : f32) :
ins(%arg0 : tensor<?x1x?xf32>)
outs(%shape : tensor<?x1x?x1x?xf32>) {
^bb0(%arg1 : f32, %arg2 : f32) :
linalg.yield %arg1 : f32
} -> tensor<?x1x?x1x?xf32>
return %0 : tensor<?x1x?x1x?xf32>
@@ -40,8 +41,9 @@ func @drop_one_trip_loops(%arg0 : tensor<?x1x?xf32>) -> tensor<?x1x?x1x?xf32>
func @drop_all_loops(%arg0 : tensor<1x1xf32>) -> tensor<1x1xf32>
{
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x1xf32>) {
^bb0(%arg1: f32) :
ins(%arg0 : tensor<1x1xf32>)
outs(%arg0 : tensor<1x1xf32>) {
^bb0(%arg1: f32, %arg2: f32) :
linalg.yield %arg1 : f32
} -> tensor<1x1xf32>
return %0 : tensor<1x1xf32>
@@ -91,10 +93,11 @@ func @drop_all_loops(%arg0 : memref<1x1xf32>, %arg1 : memref<1x1xf32>)
library_call = "some_external_fn"
}

func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>) -> tensor<5xf32> {
func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> {
%0 = linalg.generic #trait
ins(%arg0 : tensor<1x5xf32>) {
^bb0(%arg2: f32): // no predecessors
ins(%arg0 : tensor<1x5xf32>)
outs(%shape : tensor<5xf32>) {
^bb0(%arg2: f32, %arg3: f32): // no predecessors
linalg.yield %arg2 : f32
} -> tensor<5xf32>
return %0 : tensor<5xf32>
439 changes: 277 additions & 162 deletions mlir/test/Dialect/Linalg/fusion-tensor.mlir

Large diffs are not rendered by default.
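
The omitted diff is not reconstructed here, but the rendered files show the
mechanical migration this patch applies throughout (the two reshape-fusion
files below, also collapsed, follow the same pattern). A minimal before/after
sketch, with hypothetical %init and %s names:

  // Before: a tensor-returning linalg.generic had no explicit output
  // operand (reductions passed the result tensor via init(...)).
  %0 = linalg.generic #trait
      ins(%a, %b : tensor<?xf32>, tensor<?xf32>) {
    ^bb0(%x: f32, %y: f32):
      %sum = addf %x, %y : f32
      linalg.yield %sum : f32
  } -> tensor<?xf32>

  // After: the output tensor is threaded through outs(...), and the region
  // takes one extra block argument per output operand.
  %0 = linalg.generic #trait
      ins(%a, %b : tensor<?xf32>, tensor<?xf32>)
      outs(%init : tensor<?xf32>) {
    ^bb0(%x: f32, %y: f32, %s: f32):
      %sum = addf %x, %y : f32
      linalg.yield %sum : f32
  } -> tensor<?xf32>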

12 changes: 7 additions & 5 deletions mlir/test/Dialect/Linalg/generalize-named-ops.mlir
@@ -28,7 +28,8 @@ func @generalize_conv(%input : memref<1x225x225x3xf32>, %filter: memref<3x3x3x32
// -----

func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: memref<16x32xf32>) {
linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) outs(%C: memref<16x32xf32>)
linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>)
outs(%C: memref<16x32xf32>)
return
}

@@ -45,7 +46,7 @@ func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C:
// CHECK: linalg.generic
// CHECK-SAME: indexing_maps = [#[[A_MAP]], #[[B_MAP]], #[[C_MAP]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A]], %[[B]]
// CHECK-SAME: ins(%[[A]], %[[B]]
// CHECK-SAME: outs(%[[C]]

// CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32)
@@ -56,15 +57,16 @@ func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C:
// -----

func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
%0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) init(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
%0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>)
outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
return %0: tensor<16x32xf32>
}

// CHECK: func @generalize_matmul_tensor

// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>)
// CHECK-SAME: init(%{{.+}} : tensor<16x32xf32>)
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>)
// CHECK-SAME: outs(%{{.+}} : tensor<16x32xf32>)

// CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32)
// CHECK-NEXT: %[[MUL:.+]] = mulf %[[A_ARG]], %[[B_ARG]] : f32
137 changes: 58 additions & 79 deletions mlir/test/Dialect/Linalg/invalid.mlir
@@ -77,7 +77,7 @@ func @generic_wrong_dim_in_map(%arg0: memref<1xi32>) {
// -----

func @generic_one_d_view(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
// expected-error @+1 {{op expected indexing_map #0 results to match view rank: 'memref<?xf32, affine_map<(d0)[s0] -> (d0 + s0)>>'}}
// expected-error @+1 {{expected shaped value rank (1) to match the result rank of indexing_map #0 (2)}}
linalg.generic {
indexing_maps = [ affine_map<() -> (0, 0)> ],
iterator_types = []}
@@ -143,9 +143,9 @@ func @generic_empty_region(%arg0: memref<f32>) {

func @generic_empty_region(%arg0: memref<f32>) {
%f0 = constant 0.0: f32
// expected-error @+1 {{linalg.generic' op expected region with 1 block}}
// expected-error @+1 {{linalg.generic' op expected 1 region with 1 block}}
linalg.generic {
indexing_maps = [ affine_map<() -> (0)> ],
indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ],
iterator_types = []}
ins(%arg0 : memref<f32>)
outs(%arg0 : memref<f32>) {
@@ -155,22 +155,22 @@ func @generic_empty_region(%arg0: memref<f32>) {
// -----

func @generic_mismatched_num_arguments(%arg0: memref<f32>) {
// expected-error @+1 {{op expected number of block arguments to match number of operands}}
// expected-error @+1 {{expected as many non-induction variable region arguments as the number of shaped operands}}
linalg.generic {
indexing_maps = [ affine_map<() -> (0)> ],
indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
iterator_types = []}
outs(%arg0 : memref<f32>) {
^bb(%f: f32, %g: f32):
outs(%arg0, %arg0 : memref<f32>, memref<f32>) {
^bb(%f: f32):
linalg.yield %f: f32
}
}

// -----

func @generic_block_arg_type(%arg0: memref<f32>) {
// expected-error @+1 {{op expected block argument 1 of the same type as elemental type of output operand: 'memref<f32>'}}
// expected-error @+1 {{expected type of bb argument #0 ('i1') to match element type of corresponding shaped operand ('f32')}}
linalg.generic {
indexing_maps = [ affine_map<() -> (0)> ],
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
outs(%arg0 : memref<f32>) {
^bb(%i: i1):
@@ -180,38 +180,38 @@ func @generic_block_arg_type(%arg0: memref<f32>) {

// -----

func @indexed_generic_block_arg_count(%arg0: memref<f32>) {
// expected-error @+1 {{op expected number of block arguments to match number of operands + number of loops}}
func @indexed_generic_block_arg_count(%arg0: memref<?xf32>) {
// expected-error @+1 {{expected as many non-induction variable region arguments as the number of shaped operands}}
linalg.indexed_generic {
indexing_maps = [ affine_map<(d0) -> (d0)> ],
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<f32>) {
outs(%arg0 : memref<?xf32>) {
^bb(%f: f32):
linalg.yield %f : f32
}
}

// -----

func @indexed_generic_block_induction_var_arg_type(%arg0: memref<f32>) {
// expected-error @+1 {{op expected block argument 1 to be an index}}
func @indexed_generic_block_induction_var_arg_type(%arg0: memref<?xf32>) {
// expected-error @+1 {{op expected index block argument #0}}
linalg.indexed_generic {
indexing_maps = [ affine_map<(d0) -> (d0)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<f32>) {
outs(%arg0 : memref<?xf32>) {
^bb(%i: f64, %f: f32):
linalg.yield %f: f32
}
}

// -----

func @indexed_generic_block_arg_type(%arg0: memref<f32>) {
// expected-error @+1 {{op expected block argument 2 of the same type as elemental type of output operand: 'memref<f32>'}}
func @indexed_generic_block_arg_type(%arg0: memref<?xf32>) {
// expected-error @+1 {{expected type of bb argument #1 ('i1') to match element type of corresponding shaped operand ('f32')}}
linalg.indexed_generic {
indexing_maps = [ affine_map<(d0) -> (d0)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<f32>) {
outs(%arg0 : memref<?xf32>) {
^bb(%i: index, %f: i1):
linalg.yield %i: index
}
@@ -220,7 +220,7 @@
// -----

func @indexed_generic_arg_count(%arg0: memref<f32>) {
// expected-error @+1 {{op expected number of block arguments to match number of operands + number of loops}}
// expected-error @+1 {{expected as many non-induction variable region arguments as the number of shaped operands}}
linalg.indexed_generic {
indexing_maps = [ affine_map<()[] -> ()> ],
iterator_types = []}
@@ -233,19 +233,6 @@ func @indexed_generic_arg_count(%arg0: memref<f32>) {

// -----

func @indexed_generic_induction_var_arg_type(%arg0: memref<f32>) {
// expected-error @+1 {{op expected block argument 1 to be an index}}
linalg.indexed_generic {
iterator_types = ["parallel"],
indexing_maps = [ affine_map<(i) -> (i)> ]}
outs(%arg0 : memref<f32>) {
^bb(%0: i32, %1: f32):
linalg.yield %1: f32
}
}

// -----

func @indexed_generic_result_count(%arg0: memref<?xf32>) {
// expected-error @+6 {{op expected number of yield values (1) to match the number of operands of the enclosing LinalgOp (2)}}
linalg.indexed_generic {
@@ -273,19 +260,36 @@ func @generic_result_0_element_type(%arg0: memref<?xf32, affine_map<(i)[off]->(o

// -----

func @generic_result_tensor_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
// expected-error @+1 {{op result #0 must be ranked tensor of any type values, but got 'f32'}}
func @generic_result_tensor_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>,
%arg1: tensor<?xf32>) {
// expected-error @+1 {{expected type of operand #1 ('tensor<?xf32>') to match type of corresponding result ('f32')}}
%0 = linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
ins(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
^bb(%i: f32):
ins(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>)
outs(%arg1 : tensor<?xf32>) {
^bb(%i: f32, %j: f32):
linalg.yield %i: f32
} -> f32
}

// -----

func @generic_result_tensor_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>,
%arg1: tensor<?xf32>) {
// expected-error @+1 {{unexpected output tensor expression in indexing map #0 a.k.a 'd0' is function of reduction iterator 'd0'}}
%0 = linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ],
iterator_types = ["reduction"]}
ins(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>)
outs(%arg1 : tensor<?xf32>) {
^bb(%i: f32, %j: f32):
linalg.yield %i: f32
} -> tensor<?xf32>
}

// -----

func @generic(%arg0: memref<?x?xi4>) {
// expected-error @+2 {{op expects regions to end with 'linalg.yield', found 'std.addf'}}
// expected-note @+1 {{in custom textual format, the absence of terminator implies 'linalg.yield'}}
@@ -301,12 +305,17 @@

// -----

func @conv_rank_limit(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf32>) {
// expected-error @+1 {{expects memref ranks to be greater than 2}}
linalg.conv(%arg0, %arg1, %arg2) : memref<?xf32>, memref<?xf32>, memref<?xf32>
}

// -----
// This test is currently disabled: subject to verifier ordering issues.
// Instead, when the ranks are not greater than 2, an assertion will be triggered
// in LinalgStructuredOps.td::ConvOp::iterator_types() for now because the
// verifier inspects the iterator_types. This is slated to become an
// autogenerated op in the future, alleviating the issue.
// func @conv_rank_limit(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf32>) {
// // DISABLED_expected -error @+1 {{expects memref ranks to be greater than 2}}
// linalg.conv(%arg0, %arg1, %arg2) : memref<?xf32>, memref<?xf32>, memref<?xf32>
// }
//
// // -----

// expected-error @+1 {{unknown Linalg type}}
!invalid_type = type !linalg.unknown
@@ -367,7 +376,7 @@ func @reshape(%arg0: memref<?x?x?xf32>) {
func @pooling_rank_mismatch(%arg0: memref<?x?x?xf32>,
%arg1: memref<2x3xf32>,
%arg2: memref<?x?x?xf32>) {
// expected-error @+1 {{expects memref ranks to match}}
// expected-error @+1 {{expected shaped value rank (2) to match the result rank of indexing_map #1 (3)}}
linalg.pooling_max(%arg0, %arg1, %arg2) {strides = [2, 1, 2]}:
memref<?x?x?xf32>, memref<2x3xf32>, memref<?x?x?xf32>
return
@@ -376,57 +385,27 @@ func @pooling_rank_mismatch(%arg0: memref<?x?x?xf32>,
// -----

func @named_ops(%a3: memref<?x?x?xf32>, %b3: memref<?x?xf32>, %c3: memref<?x?x?xf32>) {
// expected-error @+1 {{op expected indexing_map #1 results to match view rank: 'memref<?x?xf32>'}}
// expected-error @+1 {{expected shaped value rank (2) to match the result rank of indexing_map #1 (3)}}
linalg.batch_matmul ins(%a3, %b3: memref<?x?x?xf32>, memref<?x?xf32>)
outs(%c3 : memref<?x?x?xf32>)
return
}

// -----

func @empty_init_expected(%m: memref<?x?xf32>, %t: tensor<?x?xf32>) {
// expected-error @+1 {{expected empty `init` when op has no results or no reduction dims}}
linalg.matmul ins(%m, %m: memref<?x?xf32>, memref<?x?xf32>)
outs(%m : memref<?x?xf32>)
init(%t : tensor<?x?xf32>)
return
}

// -----

func @incorrect_region_arg_count(%m: memref<?x?xf32>) {
// expected-error @+3 {{region expects 3 args, got 4}}
// expected-error @+3 {{region expects 3 args, got 2}}
%res = linalg.matmul ins(%m, %m : memref<?x?xf32>, memref<?x?xf32>)
-> tensor<?x?xf32>, tensor<?x?xf32>
return
}

// -----

func @single_tensor_result(%m: memref<?x?xf32>, %t: tensor<?x?xf32>) {
// expected-error @+1 {{expected single tensor result when reduction present}}
%res:2 = linalg.matmul ins(%m : memref<?x?xf32>)
init(%t, %t : tensor<?x?xf32>, tensor<?x?xf32>)
-> tensor<?x?xf32>, tensor<?x?xf32>
return
}

// -----

func @matching_inits(%m: memref<?x?xf32>, %t: tensor<?x?xf32>) {
// expected-error @+1 {{expected #init tensors to match #results when reduction present}}
%res = linalg.matmul ins(%m, %m : memref<?x?xf32>, memref<?x?xf32>)
init(%t, %t : tensor<?x?xf32>, tensor<?x?xf32>)
-> tensor<?x?xf32>
return
}

// -----

func @matching_inits(%m: memref<?x?xf32>, %t: tensor<?x?xf32>) {
// expected-error @+1 {{expected init tensor #0 of the same type as result #0}}
// expected-error @+1 {{expected type of operand #2 ('tensor<?x?xf32>') to match type of corresponding result ('tensor<?xf32>')}}
%res = linalg.matmul ins(%m, %m : memref<?x?xf32>, memref<?x?xf32>)
init(%t : tensor<?x?xf32>)
outs(%t : tensor<?x?xf32>)
-> tensor<?xf32>
return
}
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Linalg/parallel-loops.mlir
@@ -64,7 +64,7 @@ func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {

#accesses = [
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>,
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d4, d5)>
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4, d3)>
]
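// Note: with the iterator_types below, d2 and d5 are reduction dimensions.
// The output map above now uses only parallel dimensions (d0, d1, d4, d3),
// presumably because the updated verifier rejects output indexing maps that
// depend on reduction iterators (see the "unexpected output tensor
// expression ... is function of reduction iterator" case in invalid.mlir
// above).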
#trait = {
iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"],
@@ -94,4 +94,4 @@ func @lower_mixed_parallel(%A: memref<?x?x?x?x?x?xf32>, %B: memref<?x?x?x?xf32>)
// CHECK: scf.parallel (%[[IV3:.*]], %[[IV4:.*]]) = (%[[C0]], %[[C0]]) to (%[[D3]], %[[D4]]) step (%[[C1]], %[[C1]])
// CHECK: scf.for %[[IV5:.*]] = %[[C0]] to %[[D5]] step %[[C1]]
// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], %[[IV4]], %[[IV5]]]
// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV2]], %[[IV4]], %[[IV5]]]
// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV4]], %[[IV3]]]
211 changes: 134 additions & 77 deletions mlir/test/Dialect/Linalg/reshape_fusion.mlir

Large diffs are not rendered by default.

206 changes: 116 additions & 90 deletions mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir

Large diffs are not rendered by default.

68 changes: 38 additions & 30 deletions mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -300,7 +300,7 @@ func @pooling_sum(%arg0: memref<?x?x?xf32>,
func @generic(%arg0: memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>,
%arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.generic #trait
ins(%arg0 : memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>)
ins(%arg0 : memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>)
outs(%arg1 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32) :
@@ -314,14 +314,14 @@ func @generic(%arg0: memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>,
// CHECK-SAME: indexing_maps = [#{{[0-9a-z]*}}, #{{[0-9a-z]*}}],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}} : memref<?x?xvector<3x4xi4>, #[[$strided2D]]>)
// CHECK-SAME: ins({{.*}} : memref<?x?xvector<3x4xi4>, #[[$strided2D]]>)
// CHECK-SAME: outs({{.*}} : memref<?x?x?xf32, #[[$strided3D]]>)
// CHECK-SAME: {foo = 1 : i64}

func @generic_with_tensor_input(%arg0: tensor<?x?xvector<3x4xi4>>,
%arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.generic #trait
ins(%arg0 : tensor<?x?xvector<3x4xi4>>)
ins(%arg0 : tensor<?x?xvector<3x4xi4>>)
outs(%arg1 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32) :
@@ -358,14 +358,14 @@ func @generic_without_inputs(%arg0 : memref<?x?x?xf32>) {

// -----

#accesses = [
#accesses2 = [
affine_map<(i, j, k) -> (j, i)>,
affine_map<(i, j, k) -> (i, k, i + j)>,
affine_map<(i, j, k) -> (i, k, i + j)>
]

#trait2 = {
indexing_maps = #accesses,
indexing_maps = #accesses2,
iterator_types = ["parallel", "parallel", "parallel"],
library_call = "some_external_function_name_1"
}
@@ -374,9 +374,10 @@ func @generic_with_tensor_input_and_output(
%arg0: tensor<?x?xvector<3x4xi4>>, %arg1: tensor<?x?x?xf32>)
-> (tensor<?x?x?xf32>) {
%0 = linalg.generic #trait2
ins(%arg0, %arg1 : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
ins(%arg0, %arg1 : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
outs(%arg1 : tensor<?x?x?xf32>)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32) :
^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
%f0 = constant 0.0 : f32
linalg.yield %f0 : f32
} -> tensor<?x?x?xf32>
@@ -386,32 +387,34 @@ func @generic_with_tensor_input_and_output(
// CHECK: linalg.generic {
// CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}} : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
// CHECK-SAME: ins({{.*}} : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
// CHECK-SAME: outs({{.*}} : tensor<?x?x?xf32>)
// CHECK-SAME: {foo = 1 : i64}
// CHECK: -> tensor<?x?x?xf32>
// CHECK: return {{.*}} : tensor<?x?x?xf32>

// -----

#accesses = [
#accesses3 = [
affine_map<(i, j, k) -> (j, i)>,
affine_map<(i, j, k) -> (i, k, i + j)>,
affine_map<(i, j, k) -> (i, k, i + j)>
]

#trait2 = {
indexing_maps = #accesses,
#trait3 = {
indexing_maps = #accesses3,
iterator_types = ["parallel", "parallel", "parallel"],
library_call = "some_external_function_name_1"
}

func @indexed_generic_with_tensor_input_and_output(
%arg0: tensor<?x?xvector<3x4xi4>>, %arg1: tensor<?x?x?xf32>)
-> (tensor<?x?x?xf32>) {
%0 = linalg.indexed_generic #trait2
ins(%arg0, %arg1 : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
%0 = linalg.indexed_generic #trait3
ins(%arg0, %arg1 : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
outs(%arg1 : tensor<?x?x?xf32>)
attrs = {foo = 1} {
^bb(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32) :
^bb(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32, %2: f32) :
%f0 = constant 0.0 : f32
linalg.yield %f0 : f32
} -> tensor<?x?x?xf32>
@@ -421,7 +424,8 @@ func @indexed_generic_with_tensor_input_and_output(
// CHECK: linalg.indexed_generic {
// CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}} : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
// CHECK-SAME: ins({{.*}} : tensor<?x?xvector<3x4xi4>>, tensor<?x?x?xf32>)
// CHECK-SAME: outs({{.*}} : tensor<?x?x?xf32>)
// CHECK-SAME: {foo = 1 : i64}
// CHECK: -> tensor<?x?x?xf32>
// CHECK: return {{.*}} : tensor<?x?x?xf32>
@@ -439,21 +443,23 @@ func @indexed_generic_with_tensor_input_and_output(
library_call = "some_broadcast_external_fn"
}

func @generic_op_zero_rank(%arg0: tensor<f32>) -> (tensor<3x4xf32>)
func @generic_op_zero_rank(%arg0: tensor<f32>, %arg1 : tensor<3x4xf32>) -> (tensor<3x4xf32>)
{
%0 = linalg.generic #trait_broadcast
ins(%arg0 : tensor<f32>) {
^bb(%a: f32) :
ins(%arg0 : tensor<f32>)
outs(%arg1 : tensor<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
} -> tensor<3x4xf32>
return %0 : tensor<3x4xf32>
}

func @indexed_generic_op_zero_rank(%arg0: tensor<f32>) -> (tensor<3x4xf32>)
func @indexed_generic_op_zero_rank(%arg0: tensor<f32>, %arg1 : tensor<3x4xf32>) -> (tensor<3x4xf32>)
{
%0 = linalg.indexed_generic #trait_broadcast
ins(%arg0 : tensor<f32>) {
^bb(%i: index, %j: index, %a: f32) :
ins(%arg0 : tensor<f32>)
outs(%arg1 : tensor<3x4xf32>) {
^bb(%i: index, %j: index, %a: f32, %b: f32) :
linalg.yield %a : f32
} -> tensor<3x4xf32>
return %0 : tensor<3x4xf32>
@@ -478,7 +484,7 @@ func @indexed_generic_op_zero_rank(%arg0: tensor<f32>) -> (tensor<3x4xf32>)
func @generic_region(%arg0: memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>,
%arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.generic #trait3
ins(%arg0 : memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>)
ins(%arg0 : memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>)
outs(%arg1 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>)
attrs = {foo = 1} {
^bb(%a: vector<3x4xi4>, %b: f32) :
@@ -491,7 +497,7 @@ func @generic_region(%arg0: memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1
// CHECK-SAME: indexing_maps = [#{{[0-9a-z]*}}, #{{[0-9a-z]*}}],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_2"
// CHECK-SAME: ins({{.*}} : memref<?x?xvector<3x4xi4>, #[[$strided2D]]>)
// CHECK-SAME: ins({{.*}} : memref<?x?xvector<3x4xi4>, #[[$strided2D]]>)
// CHECK-SAME: outs({{.*}} : memref<?x?x?xf32, #[[$strided3D]]>)
// CHECK-SAME: attrs = {foo = 1 : i64} {
// CHECK: ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32):
Expand All @@ -500,7 +506,7 @@ func @generic_region(%arg0: memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1
func @indexed_generic(%arg0: memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>,
%arg1: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {
linalg.indexed_generic #trait3
ins(%arg0 : memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>)
ins(%arg0 : memref<?x?xvector<3x4xi4>, offset: ?, strides: [?, 1]>)
outs(%arg1 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>)
attrs = {foo = 1} {
^bb(%i: index, %j: index, %k: index, %a: vector<3x4xi4>, %b: f32) :
@@ -564,8 +570,8 @@ func @reshape_static(%arg0: memref<3x4x5xf32>, %arg1: tensor<3x4x5xf32>, %arg2:
affine_map<(i, j, k, l, m) -> (l, m)>] :
tensor<3x4x5xf32> into tensor<1x3x4x1x5xf32>
%rt0 = linalg.tensor_reshape %t0 [affine_map<(i, j, k, l, m) -> (i, j)>,
affine_map<(i, j, k, l, m) -> (k)>,
affine_map<(i, j, k, l, m) -> (l, m)>] :
affine_map<(i, j, k, l, m) -> (k)>,
affine_map<(i, j, k, l, m) -> (l, m)>] :
tensor<1x3x4x1x5xf32> into tensor<3x4x5xf32>
%t1 = linalg.tensor_reshape %arg2 [affine_map<(i, j, k, l, m) -> (i, j)>,
affine_map<(i, j, k, l, m) -> (k)>,
@@ -660,11 +666,13 @@ func @named_ops(%a3: memref<?x?x?xf32>, %b3: memref<?x?x?xf32>, %c3: memref<?x?x
outs(%c3: memref<?x?x?xf32>)
linalg.batch_matmul ins(%ta3, %tb3: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
outs(%c3: memref<?x?x?xf32>)
%res1 = linalg.batch_matmul ins(%ta3, %tb3: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
init(%tc3: tensor<?x?x?xf32>)
%res1 = linalg.batch_matmul
ins(%ta3, %tb3: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
outs(%tc3: tensor<?x?x?xf32>)
-> tensor<?x?x?xf32>
%res2 = linalg.batch_matmul ins(%ta3, %b3: tensor<?x?x?xf32>, memref<?x?x?xf32>)
init(%tc3: tensor<?x?x?xf32>)
%res2 = linalg.batch_matmul
ins(%ta3, %b3: tensor<?x?x?xf32>, memref<?x?x?xf32>)
outs(%tc3: tensor<?x?x?xf32>)
-> tensor<?x?x?xf32>
return %res1, %res2 : tensor<?x?x?xf32>, tensor<?x?x?xf32>
}
81 changes: 48 additions & 33 deletions mlir/test/Dialect/Linalg/sparse_1d.mlir
@@ -32,8 +32,9 @@
// CHECK: }
func @add_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
%0 = linalg.generic #trait_d
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s : f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -58,8 +59,9 @@ func @add_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// CHECK: }
func @mul_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
%0 = linalg.generic #trait_d
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s : f32):
%0 = mulf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -124,8 +126,9 @@ func @mul_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// CHECK: }
func @add_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
%0 = linalg.generic #trait_s
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s : f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -159,8 +162,9 @@ func @add_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// CHECK: }
func @repeated_add_s(%arga: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_s
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s : f32):
%0 = addf %a, %a : f32 // same tensor
%1 = addf %a, %a : f32 // should yield
%2 = addf %0, %1 : f32 // one guard
@@ -192,8 +196,9 @@ func @repeated_add_s(%arga: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @mul_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
%0 = linalg.generic #trait_s
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s : f32):
%0 = mulf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -235,8 +240,9 @@ func @mul_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// CHECK: }
func @add_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_dd
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -263,8 +269,9 @@ func @add_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @mul_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_dd
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -335,8 +342,9 @@ func @mul_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_ds
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -368,8 +376,9 @@ func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_ds
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -440,8 +449,9 @@ func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @add_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_sd
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -473,8 +483,9 @@ func @add_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @mul_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_sd
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -569,8 +580,9 @@ func @mul_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @add_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_ss
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -628,8 +640,9 @@ func @add_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: }
func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
%0 = linalg.generic #trait_ss
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>)
outs(%arga : tensor<32xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -730,8 +743,9 @@ func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
func @two_way_inv(%arga: tensor<16xf32>,
%argb: tensor<16xf32>, %argc: f32) -> tensor<16xf32> {
%0 = linalg.generic #trait_two_way_inv
ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>) {
^bb(%a : f32, %b : f32):
ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>)
outs(%argb : tensor<16xf32>) {
^bb(%a : f32, %b : f32, %c : f32):
%0 = mulf %a, %argc : f32
%1 = mulf %b, %argc : f32
%2 = addf %0, %1 : f32
@@ -819,8 +833,9 @@ func @two_way_inv_alt(%arga: tensor<16xf32>,
%argb: tensor<16xf32>, %argc: f32) -> tensor<16xf32> {
// Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c".
%0 = linalg.generic #trait_two_way_inv
ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>) {
^bb(%a : f32, %b : f32):
ins(%arga, %argb : tensor<16xf32>, tensor<16xf32>)
outs(%argb : tensor<16xf32>) {
^bb(%a : f32, %b : f32, %c : f32):
%0 = addf %a, %b : f32
%1 = mulf %0, %argc : f32
linalg.yield %1: f32
@@ -866,7 +881,7 @@ func @two_way_inv_alt(%arga: tensor<16xf32>,
func @sum_reduction(%arga: tensor<?xf32>, %argx: tensor<f32>) -> tensor<f32> {
%0 = linalg.generic #trait_sum_reduction
ins(%arga : tensor<?xf32>)
init(%argx : tensor<f32>) {
outs(%argx : tensor<f32>) {
^bb(%a : f32, %x : f32):
%0 = addf %x, %a : f32
linalg.yield %0: f32
@@ -975,7 +990,7 @@ func @sum_reduction_ss(%arga: tensor<16xf32>,
// as two separate reductions kernels.
%0 = linalg.generic #trait_sum_reduction_ss
ins(%arga, %argb: tensor<16xf32>, tensor<16xf32>)
init(%argx : tensor<f32>) {
outs(%argx : tensor<f32>) {
^bb(%a : f32, %b : f32, %x : f32):
%0 = addf %a, %b : f32
%1 = addf %x, %0 : f32
@@ -1094,7 +1109,7 @@ func @sum_reduction_inv(%arga: tensor<16xf32>,
// as two separate reductions kernels.
%0 = linalg.generic #trait_sum_reduction_inv_ss
ins(%arga, %argb, %argc : tensor<16xf32>, tensor<f32>, tensor<16xf32>)
init(%argx : tensor<f32>) {
outs(%argx : tensor<f32>) {
^bb(%a : f32, %b : f32, %c : f32, %x : f32):
%0 = mulf %a, %b : f32
%1 = addf %0, %c : f32
81 changes: 47 additions & 34 deletions mlir/test/Dialect/Linalg/sparse_2d.mlir
@@ -39,8 +39,9 @@
// CHECK: }
func @add_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_dd
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga: tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -70,8 +71,9 @@ func @add_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @mul_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_dd
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -146,8 +148,9 @@ func @mul_dd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @add_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ds
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -183,8 +186,9 @@ func @add_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @mul_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ds
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -264,8 +268,9 @@ func @mul_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @add_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_sd
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -302,8 +307,9 @@ func @add_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @mul_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_sd
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -409,8 +415,9 @@ func @mul_sd(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @add_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ss
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -450,8 +457,9 @@ func @add_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @mul_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ss
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -627,8 +635,9 @@ func @mul_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16
// CHECK: }
func @add_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ss_ss
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -721,8 +730,9 @@ func @add_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32
// CHECK: }
func @mul_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ss_ss
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -898,8 +908,9 @@ func @mul_ss_ss(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32
// CHECK: }
func @add_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ss_ss
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -992,8 +1003,9 @@ func @add_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32
// CHECK: }
func @mul_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32x16xf32> {
%0 = linalg.generic #trait_ss_ss
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16xf32>, tensor<32x16xf32>)
outs(%arga : tensor<32x16xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -1048,8 +1060,8 @@ func @mul_sd_ds(%arga: tensor<32x16xf32>, %argb: tensor<32x16xf32>) -> tensor<32
// CHECK: }
func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> {
%0 = linalg.generic #trait_matvec
ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>)
init(%argx : tensor<16xf32>) {
ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>)
outs(%argx : tensor<16xf32>) {
^bb(%A: f32, %b: f32, %x: f32):
%0 = mulf %A, %b : f32
%1 = addf %0, %x : f32
@@ -1099,8 +1111,8 @@ func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf
// CHECK: }
func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor<f32>) -> tensor<f32> {
%0 = linalg.generic #trait_sum_reduction
ins(%arga : tensor<10x20xf32>)
init(%argx : tensor<f32>) {
ins(%arga : tensor<10x20xf32>)
outs(%argx : tensor<f32>) {
^bb(%a : f32, %x : f32):
%0 = addf %x, %a : f32
linalg.yield %0: f32
@@ -1150,8 +1162,9 @@ func @sum_reduction(%arga: tensor<10x20xf32>, %argx: tensor<f32>) -> tensor<f32>
func @scale(%arga: tensor<?x?xf64>) -> tensor<?x?xf64> {
%0 = constant 2.0 : f64
%1 = linalg.generic #trait_scale
ins(%arga: tensor<?x?xf64>) {
^bb(%a: f64):
ins(%arga: tensor<?x?xf64>)
outs(%arga: tensor<?x?xf64>) {
^bb(%a: f64, %s: f64):
%2 = mulf %a, %0 : f64
linalg.yield %2 : f64
} -> tensor<?x?xf64>
@@ -1224,10 +1237,10 @@ func @scale(%arga: tensor<?x?xf64>) -> tensor<?x?xf64> {
func @sampled_dense_dense(%args: tensor<?x?xf32>,
%arga: tensor<?x?xf32>,
%argb: tensor<?x?xf32>,
%argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
%argx: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic #trait_sampled_dense_dense
ins(%args, %arga, %argb : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
init(%argx : tensor<?x?xf32>) {
ins(%args, %arga, %argb : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
outs(%argx : tensor<?x?xf32>) {
^bb(%s : f32, %a : f32, %b : f32, %x : f32):
%0 = mulf %a, %b : f32
%1 = mulf %s, %0 : f32
@@ -1457,7 +1470,7 @@ func @sum_kernel_with_inv(%arga: tensor<?x?xf32>,
tensor<?x?xf32>,
tensor<?xf32>,
tensor<f32>)
init(%argx : tensor<?xf32>) {
outs(%argx : tensor<?xf32>) {
^bb(%a : f32, %b : f32, %c : f32, %d : f32, %e : f32, %x : f32):
%0 = mulf %a, %b : f32
%1 = mulf %0, %d : f32
101 changes: 60 additions & 41 deletions mlir/test/Dialect/Linalg/sparse_3d.mlir
@@ -42,8 +42,9 @@
// CHECK: }
func @add_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_ddd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s: f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -76,8 +77,9 @@ func @add_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_ddd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -157,8 +159,9 @@ func @mul_ddd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_dds
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -199,8 +202,9 @@ func @add_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_dds
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -284,8 +288,9 @@ func @mul_dds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_dsd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -326,8 +331,9 @@ func @add_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_dsd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -437,8 +443,9 @@ func @mul_dsd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_dss
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -482,8 +489,9 @@ func @add_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_dss
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -572,8 +580,9 @@ func @mul_dss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_sdd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -615,8 +624,9 @@ func @add_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_sdd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -731,8 +741,9 @@ func @mul_sdd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_sds
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -777,8 +788,9 @@ func @add_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_sds
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -897,8 +909,9 @@ func @mul_sds(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_ssd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -943,8 +956,9 @@ func @add_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_ssd
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -1089,8 +1103,9 @@ func @mul_ssd(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @add_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_sss
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = addf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -1138,8 +1153,9 @@ func @add_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<
// CHECK: }
func @mul_sss(%arga: tensor<32x16x8xf32>, %argb: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
%0 = linalg.generic #trait_sss
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32):
ins(%arga, %argb: tensor<32x16x8xf32>, tensor<32x16x8xf32>)
outs(%arga : tensor<32x16x8xf32>) {
^bb(%a: f32, %b: f32, %s : f32):
%0 = mulf %a, %b : f32
linalg.yield %0 : f32
} -> tensor<32x16x8xf32>
@@ -1213,8 +1229,8 @@ func @kernel_3d(%arga: tensor<?x?xf32>,
%argc: tensor<?x?xf32>,
%argd: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic #trait_kernel_3d
ins(%argb, %argc, %argd : tensor<?x?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
init(%arga : tensor<?x?xf32>) {
ins(%argb, %argc, %argd : tensor<?x?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arga : tensor<?x?xf32>) {
^bb(%b: f32, %c: f32, %d : f32, %a : f32):
%0 = mulf %b, %c : f32
%1 = mulf %0, %d : f32
@@ -1275,8 +1291,8 @@ func @kernel_3d(%arga: tensor<?x?xf32>,
// CHECK: }
func @sum_reduction(%arga: tensor<10x20x30xf32>, %argx: tensor<f32>) -> tensor<f32> {
%0 = linalg.generic #trait_sum_reduction
ins(%arga : tensor<10x20x30xf32>)
init(%argx : tensor<f32>) {
ins(%arga : tensor<10x20x30xf32>)
outs(%argx : tensor<f32>) {
^bb(%a : f32, %x : f32):
%0 = addf %x, %a : f32
linalg.yield %0: f32
@@ -1334,7 +1350,7 @@ func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
%argx: tensor<f32>) -> tensor<f32> {
%0 = linalg.generic #trait_sum_reduction_inv
ins(%arga, %argb : tensor<?x?x?xf32>, tensor<?xf32>)
init(%argx : tensor<f32>) {
outs(%argx : tensor<f32>) {
^bb(%a : f32, %b : f32, %x : f32):
%0 = mulf %a, %b : f32
%1 = addf %x, %0 : f32
@@ -1363,7 +1379,8 @@ func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
// CHECK-LABEL: func @invariants(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>) -> tensor<10x20x30xf32> {
// CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>,
// CHECK-SAME: %[[SHAPE:.*]]: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> {
// CHECK: %[[VAL_3:.*]] = constant 10 : index
// CHECK: %[[VAL_4:.*]] = constant 20 : index
// CHECK: %[[VAL_5:.*]] = constant 30 : index
@@ -1390,10 +1407,12 @@ func @sum_reduction_inv(%arga: tensor<?x?x?xf32>,
// CHECK: }
func @invariants(%arga: tensor<10xf32>,
%argb: tensor<20xf32>,
%argc: tensor<30xf32>) -> tensor<10x20x30xf32> {
%argc: tensor<30xf32>,
%shape : tensor<10x20x30xf32>) -> tensor<10x20x30xf32> {
%0 = linalg.generic #trait_invariants
ins(%arga, %argb, %argc : tensor<10xf32>, tensor<20xf32>, tensor<30xf32>) {
^bb(%a : f32, %b : f32, %c : f32):
ins(%arga, %argb, %argc : tensor<10xf32>, tensor<20xf32>, tensor<30xf32>)
outs(%shape : tensor<10x20x30xf32>) {
^bb(%a : f32, %b : f32, %c : f32, %s : f32):
%0 = mulf %a, %b : f32
%1 = mulf %0, %c : f32
linalg.yield %1: f32
112 changes: 24 additions & 88 deletions mlir/test/Dialect/Linalg/sparse_invalid.mlir
Original file line number Diff line number Diff line change
@@ -12,11 +14,14 @@
iterator_types = ["parallel"]
}

func @invalid_memref(%arga: memref<32xf32>, %argb: f32) -> tensor<32xf32> {
func @invalid_memref(%arga: memref<32xf32>, %argb: f32, %shape: tensor<32xf32>)
-> tensor<32xf32>
{
// expected-error@+1 {{'linalg.generic' op expected sparse annotations on tensors only}}
%0 = linalg.generic #trait_memref
ins(%arga: memref<32xf32>) {
^bb(%a: f32):
ins(%arga: memref<32xf32>)
outs(%shape: tensor<32xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -25,79 +28,6 @@ func @invalid_memref(%arga: memref<32xf32>, %argb: f32) -> tensor<32xf32> {

// -----

#trait_two_out = {
indexing_maps = [
affine_map<(i) -> (i)>, // a
affine_map<(i) -> (i)>, // x (out)
affine_map<(i) -> (i)> // y (out)
],
sparse = [
[ "S" ], // a
[ "D" ], // x
[ "D" ] // y
],
iterator_types = ["parallel"]
}

func @invalid_two_out(%arga: tensor<32xf32>) -> tensor<32xf32> {
// expected-error@+1 {{'linalg.generic' op expected single output tensor}}
%0, %1 = linalg.generic #trait_two_out
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
%0 = addf %a, %a : f32
linalg.yield %a, %0 : f32, f32
} -> tensor<32xf32>, tensor<32xf32>
return %1 : tensor<32xf32>
}

// -----

#trait_two_blocks = {
indexing_maps = [
affine_map<(i) -> (i)>, // a
affine_map<(i) -> (i)> // x (out)
],
sparse = [
[ "S" ], // a
[ "D" ] // x
],
iterator_types = ["parallel"]
}

func @invalid_two_blocks(%arga: tensor<32xf32>) -> tensor<32xf32> {
// expected-error@+1 {{'linalg.generic' op expects region #0 to have 0 or 1 blocks}}
%0 = linalg.generic #trait_two_blocks
ins(%arga: tensor<32xf32>) {
^bb1(%a: f32):
%0 = addf %a, %a : f32
^bb2:
linalg.yield %0 : f32
} -> tensor<32xf32>
return %0 : tensor<32xf32>
}

// -----

#trait_no_block = {
indexing_maps = [
affine_map<(i) -> (i)> // a
],
sparse = [
[ "S" ] // a
],
iterator_types = ["parallel"]
}

func @invalid_no_block(%arga: tensor<32xf32>) {
// expected-error@+1 {{'linalg.generic' op expected region with 1 block}}
linalg.generic #trait_no_block
ins(%arga: tensor<32xf32>) {
}
return
}

// -----

#trait_too_many = {
indexing_maps = [
affine_map<(i) -> (i)>, // a
@@ -114,8 +44,9 @@ func @invalid_no_block(%arga: tensor<32xf32>) {
func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// expected-error@+1 {{'linalg.generic' op expected one sparse annotation for each tensor}}
%0 = linalg.generic #trait_too_many
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -136,8 +67,9 @@ func @invalid_too_many(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// expected-error@+1 {{'linalg.generic' op expected sparse annotation array for tensor 0}}
%0 = linalg.generic #trait_no_array
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -161,8 +93,9 @@ func @invalid_no_array(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
// expected-error@+1 {{'linalg.generic' op expected sparse annotation with rank 1 for tensor 1}}
%0 = linalg.generic #trait_wrong_rank
ins(%arga: tensor<32xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32xf32>)
outs(%arga: tensor<32xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32xf32>
@@ -186,8 +119,9 @@ func @invalid_wrong_rank(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> {
// expected-error@+1 {{'linalg.generic' op expected sparse annotation at position 1 for tensor 0}}
%0 = linalg.generic #trait_no_string
ins(%arga: tensor<32x16xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32x16xf32>)
outs(%arga: tensor<32x16xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -211,8 +145,9 @@ func @invalid_no_string(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf3
func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> {
// expected-error@+1 {{'linalg.generic' op expected sparse annotation at position 1 for tensor 1}}
%0 = linalg.generic #trait_wrong_symbol
ins(%arga: tensor<32x16xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32x16xf32>)
outs(%arga: tensor<32x16xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
@@ -236,8 +171,9 @@ func @invalid_wrong_symbol(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16
func @invalid_no_sparse_output(%arga: tensor<32x16xf32>, %argb: f32) -> tensor<32x16xf32> {
// expected-error@+1 {{'linalg.generic' op sparse output tensors not supported (yet)}}
%0 = linalg.generic #trait_no_sparse_output
ins(%arga: tensor<32x16xf32>) {
^bb(%a: f32):
ins(%arga: tensor<32x16xf32>)
outs(%arga: tensor<32x16xf32>) {
^bb(%a: f32, %s: f32):
%0 = addf %a, %argb : f32
linalg.yield %0 : f32
} -> tensor<32x16xf32>
12 changes: 7 additions & 5 deletions mlir/test/Dialect/Linalg/sparse_parallel.mlir
Original file line number Diff line number Diff line change
@@ -50,8 +50,9 @@
//
func @scale_dd(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic #trait_dd
ins(%arga: tensor<?x?xf32>) {
^bb(%a: f32):
ins(%arga: tensor<?x?xf32>)
outs(%arga: tensor<?x?xf32>) {
^bb(%a: f32, %s: f32):
%0 = mulf %a, %scale : f32
linalg.yield %0 : f32
} -> tensor<?x?xf32>
@@ -99,8 +100,9 @@ func @scale_dd(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
//
func @scale_ss(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic #trait_ss
ins(%arga: tensor<?x?xf32>) {
^bb(%a: f32):
ins(%arga: tensor<?x?xf32>)
outs(%arga: tensor<?x?xf32>) {
^bb(%a: f32, %s: f32):
%0 = mulf %a, %scale : f32
linalg.yield %0 : f32
} -> tensor<?x?xf32>
@@ -151,7 +153,7 @@ func @scale_ss(%scale: f32, %arga: tensor<?x?xf32>) -> tensor<?x?xf32> {
func @matvec(%argA: tensor<16x32xf32>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> {
%0 = linalg.generic #trait_matvec
ins(%argA, %argb : tensor<16x32xf32>, tensor<32xf32>)
init(%argx : tensor<16xf32>) {
outs(%argx : tensor<16xf32>) {
^bb(%A: f32, %b: f32, %x: f32):
%0 = mulf %A, %b : f32
%1 = addf %0, %x : f32
5 changes: 3 additions & 2 deletions mlir/test/Dialect/Linalg/sparse_storage.mlir
Original file line number Diff line number Diff line change
@@ -88,8 +88,9 @@

func @mul_dd(%arga: tensor<32xf64>, %argb: tensor<32xf64>) -> tensor<32xf64> {
%0 = linalg.generic #trait_mul_1d
ins(%arga, %argb: tensor<32xf64>, tensor<32xf64>) {
^bb(%a: f64, %b: f64):
ins(%arga, %argb: tensor<32xf64>, tensor<32xf64>)
outs(%arga : tensor<32xf64>) {
^bb(%a: f64, %b: f64, %s: f64):
%0 = mulf %a, %b : f64
linalg.yield %0 : f64
} -> tensor<32xf64>
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Linalg/tile-and-distribute.mlir
Original file line number Diff line number Diff line change
@@ -198,14 +198,14 @@ func @matmul_tensors(
// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: init(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-SAME: outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: scf.yield %[[TD]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD2]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD1]] : tensor<?x?xf32>
%0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"}
ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
init(%arg2: tensor<?x?xf32>)
outs(%arg2: tensor<?x?xf32>)
-> tensor<?x?xf32>

// CHECK: return %[[TD0]] : tensor<?x?xf32>
8 changes: 4 additions & 4 deletions mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@

func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
%t0 = linalg.matmul ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
init(%arg2: tensor<?x?xf32>)
outs(%arg2: tensor<?x?xf32>)
-> tensor<?x?xf32>

%c4 = constant 4 : index
@@ -25,7 +25,7 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
%6 = subtensor %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
%7 = subtensor %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
%8 = subtensor %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%9 = linalg.matmul ins(%6, %7 : tensor<?x4xf32>, tensor<4x?xf32>) init(%8 : tensor<?x?xf32>) -> tensor<?x?xf32>
%9 = linalg.matmul ins(%6, %7 : tensor<?x4xf32>, tensor<4x?xf32>) outs(%8 : tensor<?x?xf32>) -> tensor<?x?xf32>
%10 = subtensor_insert %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
scf.yield %10 : tensor<?x?xf32>
}
@@ -53,6 +53,6 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
// subtensors of the producing matmul.
// CHECK-DAG: %[[stB2:.*]] = subtensor %[[B]][0, %[[K]]] [%[[dA1]], 4] [1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
// CHECK-DAG: %[[stC:.*]] = subtensor %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor<?x?xf32> to tensor<2x4xf32>
// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor<?x4xf32>) init(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) init(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor<?x4xf32>) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
// CHECK-NEXT: subtensor_insert %[[stG]] into %[[RES]][%[[I]], %[[J]]]
6 changes: 3 additions & 3 deletions mlir/test/Dialect/Linalg/tile-tensors.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" | FileCheck %s

// CHECK-LABEL: func @matmul_tensors(
// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
@@ -14,13 +14,13 @@ func @matmul_tensors(
// CHECK: %[[sTB:.*]] = subtensor %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTC:.*]] = subtensor %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: init(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-SAME: outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[TD:.*]] = subtensor_insert %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
// CHECK: scf.yield %[[TD]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD2]] : tensor<?x?xf32>
// CHECK: scf.yield %[[TD1]] : tensor<?x?xf32>
%0 = linalg.matmul ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
init(%arg2: tensor<?x?xf32>)
outs(%arg2: tensor<?x?xf32>)
-> tensor<?x?xf32>

// CHECK: return %[[TD0]] : tensor<?x?xf32>
12 changes: 7 additions & 5 deletions mlir/test/EDSC/builder-api-test.cpp
Original file line number Diff line number Diff line change
@@ -1101,7 +1101,7 @@ TEST_FUNC(linalg_metadata_ops) {
// CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d1)>],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%{{[a-z0-9]*}}, %{{[a-z0-9]*}} : tensor<?x?xf32>, memref<?x?xf32>)
// CHECK-SAME: init(%{{[a-z0-9]*}} : tensor<?x?xf32>)
// CHECK-SAME: outs(%{{[a-z0-9]*}} : tensor<?x?xf32>)
// CHECK: mulf
// CHECK: addf
// CHECK: } -> tensor<?x?xf32>
@@ -1115,14 +1115,15 @@ TEST_FUNC(linalg_tensors_test) {
{ShapedType::kDynamicSize, ShapedType::kDynamicSize}, f32Type, {}, 0);
auto tensorType = RankedTensorType::get(
{ShapedType::kDynamicSize, ShapedType::kDynamicSize}, f32Type);
auto f = makeFunction("linalg_tensors", {}, {tensorType, memrefType});
auto f =
makeFunction("linalg_tensors", {}, {tensorType, memrefType, tensorType});

OpBuilder builder(f.getBody());
ScopedContext scope(builder, f.getLoc());
Value A(f.getArgument(0)), B(f.getArgument(1));
Value A(f.getArgument(0)), B(f.getArgument(1)), C(f.getArgument(2));
AffineExpr i, j;
bindDims(&globalContext(), i, j);
StructuredIndexed SA(A), SB(B), SC(tensorType);
StructuredIndexed SA(A), SB(B), SC(C);
Value added = linalg_generic_pointwise_add(SA({i, j}), SB({i, j}), SC({i, j}))
->getResult(0);
Value maxed = linalg_generic_pointwise_max(
@@ -1223,7 +1224,8 @@ TEST_FUNC(builder_loop_for_yield) {
[&](Value iv, ValueRange args) {
Value sum = args[0] + args[1];
return scf::ValueVector{args[1], sum};
}).getResults();
})
.getResults();
results[0] + results[1];

// clang-format off
3 changes: 0 additions & 3 deletions mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@
// ODS-LABEL: def Test1Op : LinalgStructuredBase_Op<"test1", [
// ODS-NEXT: AttrSizedOperandSegments
// ODS-NEXT: DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
// ODS-NEXT: NamedStructuredOpTrait
// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
//
// IMPL-LABEL: ArrayAttr Test1Op::iterator_types() {
@@ -29,7 +28,6 @@ def test1(A: f32(M, K), B: f32(K)) -> (C: f32(M)) {
// ODS-LABEL: def Test2Op : LinalgStructuredBase_Op<"test2", [
// ODS-NEXT: AttrSizedOperandSegments
// ODS-NEXT: DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
// ODS-NEXT: NamedStructuredOpTrait
// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
//
// IMPL-LABEL: ArrayAttr Test2Op::iterator_types() {
@@ -54,7 +52,6 @@ def test2(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) {
// ODS-LABEL: def Test3Op : LinalgStructuredBase_Op<"test3", [
// ODS-NEXT: AttrSizedOperandSegments
// ODS-NEXT: DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
// ODS-NEXT: NamedStructuredOpTrait
// ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp">
//
// IMPL-LABEL: ArrayAttr Test3Op::iterator_types() {
28 changes: 9 additions & 19 deletions mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp
Original file line number Diff line number Diff line change
@@ -1453,54 +1453,45 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName,
const char *header = R"FMT( def {0} : LinalgStructuredBase_Op<"{1}", [
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
NamedStructuredOpTrait,
SingleBlockImplicitTerminator<"YieldOp">]> {
let arguments = (ins Variadic<AnyShaped>:$inputs,
Variadic<AnyMemRef>:$output_buffers,
Variadic<AnyRankedTensor>:$init_tensors);
Variadic<AnyShaped>:$outputs);
let results = (outs Variadic<AnyRankedTensor>:$result_tensors);
let regions = (region AnyRegion:$region);
let skipDefaultBuilders = 1;
let builders = [ OpBuilderDAG<
(ins "ValueRange":$inputs, "ValueRange":$outputBuffers),
(ins "ValueRange":$inputs, "ValueRange":$outputs),
[{{
$_state.addOperands(inputs);
$_state.addOperands(outputBuffers);
$_state.addOperands(outputs);
$_state.addAttribute(
"operand_segment_sizes",
$_builder.getI32VectorAttr({{
static_cast<int32_t>(inputs.size()),
static_cast<int32_t>(outputBuffers.size()),
static_cast<int32_t>(0)}));
static_cast<int32_t>(outputs.size())}));
buildNamedStructuredOpRegionAndAttributes<{0}>(
$_builder,
$_state,
TypeRange(inputs),
TypeRange(outputBuffers),
TypeRange(),
TypeRange());
TypeRange(outputs));
}]>, OpBuilderDAG<
(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
"ValueRange":$outputBuffers, "ValueRange":$initTensors),
"ValueRange":$outputs),
[{{
$_state.addOperands(inputs);
$_state.addOperands(outputBuffers);
$_state.addOperands(initTensors);
$_state.addOperands(outputs);
$_state.addTypes(resultTensorTypes);
$_state.addAttribute(
"operand_segment_sizes",
$_builder.getI32VectorAttr({{
static_cast<int32_t>(inputs.size()),
static_cast<int32_t>(outputBuffers.size()),
static_cast<int32_t>(initTensors.size())}));
static_cast<int32_t>(outputs.size())}));
buildNamedStructuredOpRegionAndAttributes<{0}>(
$_builder,
$_state,
TypeRange(inputs),
TypeRange(outputBuffers),
TypeRange(initTensors),
resultTensorTypes);
TypeRange(outputs));
}]>, OpBuilderDAG<
(ins "TypeRange":$resultTensorTypes, "ValueRange":$operands,
CArg<"ArrayRef<NamedAttribute>", "{{}">:$attributes),
@@ -1513,7 +1504,6 @@ void TCParser::printODS(llvm::raw_ostream &os, StringRef cppOpName,
];
let printer = [{{ return ::printNamedStructuredOp(p, *this); }];
let parser = [{{ return ::parseNamedStructuredOp<{0}>(parser, result); }];
let verifier = [{{ return ::verifyNamedStructuredOp(*this); }];
let hasFolder = 1;
let hasCanonicalizer = 1;