16 changes: 8 additions & 8 deletions mlir/test/Dialect/Linalg/roundtrip.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ func @ops(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// -----

// Round-trip test input: fills a 1-D strided memref (%arg0) with the f32
// scalar %arg1 using linalg.fill.
// NOTE(review): this listing appears to be a rendered diff, so the two
// linalg.fill lines below are presumably the removed (parenthesized operands)
// and the added (ins/outs) spelling of the same op - confirm that only the
// ins/outs form remains in the real file.
func @fill_view(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: f32) {
// Parenthesized operand form: (value, destination) : value type, destination type.
linalg.fill(%arg1, %arg0) : f32, memref<?xf32, offset: ?, strides: [1]>
// ins/outs form: the scalar is listed under ins, the memref under outs.
linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<?xf32, offset: ?, strides: [1]>)
return
}
// CHECK-LABEL: func @fill_view(
// CHECK: %{{.*}}: memref<?xf32, #[[$strided1D]]>, %{{.*}}: f32) {
// CHECK: linalg.fill(%{{.*}}, %{{.*}}) : f32, memref<?xf32, #[[$strided1D]]>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref<?xf32, #[[$strided1D]]>)

// -----

Expand All @@ -84,12 +84,12 @@ func @transpose(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>) {


// Round-trip test input: fills a 3-D strided memref (%arg0) with the f32
// scalar %arg1 using linalg.fill.
// NOTE(review): this listing appears to be a rendered diff, so the two
// linalg.fill lines below are presumably the removed (parenthesized operands)
// and the added (ins/outs) spelling of the same op - confirm that only the
// ins/outs form remains in the real file.
func @fill_view3(%arg0: memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>, %arg1: f32) {
// Parenthesized operand form: (value, destination) : value type, destination type.
linalg.fill(%arg1, %arg0) : f32, memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>
// ins/outs form: the scalar is listed under ins, the memref under outs.
linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<?x?x?xf32, offset: ?, strides: [?, ?, 1]>)
return
}
// CHECK-LABEL: func @fill_view3(
// CHECK: %{{.*}}: memref<?x?x?xf32, #[[$strided3D]]>, %{{.*}}: f32) {
// CHECK: linalg.fill(%{{.*}}, %{{.*}}) : f32, memref<?x?x?xf32, #[[$strided3D]]>
// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref<?x?x?xf32, #[[$strided3D]]>)

// -----

Expand Down Expand Up @@ -208,9 +208,9 @@ func @generic_with_multiple_tensor_outputs(
-> (tensor<i32>, tensor<i32>) {
%c0 = arith.constant 0 : index
%0 = linalg.init_tensor [] : tensor<i32>
%1 = linalg.fill(%arg2, %0) : i32, tensor<i32> -> tensor<i32>
%1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor<i32>) -> tensor<i32>
%2 = linalg.init_tensor [] : tensor<i32>
%3 = linalg.fill(%arg2, %2) : i32, tensor<i32> -> tensor<i32>
%3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor<i32>) -> tensor<i32>
%4:2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
iterator_types = ["reduction"]}
Expand Down Expand Up @@ -346,7 +346,7 @@ func @init_tensor(%arg0 : index, %arg1 : index)

// Round-trip test input for the tensor form of linalg.fill: creates a dynamic
// 2-D tensor of size %arg0 x %arg1 with linalg.init_tensor, fills it with the
// f32 scalar %arg2, and returns the filled tensor.
// NOTE(review): this listing appears to be a rendered diff; both linalg.fill
// lines below define %1, so they are presumably the removed and the added
// spelling of a single op - confirm that only the ins/outs form remains in
// the real file.
func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor<?x?xf32> {
%0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
// Parenthesized operand form; on tensors the op yields a result, hence "-> tensor".
%1 = linalg.fill(%arg2, %0) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
// ins/outs form of the same fill.
%1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %1 : tensor<?x?xf32>
}
// CHECK: %{{.+}} = linalg.fill(%{{.+}}, %{{.+}}) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor<?x?xf32>) -> tensor<?x?xf32>
2 changes: 1 addition & 1 deletion mlir/test/Dialect/Linalg/tile-and-fuse-no-fuse.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ builtin.func @no_fuse_gemm(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>) ->
%d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%init = linalg.init_tensor [%d0, %d1] : tensor<?x?xf32>
%fill = linalg.fill(%cst, %init) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
%result = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%fill : tensor<?x?xf32>) -> tensor<?x?xf32>
return %result : tensor<?x?xf32>
Expand Down
28 changes: 14 additions & 14 deletions mlir/test/Dialect/Linalg/tile-and-fuse-on-tensors.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ builtin.func @fuse_input(%arg0: tensor<24x12xf32>,
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg0) : f32, tensor<24x12xf32> -> tensor<24x12xf32>
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<24x12xf32>) -> tensor<24x12xf32>

// MATMUL: scf.for %[[IV0:[0-9a-zA-Z]*]] =
// MATMUL: scf.for %[[IV1:[0-9a-zA-Z]*]] =
Expand All @@ -31,7 +31,7 @@ builtin.func @fuse_input(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
// MATMUL-SAME: %[[IV1]], %[[IV2]]
// MATMUL-SAME: %[[UB1]], %[[UB2]]
// MATMUL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
// MATMUL: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
// MATMUL: %{{.*}} = linalg.matmul ins(%[[T1]]
%1 = linalg.matmul ins(%0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
return %1 : tensor<24x25xf32>
Expand All @@ -55,7 +55,7 @@ builtin.func @fuse_output(%arg0: tensor<24x12xf32>,
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg2) : f32, tensor<24x25xf32> -> tensor<24x25xf32>
%0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>

// Update the iteration argument of the outermost tile loop.
// MATMUL: scf.for %[[IV0:.*]] = {{.*}} iter_args(%[[ARG3:.*]] = %[[ARG2]]
Expand All @@ -67,7 +67,7 @@ builtin.func @fuse_output(%arg0: tensor<24x12xf32>,
// MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG4]]
// MATMUL-SAME: %[[IV1]], %[[IV0]]
// MATMUL-SAME: %[[TS1]], %[[TS0]]
// MATMUL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
// MATMUL: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
// MATMUL: scf.for %[[IV2:.*]] = {{.*}} iter_args(%[[ARG5:.*]] = %[[T1]]

// Check there is an extract/insert slice pair for the output operand.
Expand Down Expand Up @@ -184,19 +184,19 @@ builtin.func @fuse_input_and_output(%arg0: tensor<24x12xf32>,
%c24 = arith.constant 24 : index
%c4 = arith.constant 4 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg0) : f32, tensor<24x12xf32> -> tensor<24x12xf32>
%1 = linalg.fill(%cst, %arg2) : f32, tensor<24x25xf32> -> tensor<24x25xf32>
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<24x12xf32>) -> tensor<24x12xf32>
%1 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>

// Fuse both producers to the appropriate tile loops.
// MATMUL: scf.for %[[IV0:.*]] = {{.*}} iter_args(%[[ARG3:.*]] = %[[ARG2]]
// MATMUL: scf.for %[[IV1:.*]] = {{.*}} iter_args(%[[ARG4:.*]] = %[[ARG3]]
// MATMUL: %[[T0:.*]] = tensor.extract_slice %[[ARG4]]
// MATMUL-SAME: %[[IV1]], %[[IV0]]
// MATMUL: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
// MATMUL: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
// MATMUL: scf.for %[[IV2:.*]] = {{.*}} iter_args(%[[ARG5:.*]] = %[[T1]]
// MATMUL: %[[T2:.*]] = tensor.extract_slice %[[ARG0]]
// MATMUL-SAME: %[[IV1]], %[[IV2]]
// MATMUL: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]])
// MATMUL: %[[T3:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T2]]
// MATMUL: %[[T4:.*]] = tensor.extract_slice %[[ARG5]]
// MATMUL: %{{.*}} = linalg.matmul ins(%[[T3]], {{.*}} outs(%[[T4]]
%2 = linalg.matmul ins(%0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%1 : tensor<24x25xf32>) -> tensor<24x25xf32>
Expand Down Expand Up @@ -255,19 +255,19 @@ builtin.func @fuse_indexed(%arg0: tensor<24x12xi32>,
func @fuse_outermost_reduction(%arg0: tensor<10x17xf32>,
%arg1: tensor<10xf32>) -> tensor<10xf32> {
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg0) : f32, tensor<10x17xf32> -> tensor<10x17xf32>
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<10x17xf32>) -> tensor<10x17xf32>

// Cannot fuse the output fill since the reduction loop is the outermost loop.
// GENERIC: %[[T0:.*]] = linalg.fill(%{{.*}}, %[[ARG1]])
%1 = linalg.fill(%cst, %arg1) : f32, tensor<10xf32> -> tensor<10xf32>
// GENERIC: %[[T0:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[ARG1]]
%1 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor<10xf32>) -> tensor<10xf32>

// GENERIC: scf.for %[[IV0:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG2:.*]] = %[[T0]]
// GENERIC: scf.for %[[IV1:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG3:.*]] = %[[ARG2]]

// The input fill has been fused.
// GENERIC: %[[T1:.*]] = tensor.extract_slice %[[ARG0]]
// GENERIC-SAME: %[[IV1]], %[[IV0]]
// GENERIC: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]])
// GENERIC: %[[T2:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T1]]
// GENERIC: %[[T3:.*]] = tensor.extract_slice %[[ARG3]]
// GENERIC-SAME: %[[IV1]]
// GENERIC: linalg.generic {{.*}} ins(%[[T2]] {{.*}} outs(%[[T3]]
Expand Down Expand Up @@ -298,7 +298,7 @@ func @fuse_non_rectangular(%arg0: tensor<10x17xf32>,
// GENERIC-DAG: %[[C8:.*]] = arith.constant 8 : index
// GENERIC-DAG: %[[C10:.*]] = arith.constant 10 : index
%cst = arith.constant 0.000000e+00 : f32
%0 = linalg.fill(%cst, %arg0) : f32, tensor<10x17xf32> -> tensor<10x17xf32>
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<10x17xf32>) -> tensor<10x17xf32>

// GENERIC: scf.for %[[IV0:[0-9a-zA-Z]*]] = %[[C0]] to %[[C8]] step %[[C4]]
// GENERIC: scf.for %[[IV1:[0-9a-zA-Z]*]] = %[[C0]] to %[[C10]] step %[[C5]]
Expand All @@ -313,7 +313,7 @@ func @fuse_non_rectangular(%arg0: tensor<10x17xf32>,
// GENERIC: %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
// GENERIC-SAME: %[[IV1]], %[[SUM]]
// GENERIC-SAME: , %[[UB1]]
// GENERIC: %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
// GENERIC: %[[T1:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T0]]
%1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<10x17xf32>) outs(%arg1 : tensor<10x8xf32>) {
^bb0(%arg2: f32, %arg3: f32):
%2 = arith.addf %arg2, %arg3 : f32
Expand Down
24 changes: 12 additions & 12 deletions mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ builtin.func @fuse_conv_chain(%arg0: tensor<2x2xf32>,
%cst = arith.constant 1.0 : f32

// Do not tile the filter fill since the filter dimensions are not tiled.
// CONV: %[[T0:.*]] = linalg.fill(%{{.*}}, %[[ARG0]])
%0 = linalg.fill(%cst, %arg0) : f32, tensor<2x2xf32> -> tensor<2x2xf32>
// CONV: %[[T0:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[ARG0]]
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<2x2xf32>) -> tensor<2x2xf32>

// Fuse all other operations.
// CONV: scf.for %[[IV0:.*]] = {{.*}} iter_args(%[[ARG5:.*]] = %[[ARG4]]
Expand All @@ -26,24 +26,24 @@ builtin.func @fuse_conv_chain(%arg0: tensor<2x2xf32>,
// CONV-SAME: %[[IV0]], %[[IV1]]
// CONV: %[[T2:.*]] = tensor.extract_slice %[[ARG2]]
// CONV-SAME: %[[IV0]], %[[IV1]]
// CONV: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]])
// CONV: %[[T3:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T2]]
// CONV: %[[T4:.*]] = linalg.conv_2d ins(%[[T1]], %[[T0]] : {{.*}} outs(%[[T3]]
%1 = linalg.fill(%cst, %arg2) : f32, tensor<10x10xf32> -> tensor<10x10xf32>
%1 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<10x10xf32>) -> tensor<10x10xf32>
%2 = linalg.conv_2d ins(%arg1, %0 : tensor<11x11xf32>, tensor<2x2xf32>) outs(%1 : tensor<10x10xf32>) -> tensor<10x10xf32>

// CONV: %[[T5:.*]] = tensor.extract_slice %[[ARG3]]
// CONV-SAME: %[[IV0]], %[[IV1]]
// CONV: %[[T6:.*]] = linalg.fill(%{{.*}}, %[[T5]])
// CONV: %[[T6:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T5]]
// CONV: %[[T7:.*]] = linalg.conv_2d ins(%[[T4]], %[[T0]] : {{.*}} outs(%[[T6]]
%3 = linalg.fill(%cst, %arg3) : f32, tensor<9x9xf32> -> tensor<9x9xf32>
%3 = linalg.fill ins(%cst : f32) outs(%arg3 : tensor<9x9xf32>) -> tensor<9x9xf32>
%4 = linalg.conv_2d ins(%2, %0 : tensor<10x10xf32>, tensor<2x2xf32>) outs(%3 : tensor<9x9xf32>) -> tensor<9x9xf32>

// Use the argument passed in by iteration argument.
// CONV: %[[T8:.*]] = tensor.extract_slice %[[ARG6]]
// CONV-SAME: %[[IV0]], %[[IV1]]
// CONV: %[[T9:.*]] = linalg.fill(%{{.*}}, %[[T8]])
// CONV: %[[T9:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T8]]
// CONV: %[[T5:.*]] = linalg.conv_2d ins(%[[T7]], %[[T0]] {{.*}} outs(%[[T9]]
%5 = linalg.fill(%cst, %arg4) : f32, tensor<8x8xf32> -> tensor<8x8xf32>
%5 = linalg.fill ins(%cst : f32) outs(%arg4 : tensor<8x8xf32>) -> tensor<8x8xf32>
%6 = linalg.conv_2d ins(%4, %0 : tensor<9x9xf32>, tensor<2x2xf32>) outs(%5 : tensor<8x8xf32>) -> tensor<8x8xf32>
return %6 : tensor<8x8xf32>
}
Expand All @@ -61,23 +61,23 @@ builtin.func @fuse_matmul_chain(%arg0: tensor<8x8xf32>) -> tensor<8x8xf32> {
%cst = arith.constant 0.000000e+00 : f32

// Do not tile the rhs fill of the producer matmul since none of its loop dimensions are tiled.
// MATMUL: %[[T0:.*]] = linalg.fill(%{{.*}}, %[[ARG0]])
%0 = linalg.fill(%cst, %arg0) : f32, tensor<8x8xf32> -> tensor<8x8xf32>
// MATMUL: %[[T0:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[ARG0]]
%0 = linalg.fill ins(%cst : f32) outs(%arg0 : tensor<8x8xf32>) -> tensor<8x8xf32>

// MATMUL: scf.for %[[IV0:.*]] = {{.*}} iter_args(%[[ARG1:.*]] = %[[ARG0]]
// MATMUL: scf.for %[[IV1:.*]] = {{.*}} iter_args(%[[ARG2:.*]] = %[[ARG1]]

// Only the outermost loop of the producer matmul is tiled.
// MATMUL: %[[T1:.*]] = tensor.extract_slice %[[ARG0]]
// MATMUL-SAME: %[[IV0]], 0
// MATMUL: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]])
// MATMUL: %[[T2:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T1]]
// MATMUL: %[[T3:.*]] = linalg.matmul ins(%[[T2]], %[[T0]] {{.*}}
%1 = linalg.matmul ins(%0, %0 : tensor<8x8xf32>, tensor<8x8xf32>) outs(%0 : tensor<8x8xf32>) -> tensor<8x8xf32>

// Use the argument passed in by iteration argument.
// MATMUL: %[[T4:.*]] = tensor.extract_slice %[[ARG2]]
// MATMUL-SAME: %[[IV0]], %[[IV1]]
// MATMUL: %[[T5:.*]] = linalg.fill(%{{.*}}, %[[T4]])
// MATMUL: %[[T5:.*]] = linalg.fill ins(%{{.*}}{{.*}}outs(%[[T4]]
// MATMUL: %{{.*}} = linalg.matmul ins(%[[T3]], {{.*}} outs(%[[T5]]
%2 = linalg.matmul ins(%1, %0 : tensor<8x8xf32>, tensor<8x8xf32>) outs(%0 : tensor<8x8xf32>) -> tensor<8x8xf32>
return %2 : tensor<8x8xf32>
Expand Down
10 changes: 5 additions & 5 deletions mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func @conv_tensors_static(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3
%cst = arith.constant 0.0 : f32

%init = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
%fill = linalg.fill(%cst, %init) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
Expand Down Expand Up @@ -119,7 +119,7 @@ func @conv_tensors_static(%input: tensor<1x225x225x3xf32>, %filter: tensor<3x3x3
// CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)

// CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 112, 112, 32] : tensor<1x112x112x32xf32>
// CHECK-NEXT: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<1x112x112x32xf32> -> tensor<1x112x112x32xf32>
// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

// CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
// CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
Expand Down Expand Up @@ -157,7 +157,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
%oc = tensor.dim %elementwise, %c3 : tensor<?x?x?x?xf32>

%init = linalg.init_tensor [%n, %oh, %ow, %oc] : tensor<?x?x?x?xf32>
%fill = linalg.fill(%cst, %init) : f32, tensor<?x?x?x?xf32> -> tensor<?x?x?x?xf32>
%fill = linalg.fill ins(%cst : f32) outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>

%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
Expand Down Expand Up @@ -226,7 +226,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor<?x?x?x?xf32>

// CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]] : tensor<?x?x?x?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill(%cst, %[[INIT]]) : f32, tensor<?x?x?x?xf32> -> tensor<?x?x?x?xf32>
// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>

// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<?x?x?x?xf32>
Expand Down Expand Up @@ -310,7 +310,7 @@ func @pad_generic_static(%small_input: tensor<58x1xf32>, %large_input: tensor<64
tensor.yield %zero : f32
} : tensor<58x1xf32> to tensor<64x128xf32>

%fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32>
%fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>

%for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
%for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func @fill_matmul_tensors(
// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[TC1]]
// CHECK: %[[FILL:.+]] = linalg.fill(%{{.+}}, %[[SLICE]])
// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[SLICE]]
// CHECK: %[[sTD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[FILL]]) -> (tensor<?x?xf32>) {
// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
Expand All @@ -42,7 +42,7 @@ func @fill_matmul_tensors(
%0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
%1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
%2 = linalg.init_tensor [%0, %1] : tensor<?x?xf32>
%3 = linalg.fill(%cst, %2) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
%3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%4 = linalg.matmul {__internal_linalg_transform__ = "tensors_fuse_distribute1"}
ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%3: tensor<?x?xf32>)
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func @tiled_and_peeled_matmul(%arg0: tensor<257x259xf32>, %arg1: tensor<259x258x
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c32 = arith.constant 32 : index
%0 = linalg.fill(%cst, %arg2) : f32, tensor<257x258xf32> -> tensor<257x258xf32>
%0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<257x258xf32>) -> tensor<257x258xf32>
%1 = scf.for %arg3 = %c0 to %c257 step %c64 iter_args(%arg4 = %0) -> (tensor<257x258xf32>) {
%2 = affine.min #map0(%arg3)
%3 = tensor.extract_slice %arg0[%arg3, 0] [%2, 259] [1, 1] : tensor<257x259xf32> to tensor<?x259xf32>
Expand Down
14 changes: 7 additions & 7 deletions mlir/test/Dialect/Linalg/tile.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -254,35 +254,35 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(

// Tiling test input: fills a statically shaped 127x99 memref (%arg0) with the
// f32 scalar %arg1. The expectations that follow this function check the
// result for each tile-size configuration.
// NOTE(review): this listing appears to be a rendered diff, so the two
// linalg.fill lines below are presumably the removed (parenthesized operands)
// and the added (ins/outs) spelling of the same op - confirm that only the
// ins/outs form remains in the real file.
func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
// Parenthesized operand form: (value, destination) : value type, destination type.
linalg.fill(%arg1, %arg0) : f32, memref<127x99xf32>
// ins/outs form: the scalar is listed under ins, the memref under outs.
linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<127x99xf32>)
return
}
// TILE-2-LABEL: func @fill_static
// TILE-2: for
// TILE-2-NOT: for
// TILE-2: memref.subview{{.*}} : memref<127x99xf32>
// TILE-2: linalg.fill{{.*}} : f32, memref<?x99xf32, #[[$stride_99_1_layout_map]]>
// TILE-2: linalg.fill{{.*}} : memref<?x99xf32, #[[$stride_99_1_layout_map]]>

// TILE-02-LABEL: func @fill_static
// TILE-02: for
// TILE-02-NOT: for
// TILE-02: memref.subview{{.*}} : memref<127x99xf32>
// TILE-02: linalg.fill{{.*}} : f32, memref<127x?xf32, #[[$stride_99_1_layout_map]]>
// TILE-02: linalg.fill{{.*}} : memref<127x?xf32, #[[$stride_99_1_layout_map]]>

// TILE-002-LABEL: func @fill_static
// TILE-002-NOT: for
// TILE-002: linalg.fill{{.*}} f32, memref<127x99xf32>
// TILE-002: linalg.fill{{.*}} : memref<127x99xf32>

// TILE-234-LABEL: func @fill_static
// TILE-234: for
// TILE-234: for
// TILE-234-NOT: for
// TILE-234: memref.subview{{.*}} : memref<127x99xf32>
// TILE-234: linalg.fill{{.*}} : f32, memref<?x3xf32, #[[$stride_99_1_layout_map]]>
// TILE-234: linalg.fill{{.*}} : memref<?x3xf32, #[[$stride_99_1_layout_map]]>


// Tiling test input: fills a dynamically shaped 2-D strided memref (%arg0)
// with the f32 scalar %arg1.
// NOTE(review): this listing appears to be a rendered diff, so the two
// linalg.fill lines below are presumably the removed (parenthesized operands)
// and the added (ins/outs) spelling of the same op - confirm that only the
// ins/outs form remains in the real file.
func @fill(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: f32) {
// Parenthesized operand form: (value, destination) : value type, destination type.
linalg.fill(%arg1, %arg0) : f32, memref<?x?xf32, offset: ?, strides: [?, 1]>
// ins/outs form: the scalar is listed under ins, the memref under outs.
linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<?x?xf32, offset: ?, strides: [?, 1]>)
return
}
// TILE-2-LABEL: func @fill
Expand Down Expand Up @@ -318,7 +318,7 @@ func @pointwise(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memre
linalg.generic #pointwise_2d_trait
ins(%arg0, %arg1 : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>)
outs(%arg2 : memref<?x?xf32, offset: ?, strides: [?, 1]>) {
^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
%4 = arith.addf %arg4, %arg5 : f32
linalg.yield %4 : f32
}
Expand Down
16 changes: 8 additions & 8 deletions mlir/test/Dialect/Linalg/transform-patterns.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ func @aligned_promote_fill(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>) {
%cf = arith.constant 1.0 : f32
%3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] :
memref<?x?xf32, offset: ?, strides: [?, 1]> to memref<?x?xf32, offset: ?, strides: [?, ?]>
linalg.fill(%cf, %3) { __internal_linalg_transform__ = "_promote_views_aligned_"}
: f32, memref<?x?xf32, offset: ?, strides: [?, ?]>
linalg.fill { __internal_linalg_transform__ = "_promote_views_aligned_"}
ins(%cf : f32) outs(%3 : memref<?x?xf32, offset: ?, strides: [?, ?]>)
return
}
// CHECK-LABEL: func @aligned_promote_fill
Expand All @@ -306,9 +306,9 @@ func @aligned_promote_fill(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>) {
// CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<32000000xi8>
// CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref<?x?xf32>
// CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xf32> to memref<?x?xf32, #[[$STRIDED_2D_u_1]]>
// CHECK: linalg.fill({{.*}}, %[[v0]]) : f32, memref<?x?xf32>
// CHECK: linalg.fill ins({{.*}} : f32) outs(%[[v0]] : memref<?x?xf32>)
// CHECK: memref.copy %[[s0]], %[[l0]] : memref<?x?xf32, #map{{.*}}> to memref<?x?xf32, #map{{.*}}>
// CHECK: linalg.fill(%[[cf]], %[[v0]]) : f32, memref<?x?xf32>
// CHECK: linalg.fill ins(%[[cf]] : f32) outs(%[[v0]] : memref<?x?xf32>)

func @aligned_promote_fill_complex(%arg0: memref<?x?xcomplex<f32>, offset: ?, strides: [?, 1]>) {
%c2000 = arith.constant 2000 : index
Expand All @@ -319,8 +319,8 @@ func @aligned_promote_fill_complex(%arg0: memref<?x?xcomplex<f32>, offset: ?, st
%cc = complex.create %cf, %cf : complex<f32>
%3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] :
memref<?x?xcomplex<f32>, offset: ?, strides: [?, 1]> to memref<?x?xcomplex<f32>, offset: ?, strides: [?, ?]>
linalg.fill(%cc, %3) { __internal_linalg_transform__ = "_promote_views_aligned_"}
: complex<f32>, memref<?x?xcomplex<f32>, offset: ?, strides: [?, ?]>
linalg.fill { __internal_linalg_transform__ = "_promote_views_aligned_"}
ins(%cc : complex<f32>) outs(%3 : memref<?x?xcomplex<f32>, offset: ?, strides: [?, ?]>)
return
}
// CHECK-LABEL: func @aligned_promote_fill_complex
Expand All @@ -329,9 +329,9 @@ func @aligned_promote_fill_complex(%arg0: memref<?x?xcomplex<f32>, offset: ?, st
// CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<64000000xi8>
// CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref<?x?xcomplex<f32>>
// CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref<?x?xcomplex<f32>> to memref<?x?xcomplex<f32>, #[[$STRIDED_2D_u_1]]>
// CHECK: linalg.fill({{.*}}, %[[v0]]) : complex<f32>, memref<?x?xcomplex<f32>>
// CHECK: linalg.fill ins({{.*}} : complex<f32>) outs(%[[v0]] : memref<?x?xcomplex<f32>>)
// CHECK: memref.copy %[[s0]], %[[l0]] : memref<?x?xcomplex<f32>, #map{{.*}}> to memref<?x?xcomplex<f32>, #map{{.*}}>
// CHECK: linalg.fill(%[[cc]], %[[v0]]) : complex<f32>, memref<?x?xcomplex<f32>>
// CHECK: linalg.fill ins(%[[cc]] : complex<f32>) outs(%[[v0]] : memref<?x?xcomplex<f32>>)

func @tile_permute_parallel_loop(%arg0: memref<?x?xf32>,
%arg1: memref<?x?xf32>,
Expand Down
24 changes: 12 additions & 12 deletions mlir/test/Dialect/Linalg/vectorization.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func @test_vectorize_scalar_input(%A : memref<8x16xf32>, %arg0 : f32) {
// Vectorization test input: fills the 8x16 f32 memref %A with the scalar
// %arg0. The expectations inside the body look for a vector.broadcast of the
// scalar to vector<8x16xf32> followed by a vector.transfer_write into the
// memref.
// NOTE(review): this listing appears to be a rendered diff, so the two
// linalg.fill lines below are presumably the removed (parenthesized operands)
// and the added (ins/outs) spelling of the same op - confirm that only the
// ins/outs form remains in the real file.
func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) {
// CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32>
// CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32>
// Parenthesized operand form: (value, destination) : value type, destination type.
linalg.fill(%arg0, %A) : f32, memref<8x16xf32>
// ins/outs form: the scalar is listed under ins, the memref under outs.
linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>)
return
}

Expand All @@ -202,7 +202,7 @@ func @test_vectorize_fill_scalar(%A : memref<f32>, %arg0 : f32) {
// CHECK-SAME: (%[[M:.*]]: memref<f32>, %[[val:.*]]: f32)
// CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector<f32>
// CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector<f32>, memref<f32>
linalg.fill(%arg0, %A) : f32, memref<f32>
linalg.fill ins(%arg0 : f32) outs(%A : memref<f32>)
return
}

Expand Down Expand Up @@ -590,7 +590,7 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
// CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index
// CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index
// CHECK: %[[INIT:.*]] = linalg.init_tensor [6, %[[V1]], %[[V2]], %[[V5]]] : tensor<6x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill(%{{.*}}, %[[INIT]]) : f32, tensor<6x?x?x?xf32> -> tensor<6x?x?x?xf32>
// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
// CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
// CHECK: return %[[RESULT]]
Expand Down Expand Up @@ -833,7 +833,7 @@ func @red_max_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%ident = arith.constant -3.40282e+38 : f32
%init = linalg.init_tensor [4] : tensor<4xf32>
%fill = linalg.fill(%ident, %init) : f32, tensor<4xf32> -> tensor<4xf32>
%fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
Expand All @@ -858,7 +858,7 @@ func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%maxf32 = arith.constant 3.40282e+38 : f32
%init = linalg.init_tensor [4] : tensor<4xf32>
%fill = linalg.fill(%maxf32, %init) : f32, tensor<4xf32> -> tensor<4xf32>
%fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
Expand All @@ -881,7 +881,7 @@ func @red_mul_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
// CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
%ident = arith.constant 1.0 : f32
%init = linalg.init_tensor [4] : tensor<4xf32>
%fill = linalg.fill(%ident, %init) : f32, tensor<4xf32> -> tensor<4xf32>
%fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
Expand All @@ -904,7 +904,7 @@ func @red_or_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant false
%init = linalg.init_tensor [4] : tensor<4xi1>
%fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
%fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
Expand All @@ -927,7 +927,7 @@ func @red_and_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant true
%init = linalg.init_tensor [4] : tensor<4xi1>
%fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
%fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
Expand All @@ -950,7 +950,7 @@ func @red_xor_2d(%arg0: tensor<4x4xi1>) -> tensor<4xi1> {
// CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
%ident = arith.constant false
%init = linalg.init_tensor [4] : tensor<4xi1>
%fill = linalg.fill(%ident, %init) : i1, tensor<4xi1> -> tensor<4xi1>
%fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0)>],
iterator_types = ["parallel", "reduction"]}
Expand All @@ -974,7 +974,7 @@ func @explicit_broadcast(%arg0: tensor<4x4xf32>, %arg1: tensor<4x1xf32>) -> tens
// CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
%c0 = arith.constant 0.0 : f32
%init = linalg.init_tensor [4, 4] : tensor<4x4xf32>
%fill = linalg.fill(%c0, %init) : f32, tensor<4x4xf32> -> tensor<4x4xf32>
%fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, 0)>,
affine_map<(d0, d1) -> (d0, d1)>],
Expand Down Expand Up @@ -1003,7 +1003,7 @@ func @fused_broadcast_red_2d(%arg0: tensor<4x4xf32>, %arg1: tensor<4x1xf32>) ->
// CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
%c0 = arith.constant 0.0 : f32
%init = linalg.init_tensor [4] : tensor<4xf32>
%fill = linalg.fill(%c0, %init) : f32, tensor<4xf32> -> tensor<4xf32>
%fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
%red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, 0)>,
affine_map<(d0, d1) -> (d0)>],
Expand Down Expand Up @@ -1034,7 +1034,7 @@ func @reduce_1d(%arg0: tensor<32xf32>) -> tensor<f32> {

// CHECK: %[[f:.*]] = vector.transfer_write %[[vF0]], %[[init]][]
// CHECK-SAME: : vector<f32>, tensor<f32>
%1 = linalg.fill(%f0, %0) : f32, tensor<f32> -> tensor<f32>
%1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<f32>) -> tensor<f32>
// CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
// CHECK-SAME: : tensor<32xf32>, vector<32xf32>
// CHECK: %[[f0:.*]] = vector.extractelement %[[vF0]][] : vector<f32>
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Dialect/SparseTensor/conversion.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -446,8 +446,8 @@ func @sparse_insert(%arg0: tensor<128xf32, #SparseVector>,
// %[[V:.*]] = memref.alloca(%[[S]]) : memref<?xf64>
// %[[F:.*]] = memref.alloca(%[[S]]) : memref<?xi1>
// %[[A:.*]] = memref.alloca(%[[S]]) : memref<?xindex>
// linalg.fill(%{{.*}}, %[[V]]) : f64, memref<?xf64>
// linalg.fill(%{{.*}}, %[[F]]) : i1, memref<?xi1>
// linalg.fill ins(%{{.*}} : f64) outs(%[[V]] : memref<?xf64>)
// linalg.fill ins(%{{.*}} : i1) outs(%[[F]] : memref<?xi1>)
// CHECK: return
func @sparse_expansion() {
%c = arith.constant 8 : index
Expand Down
14 changes: 7 additions & 7 deletions mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
// CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref<?xindex>
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<i32>
// CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32>
// CHECK-DAG: linalg.fill(%[[zeroI32]], %[[M]]) : i32, memref<13xi32>
// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref<13xi32>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<i32>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down Expand Up @@ -74,7 +74,7 @@ func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32>
// CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref<?xindex>
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<i32>
// CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref<?xi32>
// CHECK-DAG: linalg.fill(%[[zeroI32]], %[[M]]) : i32, memref<?xi32>
// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref<?xi32>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<i32>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down Expand Up @@ -117,7 +117,7 @@ func @sparse_convert_1d_dyn(%arg0: tensor<?xi32, #SparseVector>) -> tensor<?xi32
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
// CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64>
// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<2x4xf64>
// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down Expand Up @@ -161,7 +161,7 @@ func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
// CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref<?x4xf64>
// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<?x4xf64>
// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<?x4xf64>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down Expand Up @@ -205,7 +205,7 @@ func @sparse_convert_2d_dyn0(%arg0: tensor<?x4xf64, #SparseMatrix>) -> tensor<?x
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
// CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64>
// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<2x?xf64>
// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down Expand Up @@ -249,7 +249,7 @@ func @sparse_convert_2d_dyn1(%arg0: tensor<2x?xf64, #SparseMatrix>) -> tensor<2x
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
// CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref<?x?xf64>
// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<?x?xf64>
// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<?x?xf64>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down Expand Up @@ -297,7 +297,7 @@ func @sparse_convert_2d_dyn2(%arg0: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x
// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
// CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64>
// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<2x3x4xf64>
// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>)
// CHECK: scf.while : () -> () {
// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
// CHECK: scf.condition(%[[Cond]])
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/SparseTensor/sparse_1d.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> te
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK: %[[VAL_7:.*]] = memref.alloc() : memref<32xf32>
// CHECK: linalg.fill(%[[VAL_3]], %[[VAL_7]]) : f32, memref<32xf32>
// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -
// LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
// LINALG: } else {
// slow path, fill tmp alloc and yield a memref_casted version of it
// LINALG: linalg.fill(%cst, %[[alloc]]) : f32, memref<4x8xf32>
// LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
// LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
// LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]])
// LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
Expand Down Expand Up @@ -168,7 +168,7 @@ func @split_vector_transfer_read_strided_2d(
// LINALG-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
// LINALG: } else {
// slow path, fill tmp alloc and yield a memref_casted version of it
// LINALG: linalg.fill(%cst, %[[alloc]]) : f32, memref<4x8xf32>
// LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
// LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]])
// LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
// LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ func @entry() {
%RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
%DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>

linalg.fill(%f1, %LHS10) : f32, memref<1x10xf32>
linalg.fill(%f1, %RHS10) : f32, memref<1x10xf32>
linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>)
linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>)

%LHS = memref.cast %LHS10 : memref<1x10xf32> to memref<?x?xf32>
%RHS = memref.cast %RHS10 : memref<1x10xf32> to memref<?x?xf32>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ func @entry() {
%RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
%DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>

linalg.fill(%f1, %LHS10) : f32, memref<1x10xf32>
linalg.fill(%f1, %RHS10) : f32, memref<1x10xf32>
linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>)
linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>)

%LHS = memref.cast %LHS10 : memref<1x10xf32> to memref<?x?xf32>
%RHS = memref.cast %RHS10 : memref<1x10xf32> to memref<?x?xf32>
Expand Down
12 changes: 6 additions & 6 deletions mlir/test/Integration/Dialect/Linalg/CPU/benchmark_matmul.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ func @main() {
%B = memref.alloc() : !row_major_B
%C = memref.alloc() : !row_major_C

linalg.fill(%v1, %A) : !elem_type_a, !row_major_A
linalg.fill(%v1, %B) : !elem_type_b, !row_major_B
linalg.fill(%v0, %C) : !elem_type_c, !row_major_C
linalg.fill ins(%v1 : !elem_type_a) outs(%A : !row_major_A)
linalg.fill ins(%v1 : !elem_type_b) outs(%B : !row_major_B)
linalg.fill ins(%v0 : !elem_type_c) outs(%C : !row_major_C)

%c0 = arith.constant 0: index
%c1 = arith.constant 1: index
Expand All @@ -71,7 +71,7 @@ func @main() {
/// Preheating run:
scf.for %arg0 = %c0 to %iters step %c1 {
%z = arith.constant 0.0 : !elem_type_c
linalg.fill(%z, %C) : !elem_type_c, !row_major_C
linalg.fill ins(%z : !elem_type_c) outs(%C : !row_major_C)
call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> ()
}
%t_start_matmul = call @rtclock() : () -> f64
Expand All @@ -81,7 +81,7 @@ func @main() {
// Once linalg on tensors is ready, fusing fill at the register level will
// be easy.
%z = arith.constant 0.0 : !elem_type_c
linalg.fill(%z, %C) : !elem_type_c, !row_major_C
linalg.fill ins(%z : !elem_type_c) outs(%C : !row_major_C)
call @matmul(%A, %B, %C) : (!row_major_A, !row_major_B, !row_major_C) -> ()
}
%t_end_matmul = call @rtclock() : () -> f64
Expand All @@ -90,7 +90,7 @@ func @main() {

// CHECK: {{^0$}}
%C_ref = memref.alloc() : !row_major_C
linalg.fill(%v0, %C_ref) : !elem_type_c, !row_major_C
linalg.fill ins(%v0 : !elem_type_c) outs(%C_ref : !row_major_C)
linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
outs(%C_ref: !row_major_C)
%act = memref.cast %C : !row_major_C to memref<*xf32>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ func @matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>) -> (memref<?x?xf32>) {
%x = memref.dim %A, %c0 : memref<?x?xf32>
%y = memref.dim %B, %c1 : memref<?x?xf32>
%C = memref.alloc(%x, %y) : memref<?x?xf32>
linalg.fill(%f0, %C) : f32, memref<?x?xf32>
linalg.fill ins(%f0 : f32) outs(%C : memref<?x?xf32>)
linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C: memref<?x?xf32>)
return %C : memref<?x?xf32>
Expand All @@ -26,7 +26,7 @@ func @matvec(%A: memref<?x?xf32>, %B: memref<?x?xf32>) -> (memref<?x?xf32>) {
%x = memref.dim %A, %c1 : memref<?x?xf32>
%n = memref.dim %B, %c1 : memref<?x?xf32>
%C = memref.alloc(%m, %n) : memref<?x?xf32>
linalg.fill(%f0, %C) : f32, memref<?x?xf32>
linalg.fill ins(%f0 : f32) outs(%C : memref<?x?xf32>)
scf.for %i = %c0 to %n step %c1 {
%b = memref.subview %B[0, %i][%x, 1][1, 1] : memref<?x?xf32> to memref<?xf32, offset: ?, strides: [?]>
%c = memref.subview %C[0, %i][%m, 1][1, 1] : memref<?x?xf32> to memref<?xf32, offset: ?, strides: [?]>
Expand All @@ -46,8 +46,8 @@ func @main() {
%val2 = arith.constant 17.0 : f32
%A = memref.alloc(%m, %x) : memref<?x?xf32>
%B = memref.alloc(%x, %n) : memref<?x?xf32>
linalg.fill(%val1, %A) : f32, memref<?x?xf32>
linalg.fill(%val2, %B) : f32, memref<?x?xf32>
linalg.fill ins(%val1 : f32) outs(%A : memref<?x?xf32>)
linalg.fill ins(%val2 : f32) outs(%B : memref<?x?xf32>)
memref.store %val1, %B[%c0, %c0] : memref<?x?xf32>
%C1 = call @matmul(%A, %B) : (memref<?x?xf32>, memref<?x?xf32>) -> memref<?x?xf32>
%C2 = call @matvec(%A, %B) : (memref<?x?xf32>, memref<?x?xf32>) -> memref<?x?xf32>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor<f
%cst = arith.constant 0.000000e+00 : f32
%c2 = arith.constant 2 : index
%c0 = arith.constant 0 : index
%0 = linalg.fill(%cst, %arg2) : f32, tensor<f32> -> tensor<f32>
%0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<f32>) -> tensor<f32>
%1 = affine.apply #map0(%c0, %c64)[%c2]
%2 = linalg.init_tensor [%1, 2] : tensor<?x2xf32>
%3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor<?x2xf32>) {
Expand Down Expand Up @@ -83,9 +83,9 @@ func @main() {
%A = linalg.init_tensor [64] : tensor<64xf32>
%B = linalg.init_tensor [64] : tensor<64xf32>
%C = linalg.init_tensor [] : tensor<f32>
%AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
%BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
%CC = linalg.fill(%v0, %C) : f32, tensor<f32> -> tensor<f32>
%AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
%BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
%CC = linalg.fill ins(%v0 : f32) outs(%C : tensor<f32>) -> tensor<f32>

%res = call @init_and_dot(%AA, %BB, %CC) :
(tensor<64xf32>, tensor<64xf32>, tensor<f32>) -> tensor<f32>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func private @print_memref_f32(memref<*xf32>)
// Creates and returns a 1-D buffer of size %s1 filled with the value %f.
// %s1 supplies the single dynamic extent of the result type memref<?xf32>.
func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref<?xf32> {
// Allocate the buffer; %s1 binds the one `?` dimension.
%buf = memref.alloc(%s1) : memref<?xf32>
// Write %f into %buf.
// NOTE(review): the next two lines are the same fill in two assembly spellings
// (parenthesized form, then ins/outs form); this listing looks like both sides
// of a syntax migration, so confirm that only one of them belongs in the real file.
linalg.fill(%f, %buf) : f32, memref<?xf32>
linalg.fill ins(%f : f32) outs(%buf : memref<?xf32>)
return %buf : memref<?xf32>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func private @print_memref_f32(memref<*xf32>)
// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f.
// The three index arguments supply the dynamic extents of memref<?x?x?xf32>.
func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> {
// Allocate the buffer; %s1, %s2, %s3 bind the three `?` dimensions in order.
%buf = memref.alloc(%s1, %s2, %s3) : memref<?x?x?xf32>
// Write %f into %buf.
// NOTE(review): the next two lines are the same fill in two assembly spellings
// (parenthesized form, then ins/outs form); this listing looks like both sides
// of a syntax migration, so confirm that only one of them belongs in the real file.
linalg.fill(%f, %buf) : f32, memref<?x?x?xf32>
linalg.fill ins(%f : f32) outs(%buf : memref<?x?x?xf32>)
return %buf : memref<?x?x?xf32>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func private @print_memref_f32(memref<*xf32>)
// Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f.
// The two index arguments supply the dynamic extents of memref<?x?xf32>.
func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref<?x?xf32> {
// Allocate the buffer; %s1 and %s2 bind the two `?` dimensions in order.
%buf = memref.alloc(%s1, %s2) : memref<?x?xf32>
// Write %f into %buf.
// NOTE(review): the next two lines are the same fill in two assembly spellings
// (parenthesized form, then ins/outs form); this listing looks like both sides
// of a syntax migration, so confirm that only one of them belongs in the real file.
linalg.fill(%f, %buf) : f32, memref<?x?xf32>
linalg.fill ins(%f : f32) outs(%buf : memref<?x?xf32>)
return %buf : memref<?x?xf32>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func private @print_memref_f32(memref<*xf32>)
// Creates and returns a 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f.
// The four index arguments supply the dynamic extents of memref<?x?x?x?xf32>.
func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref<?x?x?x?xf32> {
// Allocate the buffer; %s1..%s4 bind the four `?` dimensions in order.
%buf = memref.alloc(%s1, %s2, %s3, %s4) : memref<?x?x?x?xf32>
// Write %f into %buf.
// NOTE(review): the next two lines are the same fill in two assembly spellings
// (parenthesized form, then ins/outs form); this listing looks like both sides
// of a syntax migration, so confirm that only one of them belongs in the real file.
linalg.fill(%f, %buf) : f32, memref<?x?x?x?xf32>
linalg.fill ins(%f : f32) outs(%buf : memref<?x?x?x?xf32>)
return %buf : memref<?x?x?x?xf32>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func private @print_memref_f32(memref<*xf32>)
// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f.
// The three index arguments supply the dynamic extents of memref<?x?x?xf32>.
func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> {
// Allocate the buffer; %s1, %s2, %s3 bind the three `?` dimensions in order.
%buf = memref.alloc(%s1, %s2, %s3) : memref<?x?x?xf32>
// Write %f into %buf.
// NOTE(review): the next two lines are the same fill in two assembly spellings
// (parenthesized form, then ins/outs form); this listing looks like both sides
// of a syntax migration, so confirm that only one of them belongs in the real file.
linalg.fill(%f, %buf) : f32, memref<?x?x?xf32>
linalg.fill ins(%f : f32) outs(%buf : memref<?x?x?xf32>)
return %buf : memref<?x?x?xf32>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func private @print_memref_f32(memref<*xf32>)
// Creates and returns a 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f.
// The five index arguments supply the dynamic extents of memref<?x?x?x?x?xf32>.
func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref<?x?x?x?x?xf32> {
// Allocate the buffer; %s1..%s5 bind the five `?` dimensions in order.
%buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref<?x?x?x?x?xf32>
// Write %f into %buf.
// NOTE(review): the next two lines are the same fill in two assembly spellings
// (parenthesized form, then ins/outs form); this listing looks like both sides
// of a syntax migration, so confirm that only one of them belongs in the real file.
linalg.fill(%f, %buf) : f32, memref<?x?x?x?x?xf32>
linalg.fill ins(%f : f32) outs(%buf : memref<?x?x?x?x?xf32>)
return %buf : memref<?x?x?x?x?xf32>
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def emit_tensor_init(self) -> ir.RankedTensorType:
ir_type = _mlir_type_from_taco_type(self.dst_dtype)
tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
zero = arith.ConstantOp(ir_type, 0.0)
return linalg.FillOp(output=tensor, value=zero).results[0]
return linalg.fill(zero, outs=[tensor])

# Initialize the sparse tensor.
mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims,
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/mlir-cpu-runner/async.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func @main() {
%c4 = arith.constant 4.0 : f32

%A = memref.alloc() : memref<4xf32>
linalg.fill(%c0, %A) : f32, memref<4xf32>
linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)

// CHECK: [0, 0, 0, 0]
%U = memref.cast %A : memref<4xf32> to memref<*xf32>
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ func @main() {

%cf1 = arith.constant 1.00000e+00 : f32

linalg.fill(%cf1, %A) : f32, memref<16x16xf32>
linalg.fill(%cf1, %B) : f32, memref<16x16xf32>
linalg.fill ins(%cf1 : f32) outs(%A : memref<16x16xf32>)
linalg.fill ins(%cf1 : f32) outs(%B : memref<16x16xf32>)

%reps = arith.constant 1 : index

%t_start = call @rtclock() : () -> f64
affine.for %arg0 = 0 to 5 {
linalg.fill(%cf1, %C) : f32, memref<16x16xf32>
linalg.fill ins(%cf1 : f32) outs(%C : memref<16x16xf32>)
call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
}
%t_end = call @rtclock() : () -> f64
Expand Down
10 changes: 5 additions & 5 deletions mlir/test/mlir-cpu-runner/unranked-memref.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,18 @@ func @main() -> () {
%f10 = arith.constant 10.00000e+00 : f32

%V = memref.cast %A : memref<10x3xf32, 0> to memref<?x?xf32>
linalg.fill(%f10, %V) : f32, memref<?x?xf32, 0>
linalg.fill ins(%f10 : f32) outs(%V : memref<?x?xf32, 0>)
%U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
call @print_memref_f32(%U) : (memref<*xf32>) -> ()

%V2 = memref.cast %U : memref<*xf32> to memref<?x?xf32>
linalg.fill(%f5, %V2) : f32, memref<?x?xf32, 0>
linalg.fill ins(%f5 : f32) outs(%V2 : memref<?x?xf32, 0>)
%U2 = memref.cast %V2 : memref<?x?xf32, 0> to memref<*xf32>
call @print_memref_f32(%U2) : (memref<*xf32>) -> ()

%V3 = memref.cast %V2 : memref<?x?xf32> to memref<*xf32>
%V4 = memref.cast %V3 : memref<*xf32> to memref<?x?xf32>
linalg.fill(%f2, %V4) : f32, memref<?x?xf32, 0>
linalg.fill ins(%f2 : f32) outs(%V4 : memref<?x?xf32, 0>)
%U3 = memref.cast %V2 : memref<?x?xf32> to memref<*xf32>
call @print_memref_f32(%U3) : (memref<*xf32>) -> ()

Expand All @@ -79,7 +79,7 @@ func private @print_memref_f32(memref<*xf32>) attributes { llvm.emit_c_interface
func @return_two_var_memref_caller() {
%0 = memref.alloca() : memref<4x3xf32>
%c0f32 = arith.constant 1.0 : f32
linalg.fill(%c0f32, %0) : f32, memref<4x3xf32>
linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
%1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
call @print_memref_f32(%1#0) : (memref<*xf32>) -> ()
call @print_memref_f32(%1#1) : (memref<*xf32>) -> ()
Expand All @@ -94,7 +94,7 @@ func @return_two_var_memref_caller() {
func @return_var_memref_caller() {
%0 = memref.alloca() : memref<4x3xf32>
%c0f32 = arith.constant 1.0 : f32
linalg.fill(%c0f32, %0) : f32, memref<4x3xf32>
linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
%1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
call @print_memref_f32(%1) : (memref<*xf32>) -> ()
return
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/mlir-cpu-runner/utils.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func @print_1d() {
%f = arith.constant 2.00000e+00 : f32
%A = memref.alloc() : memref<16xf32>
%B = memref.cast %A: memref<16xf32> to memref<?xf32>
linalg.fill(%f, %B) : f32, memref<?xf32>
linalg.fill ins(%f : f32) outs(%B : memref<?xf32>)
%U = memref.cast %B : memref<?xf32> to memref<*xf32>
call @print_memref_f32(%U): (memref<*xf32>) -> ()
memref.dealloc %A : memref<16xf32>
Expand All @@ -33,7 +33,7 @@ func @print_3d() {
%f4 = arith.constant 4.00000e+00 : f32
%A = memref.alloc() : memref<3x4x5xf32>
%B = memref.cast %A: memref<3x4x5xf32> to memref<?x?x?xf32>
linalg.fill(%f, %B) : f32, memref<?x?x?xf32>
linalg.fill ins(%f : f32) outs(%B : memref<?x?x?xf32>)

%c2 = arith.constant 2 : index
memref.store %f4, %B[%c2, %c2, %c2]: memref<?x?x?xf32>
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/mlir-opt/async.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func @main() {
%c4 = arith.constant 4.0 : f32

%A = memref.alloc() : memref<4xf32>
linalg.fill(%c0, %A) : f32, memref<4xf32>
linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)

%U = memref.cast %A : memref<4xf32> to memref<*xf32>
call @print_memref_f32(%U): (memref<*xf32>) -> ()
Expand Down
12 changes: 6 additions & 6 deletions mlir/test/python/dialects/linalg/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,22 @@ def testFill():
# CHECK-LABEL: func @fill_tensor
# CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32>
# CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
# CHECK-NEXT: %[[RES:.*]] = linalg.fill(%[[CST]], %[[OUT]]) : f32, tensor<12x?xf32> -> tensor<12x?xf32>
# CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
# CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
@builtin.FuncOp.from_py_func(RankedTensorType.get((12, -1), f32))
def fill_tensor(out):
zero = arith.ConstantOp(value=FloatAttr.get(f32, 0.), result=f32).result
return linalg.FillOp(output=out, value=zero).result
return linalg.fill(zero, outs=[out])

# CHECK-LABEL: func @fill_buffer
# CHECK-SAME: %[[OUT:[0-9a-z]+]]: memref<12x?xf32>
# CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
# CHECK-NEXT: linalg.fill(%[[CST]], %[[OUT]]) : f32, memref<12x?xf32>
# CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : memref<12x?xf32>)
# CHECK-NEXT: return
@builtin.FuncOp.from_py_func(MemRefType.get((12, -1), f32))
def fill_buffer(out):
zero = arith.ConstantOp(value=FloatAttr.get(f32, 0.), result=f32).result
linalg.FillOp(output=out, value=zero)
linalg.fill(zero, outs=[out])

print(module)

Expand Down Expand Up @@ -179,9 +179,9 @@ def testOpResultFromOtherOp():
def pass_an_op_directly(arg0, arg1):
one = arith.ConstantOp(F32Type.get(), 1.0)
# CHECK: %[[LHS:.*]] = linalg.fill
lhs = linalg.FillOp(arg0, one)
lhs = linalg.fill(one, outs=[arg0])
# CHECK: %[[RHS:.*]] = linalg.fill
rhs = linalg.FillOp(arg1, one)
rhs = linalg.fill(one, outs=[arg1])
# CHECK: %[[INIT:.*]] = linalg.init_tensor
init = linalg.InitTensorOp([4, 8], f32)
# CHECK: linalg.matmul
Expand Down
40 changes: 20 additions & 20 deletions mlir/test/python/integration/dialects/linalg/opsrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def log(*args):
%rhs = memref.alloc() : memref<4x8xf32>
%O0 = memref.alloc() : memref<4x8xf32>
%O1 = memref.alloc() : memref<4x8xf32>
linalg.fill(%v1, %lhs) : f32, memref<f32>
linalg.fill(%v2, %rhs) : f32, memref<4x8xf32>
linalg.fill(%v0, %O0) : f32, memref<4x8xf32>
linalg.fill(%v0, %O1) : f32, memref<4x8xf32>
linalg.fill ins(%v1 : f32) outs(%lhs : memref<f32>)
linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)
call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0) :
(memref<f32>, memref<4x8xf32>, memref<4x8xf32>) -> ()
Expand Down Expand Up @@ -60,10 +60,10 @@ def log(*args):
%B = memref.alloc() : memref<16x8xf32>
%C0 = memref.alloc() : memref<4x8xf32>
%C1 = memref.alloc() : memref<4x8xf32>
linalg.fill(%v1, %A) : i8, memref<4x16xi8>
linalg.fill(%v2, %B) : f32, memref<16x8xf32>
linalg.fill(%v0, %C0) : f32, memref<4x8xf32>
linalg.fill(%v0, %C1) : f32, memref<4x8xf32>
linalg.fill ins(%v1 : i8) outs(%A : memref<4x16xi8>)
linalg.fill ins(%v2 : f32) outs(%B : memref<16x8xf32>)
linalg.fill ins(%v0 : f32) outs(%C0 : memref<4x8xf32>)
linalg.fill ins(%v0 : f32) outs(%C1 : memref<4x8xf32>)
call @matmul_signed_on_buffers(%A, %B, %C0) :
(memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()
Expand Down Expand Up @@ -137,9 +137,9 @@ def log(*args):
%input = memref.alloc() : memref<1x4x16x1xf64>
%filter = memref.alloc() : memref<2x2x1xf64>
%output = memref.alloc() : memref<1x2x4x1xi32>
linalg.fill(%v1, %input) : f64, memref<1x4x16x1xf64>
linalg.fill(%v2, %filter) : f64, memref<2x2x1xf64>
linalg.fill(%v0, %output) : i32, memref<1x2x4x1xi32>
linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
linalg.fill ins(%v2 : f64) outs(%filter : memref<2x2x1xf64>)
linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
call @conv_on_buffers(%input, %filter, %output) :
(memref<1x4x16x1xf64>, memref<2x2x1xf64>, memref<1x2x4x1xi32>) -> ()
Expand All @@ -163,9 +163,9 @@ def log(*args):
%input = memref.alloc() : memref<1x4x16x1xf64>
%shape = memref.alloc() : memref<2x2xf64>
%output = memref.alloc() : memref<1x2x4x1xi32>
linalg.fill(%v1, %input) : f64, memref<1x4x16x1xf64>
linalg.fill(%v1, %shape) : f64, memref<2x2xf64>
linalg.fill(%v0, %output) : i32, memref<1x2x4x1xi32>
linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
linalg.fill ins(%v1 : f64) outs(%shape : memref<2x2xf64>)
linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
Expand Down Expand Up @@ -368,15 +368,15 @@ def test_fill_builtin():

@builtin.FuncOp.from_py_func(f32, MemRefType.get([], i32))
def fill_0d_on_buffers(value, out):
linalg.fill_tensor(value, outs=[out])
linalg.fill(value, outs=[out])

@builtin.FuncOp.from_py_func(f32, MemRefType.get([16], i32))
def fill_1d_on_buffers(value, out):
linalg.fill_tensor(value, outs=[out])
linalg.fill(value, outs=[out])

@builtin.FuncOp.from_py_func(f32, MemRefType.get([4, 16], i32))
def fill_2d_on_buffers(value, out):
linalg.fill_tensor(value, outs=[out])
linalg.fill(value, outs=[out])

execution_engine = ExecutionEngine(transform(module, fill_boiler))

Expand All @@ -403,15 +403,15 @@ def test_fill_generic():

@builtin.FuncOp.from_py_func(f32, MemRefType.get([], i32))
def fill_0d_on_buffers(value, out):
linalg.fill_tensor(value, outs=[out], emit_generic=True)
linalg.fill(value, outs=[out], emit_generic=True)

@builtin.FuncOp.from_py_func(f32, MemRefType.get([16], i32))
def fill_1d_on_buffers(value, out):
linalg.fill_tensor(value, outs=[out], emit_generic=True)
linalg.fill(value, outs=[out], emit_generic=True)

@builtin.FuncOp.from_py_func(f32, MemRefType.get([4, 16], i32))
def fill_2d_on_buffers(value, out):
linalg.fill_tensor(value, outs=[out], emit_generic=True)
linalg.fill(value, outs=[out], emit_generic=True)

execution_engine = ExecutionEngine(transform(module, fill_boiler))

Expand Down