@@ -1,12 +1,14 @@
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize=allow-return-allocs -split-input-file | FileCheck %s
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-allocs fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs fully-dynamic-layout-maps=0 bufferize-function-boundaries" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
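// With fully-dynamic-layout-maps=0, function-boundary memrefs are expected to
// use the default identity layout (e.g. memref<?xf32> rather than
// memref<?xf32, #map>), which is what the CHECK-NO-LAYOUT-MAP prefix verifies.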
// TODO: Some test cases from this file should be moved to other dialects.
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
@@ -32,19 +34,6 @@ func.func @fill_inplace(
// -----
// CHECK-LABEL: func @tensor_extract(%{{.*}}: memref<?xf32, #{{.*}}>) -> f32 {
func.func @tensor_extract(%A : tensor<?xf32> {bufferization.writable = false}) -> (f32) {
  %c0 = arith.constant 0 : index
  // CHECK: %[[RES:.*]] = memref.load {{.*}} : memref<?xf32, #{{.*}}>
  %0 = tensor.extract %A[%c0] : tensor<?xf32>
  // CHECK: return %[[RES]] : f32
  return %0 : f32
}
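// Note: %A is only read (tensor.extract lowers to memref.load), so no
// allocation or copy is expected even though %A is marked non-writable.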
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
/// No bufferization.writable flag, must allocate.
@@ -160,138 +149,6 @@ func.func @vec_not_inplace(
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @insert_slice_fun
// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>,
// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
    %A0 : tensor<?xf32> {bufferization.writable = false},
    %A1 : tensor<?xf32> {bufferization.writable = true},
    %t0 : tensor<4xf32> {bufferization.writable = false},
    %t1 : tensor<4xf32> {bufferization.writable = true})
  -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
{
  // Hoisted allocs.
  // CHECK: %[[REALLOC1:.*]] = memref.alloc
  // CHECK: %[[REALLOC2:.*]] = memref.alloc
  // CHECK: %[[REALLOC3:.*]] = memref.alloc
  // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
  // CHECK: memref.copy %[[A0]], %[[REALLOC3]]
  // CHECK: %[[SV_A0:.*]] = memref.subview %[[REALLOC3]]
  // CHECK: memref.copy %[[t0]], %[[SV_A0]]
  %r0 = tensor.insert_slice %t0 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
  // Alloc and copy the whole result tensor. Copy the tensor.extract_slice.
  // CHECK: memref.copy %[[A0]]
  // CHECK: %[[SV_A0_2:.*]] = memref.subview %[[REALLOC2]]
  // CHECK: memref.copy %[[t1]], %[[SV_A0_2]]
  %r1 = tensor.insert_slice %t1 into %A0[0][4][1] : tensor<4xf32> into tensor<?xf32>
  // Still alloc the large tensor because %A1 is read after. Copy the tensor.extract_slice.
  // CHECK: memref.copy %[[A1]]
  // CHECK: %[[SV_A1:.*]] = memref.subview %[[REALLOC1]]
  // CHECK: memref.copy %[[t0]], %[[SV_A1]]
  %r2 = tensor.insert_slice %t0 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
  // Do not realloc the large tensor. Copy the tensor.extract_slice.
  // CHECK-NOT: alloc
  // CHECK: %[[SV_A1_2:.*]] = memref.subview %[[A1]]
  // CHECK: memref.copy %[[t1]], %[[SV_A1_2]]
  %r3 = tensor.insert_slice %t1 into %A1[0][4][1] : tensor<4xf32> into tensor<?xf32>
  // CHECK: return %[[REALLOC3]], %[[REALLOC2]], %[[REALLOC1]] :
  // CHECK-SAME: memref<?xf32>, memref<?xf32>, memref<?xf32>
  return %r0, %r1, %r2, %r3 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
}
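// Summary of the expected behavior above: %A0 is non-writable, so both
// insert_slice ops into it bufferize out-of-place into hoisted allocations.
// The first insert_slice into %A1 also gets a copy because %A1 is still read
// by the later insert_slice, while the last one can reuse %A1's buffer in place.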
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @insert_slice_fun
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
    %A : tensor<?xf32> {bufferization.writable = true},
    %t : tensor<4xf32> {bufferization.writable = false})
  -> tensor<?xf32>
{
  %f0 = arith.constant 0.0 : f32
  // CHECK-NOT: alloc
  // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
  // CHECK: memref.copy %[[t]], %[[SV_A]]
  %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
  /// Overwrite A inplace.
  // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
  %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor<?xf32>) -> tensor<?xf32>
  // CHECK: return
  // CHECK-NOT: tensor
  return %r1 : tensor<?xf32>
}
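// Note: %A is writable and its original contents are not read again, so the
// insert_slice and the subsequent linalg.fill are both expected to operate
// directly on %A's buffer; no allocation should be needed.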
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @insert_slice_fun
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun(
    %A : tensor<?xf32> {bufferization.writable = true},
    %t : tensor<4xf32> {bufferization.writable = false})
  -> tensor<?xf32>
{
  %f0 = arith.constant 0.0 : f32
  // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
  %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor<?xf32>) -> tensor<?xf32>
  // CHECK-NOT: alloc
  // CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
  /// Overwrite A inplace by copying into the subview.
  // CHECK: memref.copy %[[t]], %[[SV_A]]
  %r1 = tensor.insert_slice %t into %r0[0][4][1] : tensor<4xf32> into tensor<?xf32>
  // CHECK: return
  // CHECK-NOT: tensor
  return %r1 : tensor<?xf32>
}
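// Note: same in-place buffer reuse as above but in the opposite order: the
// fill writes %A's buffer first, then the insert_slice copies %t into a
// subview of that same buffer; again no allocation is expected.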
// -----
// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @insert_slice_fun_not_inplace
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
func.func @insert_slice_fun_not_inplace(
    %A : tensor<?xf32> {bufferization.writable = false},
    %t : tensor<4xf32> {bufferization.writable = false})
  -> tensor<?xf32>
{
  // CHECK: %[[ALLOC:.*]] = memref.alloc(%{{.*}}) {alignment = 128 : i64} : memref<?xf32>
  // CHECK: memref.copy %[[A]], %[[ALLOC]] : memref<?xf32{{.*}} to memref<?xf32>
  // CHECK: %[[SV:.*]] = memref.subview %[[ALLOC]][0] [4] [1] : memref<?xf32> to memref<4xf32>
  // CHECK: memref.copy %[[t]], %[[SV]] : memref<4xf32, #map> to memref<4xf32>
  %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
  // CHECK: return %{{.*}} : memref<?xf32>
  return %r0 : tensor<?xf32>
}
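// Note: neither argument is writable here, so the destination must be
// allocated and %A copied into it before the slice is written; the allocated
// buffer is what gets returned.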
// -----
//===----------------------------------------------------------------------===//
// Cross function boundary cases.
//===----------------------------------------------------------------------===//
// CHECK: func @matmul(
// CHECK-SAME: %[[A:[0-9a-zA-Z]*]]: memref<128x256xf32>
// CHECK-SAME: %[[B:[0-9a-zA-Z]*]]: memref<256x192xf32>
@@ -364,29 +221,7 @@ func.func @matmul(
// -----
// CHECK-LABEL: func @tensor_cast_not_in_place(
// CHECK-SAME: %[[A:.*]]: memref<?xf32{{.*}}>, %[[B:.*]]: memref<?xf32{{.*}}>
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: memref.copy %[[A]], %[[alloc]]
// CHECK: %[[subview:.*]] = memref.subview %[[A]][{{.*}}] [4] [1] : {{.*}} to memref<4xf32
// CHECK: memref.copy %[[alloc]], %[[subview]]
func.func @tensor_cast_not_in_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false}, %idx: index)
  -> (tensor<?xf32>)
{
  %r0 = tensor.cast %A : tensor<?xf32> to tensor<4xf32>
  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
  return %r1 : tensor<?xf32>
}
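// Note: the cast result aliases %A, which is also the insert_slice
// destination, so the inserted source is expected to be copied into a
// temporary allocation first and then copied back into the subview of %A
// (see the CHECKs above).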
// -----
//===----------------------------------------------------------------------===//
// Insertion point cases.
//===----------------------------------------------------------------------===//
/// These tests just check the produced IR is valid and does not have dominance
/// This test just checks the produced IR is valid and does not have dominance
/// errors in the def-use chains.
// CHECK-LABEL: func @dominance_violation_bug_1
@@ -406,19 +241,6 @@ func.func @dominance_violation_bug_1(
  return %rA : tensor<?x?xf32>
}
// -----
// CHECK-LABEL: func @insert_op
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, {{.*}}>, %[[s:.*]]: f32, %[[i:.*]]: index
func.func @insert_op(%t1 : tensor<?xf32> {bufferization.writable = true},
                     %s : f32, %i : index) -> tensor<?xf32> {
  // CHECK: memref.store %[[s]], %[[t1]][%[[i]]]
  %0 = tensor.insert %s into %t1[%i] : tensor<?xf32>
  // CHECK: return
  return %0 : tensor<?xf32>
}
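// Note: %t1 is writable and not read afterwards, so the tensor.insert lowers
// to a plain memref.store into %t1's buffer with no copy.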
// -----
func.func @gather_like(
@@ -537,85 +359,3 @@ func.func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x
  return %3 : tensor<?x1x6x8xf32>
}
// -----
// CHECK-LABEL: func @write_to_select_op_source
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
func.func @write_to_select_op_source(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK: memref.copy %[[t1]], %[[alloc]]
  // CHECK: memref.store %{{.*}}, %[[alloc]]
  %w = tensor.insert %cst into %t1[%idx] : tensor<?xf32>
  // CHECK: %[[select:.*]] = arith.select %{{.*}}, %[[t1]], %[[t2]]
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: return %[[select]], %[[alloc]]
  return %s, %w : tensor<?xf32>, tensor<?xf32>
}
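// Note: %t1 is still read by the arith.select after the write, so the
// tensor.insert must bufferize out-of-place into a new allocation while the
// select keeps operating on the original buffers.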
// -----
// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: memref<?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>
func.func @write_after_select_read_one(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index
  // CHECK: %[[alloc:.*]] = memref.alloc
  // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
  // CHECK-DAG: memref.copy %[[t1]], %[[alloc]]
  // CHECK: %[[select:.*]] = arith.select %{{.*}}, %[[casted]], %[[t2]]
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: memref.store %{{.*}}, %[[select]]
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: %[[f:.*]] = memref.load %[[t1]]
  %f = tensor.extract %t1[%idx] : tensor<?xf32>
  // CHECK: return %[[f]], %[[select]]
  return %f, %w : f32, tensor<?xf32>
}
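// Note: %t1 is read again after the write through the select, so it is
// expected to be copied into a fresh allocation first; the memref.cast
// presumably adjusts the allocation's layout so that both select operands
// have matching memref types.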
// -----
// A regression test to make sure that we handle rank-reducing extract_slice
// correctly.
// CHECK-LABEL: func @rank_reducing
func.func @rank_reducing(
    %i: index, %j: index,
    %arg0: tensor<8x18x32xf32>)
  -> tensor<?x1x6x8xf32> {
  %c1 = arith.constant 1 : index
  %c6 = arith.constant 6 : index
  %c8 = arith.constant 8 : index
  %c32 = arith.constant 32 : index
  %c0 = arith.constant 0 : index
  %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32>
  %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor<?x1x6x8xf32>
  %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32>
  %5 = scf.for %arg7 = %c0 to %c32 step %c8 iter_args(%arg8 = %1) -> (tensor<?x1x6x8xf32>) {
    %7 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg7)
    %8 = tensor.extract_slice %arg0[%i, %j, %arg7] [1, 6, 8] [1, 1, 1] : tensor<8x18x32xf32> to tensor<1x6x8xf32>
    %9 = scf.for %arg9 = %c0 to %c6 step %c1 iter_args(%arg10 = %2) -> (tensor<1x6x8xf32>) {
      %11 = tensor.extract_slice %8[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x6x8xf32> to tensor<1x1x8xf32>
      %12 = tensor.insert_slice %11 into %arg10[0, %arg9, 0] [1, 1, 8] [1, 1, 1] : tensor<1x1x8xf32> into tensor<1x6x8xf32>
      scf.yield %12 : tensor<1x6x8xf32>
    }
    %10 = tensor.insert_slice %9 into %arg8[%7, 0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] : tensor<1x6x8xf32> into tensor<?x1x6x8xf32>
    scf.yield %10 : tensor<?x1x6x8xf32>
  }
  return %5 : tensor<?x1x6x8xf32>
}
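// Note: the interesting part is the rank-mismatched insert_slice of the 3-D
// tensor<1x6x8xf32> into the 4-D iter_args tensor<?x1x6x8xf32>, which is
// expected to bufferize to a rank-reducing memref.subview of the loop's
// buffer plus a copy.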