Skip to content

Commit

Permalink
Merge pull request #20 from jerryyin/miopen-dialect
Browse files Browse the repository at this point in the history
Adding tuning parameters unit tests
  • Loading branch information
whchung committed Jun 17, 2020
2 parents dbe2a97 + 1fd9e9b commit 45e5146
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 0 deletions.
Expand Up @@ -981,6 +981,7 @@ std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCFlags(ModuleOp m)
llvm::raw_string_ostream output(resultStr);

for (auto f : m.getOps<FuncOp>()) {
output << f.getName() << "\n";
miopen::ConvOpType opType;
ObtainConvDirection(f, opType);

Expand Down Expand Up @@ -1104,6 +1105,7 @@ std::unique_ptr<llvm::StringRef> mlir::translateModuleToMIOpenCFlags(ModuleOp m)

output << " -std=c++14";
output << " -D__HIP_PLATFORM_HCC__=1";
output << "\n";
});
}

Expand Down
100 changes: 100 additions & 0 deletions mlir/test/Dialect/MIOpen/translate_cflags_bwd.mlir
@@ -0,0 +1,100 @@
// RUN: mlir-translate -mlir-to-miopen-cflags %s | FileCheck %s

func @basic_parsing(%filter : memref<?x?xf32>, %input : memref<?x?xf32>, %output : memref<?x?xf32>) {
miopen.gridwise_gemm(%filter, %input, %output) {
kernel_algorithm = "backward_data_v1r1",
filter_dimension = [128, 8, 4, 4],
filter_layout = ["k", "c", "y", "x"],
input_dimension = [128, 8, 32, 32],
input_layout = ["ni", "ci", "hi", "wi"],
output_dimension = [128, 128, 32, 32],
output_layout = ["no", "ko", "ho", "wo"],
dilations = [1, 1],
padding = [[1, 1], [1, 1]],
strides = [1, 1]
} : memref<?x?xf32>,
memref<?x?xf32>,
memref<?x?xf32>
return
}
// CHECK-LABEL: basic_parsing
// CHECK: -DCK_PARAM_PROBLEM_K=128
// CHECK: -DCK_PARAM_PROBLEM_C=8
// CHECK: -DCK_PARAM_PROBLEM_Y=4
// CHECK: -DCK_PARAM_PROBLEM_X=4
// CHECK: -DCK_PARAM_PROBLEM_N=128
// CHECK: -DCK_PARAM_PROBLEM_HI=32
// CHECK: -DCK_PARAM_PROBLEM_WI=32
// CHECK: -DCK_PARAM_PROBLEM_HO=32
// CHECK: -DCK_PARAM_PROBLEM_WO=32
// CHECK: -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DILATION_H=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DILATION_W=1
// CHECK: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=1
// CHECK: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=1
// CHECK: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=1
// CHECK: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=1

func @all_params(%filter : memref<?x?xf32>, %input : memref<?x?xf32>, %output : memref<?x?xf32>) {
miopen.gridwise_gemm(%filter, %input, %output) {
kernel_algorithm = "backward_data_v1r1",
filter_dimension = [128, 128, 3, 3],
filter_layout = ["k", "c", "y", "x"],
input_dimension = [32, 128, 32, 32],
input_layout = ["ni", "ci", "hi", "wi"],
output_dimension = [32, 128, 32, 32],
output_layout = ["no", "ko", "ho", "wo"],
dilations = [1, 1],
padding = [[1, 1], [1, 1]],
strides = [1, 1]
} : memref<?x?xf32>,
memref<?x?xf32>,
memref<?x?xf32>
return
}
// CHECK-LABEL: all_params
// CHECK: -DCK_PARAM_PROBLEM_K=128
// CHECK: -DCK_PARAM_PROBLEM_C=128
// CHECK: -DCK_PARAM_PROBLEM_Y=3
// CHECK: -DCK_PARAM_PROBLEM_X=3
// CHECK: -DCK_PARAM_PROBLEM_N=32
// CHECK: -DCK_PARAM_PROBLEM_HI=32
// CHECK: -DCK_PARAM_PROBLEM_WI=32
// CHECK: -DCK_PARAM_PROBLEM_HO=32
// CHECK: -DCK_PARAM_PROBLEM_WO=32
// CHECK: -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DILATION_H=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DILATION_W=1
// CHECK: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=1
// CHECK: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=1
// CHECK: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=1
// CHECK: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=1
// CHECK: -DMIOPEN_USE_FP32=1
// CHECK: -DMIOPEN_USE_FP16=0
// CHECK: -DMIOPEN_USE_BFP16=0
// CHECK: -DCK_PARAM_DEPENDENT_GRID_SIZE=2304
// CHECK: -DCK_PARAM_TUNABLE_BLOCK_SIZE=256
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=8
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_M=32
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_M=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_SRC_DATA_PER_READ_GEMM=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=8
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_N=32
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_N=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_C_THREAD_COPY_DST_DATA_PER_WRITE_GEMM_N1=1
// CHECK: -DCK_PARAM_TUNABLE_GEMM_K_PER_BLOCK=8
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_LEVEL0_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_LEVEL1_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_PER_BLOCK=128
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_PER_THREAD=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_LEVEL0_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_LEVEL1_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_PER_BLOCK=128
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_PER_THREAD=4
// CHECK: -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1
// CHECK: -DCK_USE_AMD_BUFFER_ATOMIC_ADD=0
// CHECK: -std=c++14
// CHECK: -D__HIP_PLATFORM_HCC__=1
102 changes: 102 additions & 0 deletions mlir/test/Dialect/MIOpen/translate_cflags_fw.mlir
@@ -0,0 +1,102 @@
// RUN: mlir-translate -mlir-to-miopen-cflags %s | FileCheck %s

func @basic_parsing(%filter : memref<?x?xf32>, %input : memref<?x?xf32>, %output : memref<?x?xf32>) {
miopen.gridwise_gemm(%filter, %input, %output) {
kernel_algorithm = "v4r4",
filter_dimension = [1024, 1024, 1, 1],
filter_layout = ["k", "c", "y", "x"],
input_dimension = [64, 1024, 14, 14],
input_layout = ["ni", "ci", "hi", "wi"],
output_dimension = [64, 1024, 14, 14],
output_layout = ["no", "ko", "ho", "wo"],
dilations = [1, 1],
padding = [[0, 0], [0, 0]],
strides = [1, 1]
} : memref<?x?xf32>,
memref<?x?xf32>,
memref<?x?xf32>
return
}
// CHECK-LABEL: basic_parsing
// CHECK-LABEL: -DCK_PARAM_PROBLEM_K=1024
// CHECK-LABEL: -DCK_PARAM_PROBLEM_C=1024
// CHECK-LABEL: -DCK_PARAM_PROBLEM_Y=1
// CHECK-LABEL: -DCK_PARAM_PROBLEM_X=1
// CHECK-LABEL: -DCK_PARAM_PROBLEM_N=64
// CHECK-LABEL: -DCK_PARAM_PROBLEM_HI=14
// CHECK-LABEL: -DCK_PARAM_PROBLEM_WI=14
// CHECK-LABEL: -DCK_PARAM_PROBLEM_HO=14
// CHECK-LABEL: -DCK_PARAM_PROBLEM_WO=14
// CHECK-LABEL: -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1
// CHECK-LABEL: -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1
// CHECK-LABEL: -DCK_PARAM_PROBLEM_CONV_DILATION_H=1
// CHECK-LABEL: -DCK_PARAM_PROBLEM_CONV_DILATION_W=1
// CHECK-LABEL: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=0
// CHECK-LABEL: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=0
// CHECK-LABEL: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=0
// CHECK-LABEL: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=0

func @all_params(%filter : memref<?x?xf32>, %input : memref<?x?xf32>, %output : memref<?x?xf32>) {
miopen.gridwise_gemm(%filter, %input, %output) {
kernel_algorithm = "v4r4",
filter_dimension = [128, 8, 3, 3],
filter_layout = ["k", "c", "y", "x"],
input_dimension = [128, 8, 32, 32],
input_layout = ["ni", "ci", "hi", "wi"],
output_dimension = [128, 128, 30, 30],
output_layout = ["no", "ko", "ho", "wo"],
dilations = [1, 1],
padding = [[0, 0], [0, 0]],
strides = [1, 1]
} : memref<?x?xf32>,
memref<?x?xf32>,
memref<?x?xf32>
return
}
// CHECK-LABEL: all_params
// CHECK: -DCK_PARAM_PROBLEM_K=128
// CHECK: -DCK_PARAM_PROBLEM_C=8
// CHECK: -DCK_PARAM_PROBLEM_Y=3
// CHECK: -DCK_PARAM_PROBLEM_X=3
// CHECK: -DCK_PARAM_PROBLEM_N=128
// CHECK: -DCK_PARAM_PROBLEM_HI=32
// CHECK: -DCK_PARAM_PROBLEM_WI=32
// CHECK: -DCK_PARAM_PROBLEM_HO=30
// CHECK: -DCK_PARAM_PROBLEM_WO=30
// CHECK: -DCK_PARAM_PROBLEM_CONV_STRIDE_H=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_STRIDE_W=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DILATION_H=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DILATION_W=1
// CHECK: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_H=0
// CHECK: -DCK_PARAM_PROBLEM_IN_LEFT_PAD_W=0
// CHECK: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_H=0
// CHECK: -DCK_PARAM_PROBLEM_IN_RIGHT_PAD_W=0
// CHECK: -DMIOPEN_USE_FP32=1
// CHECK: -DMIOPEN_USE_FP16=0
// CHECK: -DMIOPEN_USE_BFP16=0
// CHECK: -DCK_PARAM_PROBLEM_CONV_DIRECTION_FORWARD=1
// CHECK: -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_DATA=0
// CHECK: -DCK_PARAM_PROBLEM_CONV_DIRECTION_BACKWARD_WEIGHT=0
// CHECK: -DCK_PARAM_DEPENDENT_GRID_SIZE=900
// CHECK: -DCK_PARAM_TUNABLE_BLOCK_SIZE=256
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=2
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_M=128
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_M=1
// CHECK: -DCK_PARAM_TUNABLE_GEMM_A_BLOCK_COPY_SRC_DATA_PER_READ_GEMM=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_K=2
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_CLUSTER_LENGTHS_GEMM_N=128
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_N=1
// CHECK: -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM=1
// CHECK: -DCK_PARAM_TUNABLE_GEMM_C_THREAD_COPY_DST_DATA_PER_WRITE_GEMM_N1=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_K_PER_BLOCK=8
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_LEVEL0_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_LEVEL1_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_PER_BLOCK=128
// CHECK: -DCK_PARAM_TUNABLE_GEMM_M_PER_THREAD=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_LEVEL0_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_LEVEL1_CLUSTER=4
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_PER_BLOCK=128
// CHECK: -DCK_PARAM_TUNABLE_GEMM_N_PER_THREAD=4
// CHECK: -DCK_THREADWISE_GEMM_USE_AMD_INLINE_ASM=1
// CHECK: -std=c++14
// CHECK: -D__HIP_PLATFORM_HCC__=1

0 comments on commit 45e5146

Please sign in to comment.