From 18bf4018cd13ca1d669f197e15280ca507c3f5ff Mon Sep 17 00:00:00 2001
From: Jianjian GUAN
Date: Fri, 6 Sep 2024 16:15:47 +0800
Subject: [PATCH] [mlir][LLVMIR] Add more vector predication intrinsic ops

This revision adds vector predication smax, smin, umax and umin intrinsic ops.
---
 .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td   |  4 ++++
 mlir/test/Dialect/LLVMIR/roundtrip.mlir       | 19 +++++++++++++++++++
 mlir/test/Target/LLVMIR/Import/intrinsic.ll   | 12 ++++++++++++
 .../test/Target/LLVMIR/llvmir-intrinsics.mlir | 16 ++++++++++++++++
 4 files changed, 51 insertions(+)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 3822eb3b3f1f6..5031426033aea 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -1156,6 +1156,10 @@ def LLVM_VPShlOp : LLVM_VPBinaryI<"shl">;
 def LLVM_VPOrOp : LLVM_VPBinaryI<"or">;
 def LLVM_VPAndOp : LLVM_VPBinaryI<"and">;
 def LLVM_VPXorOp : LLVM_VPBinaryI<"xor">;
+def LLVM_VPSMaxOp : LLVM_VPBinaryI<"smax">;
+def LLVM_VPSMinOp : LLVM_VPBinaryI<"smin">;
+def LLVM_VPUMaxOp : LLVM_VPBinaryI<"umax">;
+def LLVM_VPUMinOp : LLVM_VPBinaryI<"umin">;
 
 // Float Binary
 def LLVM_VPFAddOp : LLVM_VPBinaryF<"fadd">;
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index ff16bb0f857dd..0b251b81e9787 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -729,3 +729,22 @@ llvm.func @test_notail() -> i32 {
   %0 = llvm.call notail @tail_call_target() : () -> i32
   llvm.return %0 : i32
 }
+
+// CHECK-LABEL: @vector_predication_intrinsics
+// CHECK-SAME: (%[[ARG0:.*]]: vector<8xi32>, %[[ARG1:.*]]: vector<8xi32>, %[[ARG2:.*]]: vector<8xi1>, %[[ARG3:.*]]: i32)
+llvm.func @vector_predication_intrinsics(%A: vector<8xi32>, %B: vector<8xi32>,
+                                         %mask: vector<8xi1>, %evl: i32) {
+  // CHECK-NEXT: "llvm.intr.vp.smax"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]])
+  "llvm.intr.vp.smax" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK-NEXT: "llvm.intr.vp.smin"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]])
+  "llvm.intr.vp.smin" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK-NEXT: "llvm.intr.vp.umax"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]])
+  "llvm.intr.vp.umax" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK-NEXT: "llvm.intr.vp.umin"(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[ARG3]])
+  "llvm.intr.vp.umin" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
index 0fa82cef0a0f5..2fc2c3c6c32ff 100644
--- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll
+++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll
@@ -897,6 +897,14 @@ define void @vector_predication_intrinsics(<8 x i32> %0, <8 x i32> %1, <8 x floa
   %59 = call <8 x ptr> @llvm.vp.inttoptr.v8p0.v8i64(<8 x i64> %4, <8 x i1> %11, i32 %12)
   ; CHECK: "llvm.intr.vp.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>, vector<8xi1>, i32) -> vector<8xf32>
   %60 = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %2, <8 x float> %3, <8 x float> %3, <8 x i1> %11, i32 %12)
+  ; CHECK: "llvm.intr.vp.smax"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  %61 = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12)
+  ; CHECK: "llvm.intr.vp.smin"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  %62 = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12)
+  ; CHECK: "llvm.intr.vp.umax"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  %63 = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12)
+  ; CHECK: "llvm.intr.vp.umin"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  %64 = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i1> %11, i32 %12)
   ret void
 }
 
@@ -1113,6 +1121,10 @@ declare <8 x float> @llvm.vp.frem.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
 declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32)
 declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)
 declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.smax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.smin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.umax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+declare <8 x i32> @llvm.vp.umin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 declare i32 @llvm.vp.reduce.add.v8i32(i32, <8 x i32>, <8 x i1>, i32)
 declare i32 @llvm.vp.reduce.mul.v8i32(i32, <8 x i32>, <8 x i1>, i32)
 declare i32 @llvm.vp.reduce.and.v8i32(i32, <8 x i32>, <8 x i1>, i32)
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index e2eadf14fc97e..de0dc8d21584f 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -798,6 +798,18 @@ llvm.func @vector_predication_intrinsics(%A: vector<8xi32>, %B: vector<8xi32>,
   // CHECK: call <8 x i32> @llvm.vp.xor.v8i32
   "llvm.intr.vp.xor" (%A, %B, %mask, %evl) :
          (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK: call <8 x i32> @llvm.vp.smax.v8i32
+  "llvm.intr.vp.smax" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK: call <8 x i32> @llvm.vp.smin.v8i32
+  "llvm.intr.vp.smin" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK: call <8 x i32> @llvm.vp.umax.v8i32
+  "llvm.intr.vp.umax" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
+  // CHECK: call <8 x i32> @llvm.vp.umin.v8i32
+  "llvm.intr.vp.umin" (%A, %B, %mask, %evl) :
+         (vector<8xi32>, vector<8xi32>, vector<8xi1>, i32) -> vector<8xi32>
 
   // CHECK: call <8 x float> @llvm.vp.fadd.v8f32
   "llvm.intr.vp.fadd" (%C, %D, %mask, %evl) :
@@ -1123,6 +1135,10 @@ llvm.func @experimental_constrained_fptrunc(%s: f64, %v: vector<4xf32>) {
 // CHECK-DAG: declare <8 x i32> @llvm.vp.or.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 // CHECK-DAG: declare <8 x i32> @llvm.vp.and.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 // CHECK-DAG: declare <8 x i32> @llvm.vp.xor.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+// CHECK-DAG: declare <8 x i32> @llvm.vp.smax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+// CHECK-DAG: declare <8 x i32> @llvm.vp.smin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+// CHECK-DAG: declare <8 x i32> @llvm.vp.umax.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+// CHECK-DAG: declare <8 x i32> @llvm.vp.umin.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
 // CHECK-DAG: declare <8 x float> @llvm.vp.fadd.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
 // CHECK-DAG: declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
 // CHECK-DAG: declare <8 x float> @llvm.vp.fmul.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)