diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index a24a918357f2d..55629a2fd95f1 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -112,6 +112,32 @@ def AMDGPU_ExtPackedFp8Op :
   }];
 }
 
+def AMDGPU_ScaledExtPacked816Op
+    : AMDGPU_Op<"scaled_ext_packed816", [Pure]>,
+      Arguments<(
+          ins AnyTypeOf<[VectorOfLengthAndType<[8], [F4E2M1FN, F8E4M3FN, F8E5M2]>,
+                         VectorOfLengthAndType<[16], [F6E2M3FN, F6E3M2FN]>]>:$source,
+          F32:$scale,
+          ConfinedAttr<I32Attr, [IntNonNegative, IntMaxValue<3>]>:$index)>,
+      Results<(
+          outs AnyTypeOf<[FixedVectorOfLengthAndType<[8], [F32]>,
+                          FixedVectorOfLengthAndType<[8], [F16]>,
+                          FixedVectorOfLengthAndType<[8], [BF16]>,
+                          FixedVectorOfLengthAndType<[16], [F32]>,
+                          FixedVectorOfLengthAndType<[16], [F16]>,
+                          FixedVectorOfLengthAndType<[16], [BF16]>]>:$res)> {
+
+  let summary = "Extend and scale a vector of packed floating-point values";
+
+  let description = [{
+    Extend and scale 8 or 16 packed floating-point values into 8 or 16
+    floats and return them.
+  }];
+
+  let assemblyFormat = [{
+    attr-dict $source `,` $scale `[` $index `]` `:` type($source) `to` type($res)
+  }];
+}
+
 def AMDGPU_ScaledExtPackedOp
     : AMDGPU_Op<"scaled_ext_packed", [Pure]>,
       Arguments<(
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 369e0fff538e1..de4b9d9431a9e 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -221,6 +221,61 @@ func.func @scaled_ext_scalar_f4e2m1_bf16(%v: vector<2xf4E2M1FN>, %scale: f32) -> vector<2xbf16> {
   func.return %ret : vector<2xbf16>
 }
 
+// CHECK-LABEL: func.func @scaled_ext_packed816_fp4
+func.func @scaled_ext_packed816_fp4(%v: vector<8xf4E2M1FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret0 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret1 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret2 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf4E2M1FN> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_fp8
+func.func @scaled_ext_packed816_fp8(%v: vector<8xf8E4M3FN>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret0 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret1 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret2 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf8E4M3FN> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_bf8
+func.func @scaled_ext_packed816_bf8(%v: vector<8xf8E5M2>, %scale: f32) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret0 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret1 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf8E5M2> to vector<8xbf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret2 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<8xf8E5M2> to vector<8xf32>
+  func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_fp6
+func.func @scaled_ext_packed816_fp6(%v: vector<16xf6E2M3FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret0 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret1 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xbf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret2 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<16xf6E2M3FN> to vector<16xf32>
+  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_bf6
+func.func @scaled_ext_packed816_bf6(%v: vector<16xf6E3M2FN>, %scale: f32) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret0 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret1 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xbf16>
+  // CHECK: amdgpu.scaled_ext_packed816
+  %ret2 = amdgpu.scaled_ext_packed816 %v, %scale[0] : vector<16xf6E3M2FN> to vector<16xf32>
+  func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
 // CHECK-LABEL: func.func @packed_scaled_trunc_f8e4m3_f32
 // CHECK: amdgpu.packed_scaled_trunc
 func.func @packed_scaled_trunc_f8e4m3_f32(%v: vector<2xf32>, %scale: f32) -> vector<4xf8E4M3FN> {
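
The tests above only exercise parsing and printing. As a rough semantics
sketch (an assumption drawn from the op description, not a lowering this
patch defines), the scalar-scale form can be read as extending each packed
element to the result element type and multiplying by the broadcast f32
scale; the `[index]` selector's behavior is not modeled here:

  // Hypothetical expansion (assumed semantics) of:
  //   %r = amdgpu.scaled_ext_packed816 %v, %scale[0]
  //          : vector<8xf4E2M1FN> to vector<8xf32>
  %ext = arith.extf %v : vector<8xf4E2M1FN> to vector<8xf32>  // widen the packed values
  %s = vector.broadcast %scale : f32 to vector<8xf32>         // splat the f32 scale
  %r = arith.mulf %ext, %s : vector<8xf32>                    // apply the scaling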