[WebAssembly] Support promoting lower lanes of f16x8 to f32x4.#129786
Open
brendandahl wants to merge 1 commit intollvm:mainfrom
Open
[WebAssembly] Support promoting lower lanes of f16x8 to f32x4.#129786brendandahl wants to merge 1 commit intollvm:mainfrom
brendandahl wants to merge 1 commit intollvm:mainfrom
Conversation
Member
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Brendan Dahl (brendandahl) ChangesFull diff: https://github.com/llvm/llvm-project/pull/129786.diff 6 Files Affected:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 08e39bf1a79b4..c509d7841135e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -45,6 +45,7 @@ typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef __fp16 __f16x4 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -2010,6 +2011,13 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
}
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_f32x4_promote_low_f16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector(
+ (__f16x4){((__f16x8)__a)[0], ((__f16x8)__a)[1],
+ ((__f16x8)__a)[2], ((__f16x8)__a)[3]}, __f32x4);
+}
+
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
v128_t __b,
v128_t __c) {
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..1f4809483589e 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1033,6 +1033,12 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
+// CHECK-LABEL: test_f32x4_promote_low_f16x8:
+// CHECK: f32x4.promote_low_f16x8{{$}}
+v128_t test_f32x4_promote_low_f16x8(v128_t a) {
+ return wasm_f32x4_promote_low_f16x8(a);
+}
+
// CHECK-LABEL: test_i8x16_shuffle:
// CHECK: i8x16.shuffle 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
// 0{{$}}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b24a45c2d8898..4a034ed508cfe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2341,7 +2341,7 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- if (Op.getValueType() != MVT::v2f64)
+ if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
return SDValue();
auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
@@ -2354,6 +2354,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
Opcode = WebAssemblyISD::CONVERT_LOW_U;
break;
case ISD::FP_EXTEND:
+ case ISD::FP16_TO_FP:
Opcode = WebAssemblyISD::PROMOTE_LOW;
break;
default:
@@ -2372,36 +2373,52 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
return true;
};
- unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
- SDValue LHSSrcVec, RHSSrcVec;
- if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
- !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
+ unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
+ unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
+ SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
+
+ if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec, FirstIndex) ||
+ !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec, SecondIndex))
+ return SDValue();
+
+ // If we're converting to v4f32, check the third and fourth lanes, too.
+ if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode, ThirdSrcVec, ThirdIndex) ||
+ !GetConvertedLane(Op.getOperand(3), FourthOpcode, FourthSrcVec, FourthIndex)))
+ return SDValue();
+
+ if (FirstOpcode != SecondOpcode)
return SDValue();
- if (LHSOpcode != RHSOpcode)
+ // TODO Add an optimization similar to the v2f64 below for shuffling the
+ // vectors when the lanes are in the wrong order or come from different src
+ // vectors.
+ if (NumLanes == 4 && (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
+ FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec || FirstSrcVec != FourthSrcVec ||
+ FirstIndex != 0 || SecondIndex != 1 || ThirdIndex != 2 || FourthIndex != 3))
return SDValue();
MVT ExpectedSrcVT;
- switch (LHSOpcode) {
+ switch (FirstOpcode) {
case WebAssemblyISD::CONVERT_LOW_S:
case WebAssemblyISD::CONVERT_LOW_U:
ExpectedSrcVT = MVT::v4i32;
break;
case WebAssemblyISD::PROMOTE_LOW:
- ExpectedSrcVT = MVT::v4f32;
+ ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
break;
}
- if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ if (FirstSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- auto Src = LHSSrcVec;
- if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ auto Src = FirstSrcVec;
+ if (NumLanes == 2 && (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
// Shuffle the source vector so that the converted lanes are the low lanes.
Src = DAG.getVectorShuffle(
- ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
- {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
+ {static_cast<int>(FirstIndex), static_cast<int>(SecondIndex) + 4, -1, -1});
}
- return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+ return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index c591e5ef181a4..d2d62b8b62c3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1468,6 +1468,7 @@ defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+defm "" : HalfPrecisionConvert<F32x4, I16x8, promote_low, "promote_low_f16x8", 0x14b>;
// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 4e8ff5955c63b..f0e23ea289265 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -369,3 +369,23 @@ define <8 x half> @shuffle_poison_v8f16(<8 x half> %x, <8 x half> %y) {
i32 poison, i32 poison, i32 poison, i32 poison>
ret <8 x half> %res
}
+
+define <4 x float> @promote_low_v4f32(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32:
+; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fpext <4 x half> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+define <4 x float> @promote_low_v4f32_2(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32_2:
+; CHECK: .functype promote_low_v4f32_2 (v128) -> (v128)
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = fpext <8 x half> %x to <8 x float>
+ %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4bc52a0c..57af1daad0226 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -935,4 +935,7 @@ main:
# CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02]
f16x8.convert_i16x8_u
+ # CHECK: f32x4.promote_low_f16x8 # encoding: [0xfd,0xcb,0x02]
+ f32x4.promote_low_f16x8
+
end_function
|
Member
|
@llvm/pr-subscribers-backend-webassembly Author: Brendan Dahl (brendandahl) ChangesFull diff: https://github.com/llvm/llvm-project/pull/129786.diff 6 Files Affected:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 08e39bf1a79b4..c509d7841135e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -45,6 +45,7 @@ typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef __fp16 __f16x4 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -2010,6 +2011,13 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
}
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_f32x4_promote_low_f16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector(
+ (__f16x4){((__f16x8)__a)[0], ((__f16x8)__a)[1],
+ ((__f16x8)__a)[2], ((__f16x8)__a)[3]}, __f32x4);
+}
+
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
v128_t __b,
v128_t __c) {
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..1f4809483589e 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1033,6 +1033,12 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
+// CHECK-LABEL: test_f32x4_promote_low_f16x8:
+// CHECK: f32x4.promote_low_f16x8{{$}}
+v128_t test_f32x4_promote_low_f16x8(v128_t a) {
+ return wasm_f32x4_promote_low_f16x8(a);
+}
+
// CHECK-LABEL: test_i8x16_shuffle:
// CHECK: i8x16.shuffle 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
// 0{{$}}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b24a45c2d8898..4a034ed508cfe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2341,7 +2341,7 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- if (Op.getValueType() != MVT::v2f64)
+ if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
return SDValue();
auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
@@ -2354,6 +2354,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
Opcode = WebAssemblyISD::CONVERT_LOW_U;
break;
case ISD::FP_EXTEND:
+ case ISD::FP16_TO_FP:
Opcode = WebAssemblyISD::PROMOTE_LOW;
break;
default:
@@ -2372,36 +2373,52 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
return true;
};
- unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
- SDValue LHSSrcVec, RHSSrcVec;
- if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
- !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
+ unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
+ unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
+ SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
+
+ if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec, FirstIndex) ||
+ !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec, SecondIndex))
+ return SDValue();
+
+ // If we're converting to v4f32, check the third and fourth lanes, too.
+ if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode, ThirdSrcVec, ThirdIndex) ||
+ !GetConvertedLane(Op.getOperand(3), FourthOpcode, FourthSrcVec, FourthIndex)))
+ return SDValue();
+
+ if (FirstOpcode != SecondOpcode)
return SDValue();
- if (LHSOpcode != RHSOpcode)
+ // TODO Add an optimization similar to the v2f64 below for shuffling the
+ // vectors when the lanes are in the wrong order or come from different src
+ // vectors.
+ if (NumLanes == 4 && (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
+ FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec || FirstSrcVec != FourthSrcVec ||
+ FirstIndex != 0 || SecondIndex != 1 || ThirdIndex != 2 || FourthIndex != 3))
return SDValue();
MVT ExpectedSrcVT;
- switch (LHSOpcode) {
+ switch (FirstOpcode) {
case WebAssemblyISD::CONVERT_LOW_S:
case WebAssemblyISD::CONVERT_LOW_U:
ExpectedSrcVT = MVT::v4i32;
break;
case WebAssemblyISD::PROMOTE_LOW:
- ExpectedSrcVT = MVT::v4f32;
+ ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
break;
}
- if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ if (FirstSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- auto Src = LHSSrcVec;
- if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ auto Src = FirstSrcVec;
+ if (NumLanes == 2 && (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
// Shuffle the source vector so that the converted lanes are the low lanes.
Src = DAG.getVectorShuffle(
- ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
- {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
+ {static_cast<int>(FirstIndex), static_cast<int>(SecondIndex) + 4, -1, -1});
}
- return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+ return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index c591e5ef181a4..d2d62b8b62c3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1468,6 +1468,7 @@ defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+defm "" : HalfPrecisionConvert<F32x4, I16x8, promote_low, "promote_low_f16x8", 0x14b>;
// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 4e8ff5955c63b..f0e23ea289265 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -369,3 +369,23 @@ define <8 x half> @shuffle_poison_v8f16(<8 x half> %x, <8 x half> %y) {
i32 poison, i32 poison, i32 poison, i32 poison>
ret <8 x half> %res
}
+
+define <4 x float> @promote_low_v4f32(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32:
+; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fpext <4 x half> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+define <4 x float> @promote_low_v4f32_2(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32_2:
+; CHECK: .functype promote_low_v4f32_2 (v128) -> (v128)
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = fpext <8 x half> %x to <8 x float>
+ %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4bc52a0c..57af1daad0226 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -935,4 +935,7 @@ main:
# CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02]
f16x8.convert_i16x8_u
+ # CHECK: f32x4.promote_low_f16x8 # encoding: [0xfd,0xcb,0x02]
+ f32x4.promote_low_f16x8
+
end_function
|
Member
|
@llvm/pr-subscribers-mc Author: Brendan Dahl (brendandahl) ChangesFull diff: https://github.com/llvm/llvm-project/pull/129786.diff 6 Files Affected:
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 08e39bf1a79b4..c509d7841135e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -45,6 +45,7 @@ typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
typedef unsigned int __u32x2
__attribute__((__vector_size__(8), __aligned__(8)));
typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef __fp16 __f16x4 __attribute__((__vector_size__(8), __aligned__(8)));
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -2010,6 +2011,13 @@ static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
}
+static __inline__ v128_t __FP16_FN_ATTRS
+wasm_f32x4_promote_low_f16x8(v128_t __a) {
+ return (v128_t) __builtin_convertvector(
+ (__f16x4){((__f16x8)__a)[0], ((__f16x8)__a)[1],
+ ((__f16x8)__a)[2], ((__f16x8)__a)[3]}, __f32x4);
+}
+
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
v128_t __b,
v128_t __c) {
diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
index b601d90cfcc92..1f4809483589e 100644
--- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
+++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c
@@ -1033,6 +1033,12 @@ v128_t test_f64x2_promote_low_f32x4(v128_t a) {
return wasm_f64x2_promote_low_f32x4(a);
}
+// CHECK-LABEL: test_f32x4_promote_low_f16x8:
+// CHECK: f32x4.promote_low_f16x8{{$}}
+v128_t test_f32x4_promote_low_f16x8(v128_t a) {
+ return wasm_f32x4_promote_low_f16x8(a);
+}
+
// CHECK-LABEL: test_i8x16_shuffle:
// CHECK: i8x16.shuffle 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
// 0{{$}}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b24a45c2d8898..4a034ed508cfe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2341,7 +2341,7 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- if (Op.getValueType() != MVT::v2f64)
+ if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
return SDValue();
auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
@@ -2354,6 +2354,7 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
Opcode = WebAssemblyISD::CONVERT_LOW_U;
break;
case ISD::FP_EXTEND:
+ case ISD::FP16_TO_FP:
Opcode = WebAssemblyISD::PROMOTE_LOW;
break;
default:
@@ -2372,36 +2373,52 @@ static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
return true;
};
- unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
- SDValue LHSSrcVec, RHSSrcVec;
- if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
- !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
+ unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
+ unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
+ SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
+
+ if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec, FirstIndex) ||
+ !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec, SecondIndex))
+ return SDValue();
+
+ // If we're converting to v4f32, check the third and fourth lanes, too.
+ if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode, ThirdSrcVec, ThirdIndex) ||
+ !GetConvertedLane(Op.getOperand(3), FourthOpcode, FourthSrcVec, FourthIndex)))
+ return SDValue();
+
+ if (FirstOpcode != SecondOpcode)
return SDValue();
- if (LHSOpcode != RHSOpcode)
+ // TODO Add an optimization similar to the v2f64 below for shuffling the
+ // vectors when the lanes are in the wrong order or come from different src
+ // vectors.
+ if (NumLanes == 4 && (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
+ FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec || FirstSrcVec != FourthSrcVec ||
+ FirstIndex != 0 || SecondIndex != 1 || ThirdIndex != 2 || FourthIndex != 3))
return SDValue();
MVT ExpectedSrcVT;
- switch (LHSOpcode) {
+ switch (FirstOpcode) {
case WebAssemblyISD::CONVERT_LOW_S:
case WebAssemblyISD::CONVERT_LOW_U:
ExpectedSrcVT = MVT::v4i32;
break;
case WebAssemblyISD::PROMOTE_LOW:
- ExpectedSrcVT = MVT::v4f32;
+ ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
break;
}
- if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ if (FirstSrcVec.getValueType() != ExpectedSrcVT)
return SDValue();
- auto Src = LHSSrcVec;
- if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ auto Src = FirstSrcVec;
+ if (NumLanes == 2 && (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
// Shuffle the source vector so that the converted lanes are the low lanes.
Src = DAG.getVectorShuffle(
- ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
- {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
+ {static_cast<int>(FirstIndex), static_cast<int>(SecondIndex) + 4, -1, -1});
}
- return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+ return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32, Src);
}
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index c591e5ef181a4..d2d62b8b62c3e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1468,6 +1468,7 @@ defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+defm "" : HalfPrecisionConvert<F32x4, I16x8, promote_low, "promote_low_f16x8", 0x14b>;
// Lower extending loads to load64_zero + promote_low
def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll
index 4e8ff5955c63b..f0e23ea289265 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll
@@ -369,3 +369,23 @@ define <8 x half> @shuffle_poison_v8f16(<8 x half> %x, <8 x half> %y) {
i32 poison, i32 poison, i32 poison, i32 poison>
ret <8 x half> %res
}
+
+define <4 x float> @promote_low_v4f32(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32:
+; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}}
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %a = fpext <4 x half> %v to <4 x float>
+ ret <4 x float> %a
+}
+
+define <4 x float> @promote_low_v4f32_2(<8 x half> %x) {
+; CHECK-LABEL: promote_low_v4f32_2:
+; CHECK: .functype promote_low_v4f32_2 (v128) -> (v128)
+; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0
+; CHECK-NEXT: return $pop[[R]]
+ %v = fpext <8 x half> %x to <8 x float>
+ %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %a
+}
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4bc52a0c..57af1daad0226 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -935,4 +935,7 @@ main:
# CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02]
f16x8.convert_i16x8_u
+ # CHECK: f32x4.promote_low_f16x8 # encoding: [0xfd,0xcb,0x02]
+ f32x4.promote_low_f16x8
+
end_function
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
✅ With the latest revision this PR passed the undef deprecator. |
dschuff
approved these changes
Mar 4, 2025
| ; CHECK: .functype promote_low_v4f32 (v128) -> (v128){{$}} | ||
| ; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0 | ||
| ; CHECK-NEXT: return $pop[[R]] | ||
| %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
Member
There was a problem hiding this comment.
Suggested change
| %v = shufflevector <8 x half> %x, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
| %v = shufflevector <8 x half> %x, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: f32x4.promote_low_f16x8 $push[[R:[0-9]+]]=, $0 | ||
| ; CHECK-NEXT: return $pop[[R]] | ||
| %v = fpext <8 x half> %x to <8 x float> | ||
| %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
Member
There was a problem hiding this comment.
Suggested change
| %a = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> | |
| %a = shufflevector <8 x float> %v, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
5ca39c1 to
50ad443
Compare
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
No description provided.