From 8b8aeb3f5672bd00518f27d69709eba8f2a54a50 Mon Sep 17 00:00:00 2001 From: RealityProgrammer Date: Tue, 14 Mar 2023 19:17:19 +0700 Subject: [PATCH] Fix source validator's code reports, remove 95% of AdvSimd implementation to prevent future consequences --- X10D/src/Collections/BoolListExtensions.cs | 2 +- X10D/src/Collections/ByteExtensions.cs | 2 +- X10D/src/Collections/Int32Extensions.cs | 20 +-- X10D/src/Core/IntrinsicExtensions.cs | 3 +- X10D/src/Core/IntrinsicUtility.cs | 176 +++++++++++---------- X10D/src/Core/SpanExtensions.cs | 44 ++---- 6 files changed, 125 insertions(+), 122 deletions(-) diff --git a/X10D/src/Collections/BoolListExtensions.cs b/X10D/src/Collections/BoolListExtensions.cs index 65f31b5ee..ee7005020 100644 --- a/X10D/src/Collections/BoolListExtensions.cs +++ b/X10D/src/Collections/BoolListExtensions.cs @@ -26,7 +26,7 @@ public static byte PackByte(this IReadOnlyList source) throw new ArgumentNullException(nameof(source)); } #endif - + if (source.Count > 8) { throw new ArgumentException("Source cannot contain more than than 8 elements.", nameof(source)); diff --git a/X10D/src/Collections/ByteExtensions.cs b/X10D/src/Collections/ByteExtensions.cs index 35403f8ca..6fde587b0 100644 --- a/X10D/src/Collections/ByteExtensions.cs +++ b/X10D/src/Collections/ByteExtensions.cs @@ -47,7 +47,7 @@ public static void Unpack(this byte value, Span destination) return; } #endif - + FallbackImplementation(value, destination); #if NETCOREAPP3_0_OR_GREATER diff --git a/X10D/src/Collections/Int32Extensions.cs b/X10D/src/Collections/Int32Extensions.cs index f226adf9d..f6e8fd771 100644 --- a/X10D/src/Collections/Int32Extensions.cs +++ b/X10D/src/Collections/Int32Extensions.cs @@ -42,7 +42,7 @@ public static void Unpack(this int value, Span destination) #if NETCOREAPP3_0_OR_GREATER // TODO: AdvSimd support. 
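// (Illustrative sketch, not part of this commit: one possible NEON path that mirrors the
// Ssse3Implementation below. The helper name and masks are hypothetical, it would need an
// AdvSimd.Arm64.IsSupported guard for VectorTableLookup, and it has not been validated on
// hardware, so it is left as a comment.)
//
// unsafe static void AdvSimdImplementation(int value, Span<bool> destination)
// {
//     fixed (bool* pDestination = destination)
//     {
//         var vec = AdvSimd.DuplicateToVector128(value).AsByte();
//         var mask1Lo = Vector128.Create((byte)0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
//         var mask1Hi = Vector128.Create((byte)2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3);
//         var mask2 = Vector128.Create((byte)0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
//             0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80);
//         var one = Vector128.Create((byte)0x01);
//
//         var lo = AdvSimd.Arm64.VectorTableLookup(vec, mask1Lo);
//         var hi = AdvSimd.Arm64.VectorTableLookup(vec, mask1Hi);
//
//         // CompareTest (vtst) sets a lane to all-ones when (element & mask) != 0.
//         AdvSimd.Store((byte*)pDestination, AdvSimd.And(AdvSimd.CompareTest(lo, mask2), one));
//         AdvSimd.Store((byte*)pDestination + 16, AdvSimd.And(AdvSimd.CompareTest(hi, mask2), one));
//     }
// }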
- + // https://stackoverflow.com/questions/24225786/fastest-way-to-unpack-32-bits-to-a-32-byte-simd-vector if (Avx2.IsSupported) { @@ -64,15 +64,15 @@ unsafe static void Avx2Implementation(int value, Span destination) fixed (bool* pDestination = destination) { var mask1 = Vector256.Create( - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 ).AsByte(); var mask2 = Vector256.Create( - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 ); @@ -81,7 +81,7 @@ unsafe static void Avx2Implementation(int value, Span destination) var and = Avx2.AndNot(shuffle, mask2); var cmp = Avx2.CompareEqual(and, Vector256.Zero); var correctness = Avx2.And(cmp, Vector256.Create((byte)0x01)); - + Avx.Store((byte*)pDestination, correctness); } } @@ -103,9 +103,9 @@ unsafe static void Ssse3Implementation(int value, Span destination) var and = Sse2.AndNot(shuffle, mask2); var cmp = Sse2.CompareEqual(and, Vector128.Zero); var correctness = Sse2.And(cmp, one); - + Sse2.Store((byte*)pDestination, correctness); - + shuffle = Ssse3.Shuffle(vec, mask1Hi); and = Sse2.AndNot(shuffle, mask2); cmp = Sse2.CompareEqual(and, Vector128.Zero); diff --git a/X10D/src/Core/IntrinsicExtensions.cs b/X10D/src/Core/IntrinsicExtensions.cs index 9e78dd1d3..11283606a 100644 --- a/X10D/src/Core/IntrinsicExtensions.cs +++ b/X10D/src/Core/IntrinsicExtensions.cs @@ -5,7 +5,8 @@ namespace X10D.Core; /// -/// Extension methods for SIMD vectors, namely , and . +/// Extension methods for SIMD vectors, namely , and +/// . /// public static class IntrinsicExtensions { diff --git a/X10D/src/Core/IntrinsicUtility.cs b/X10D/src/Core/IntrinsicUtility.cs index 7493001cf..07dd852ad 100644 --- a/X10D/src/Core/IntrinsicUtility.cs +++ b/X10D/src/Core/IntrinsicUtility.cs @@ -18,7 +18,9 @@ public static class IntrinsicUtility // FOR API CONSISTENCY. /// - ///
Correcting <see cref="Vector64{T}"/> of <see cref="byte"/> into 0 and 1 depend on their boolean truthiness.
+ /// <para>
+ /// Correcting <see cref="Vector64{T}"/> of <see cref="byte"/> into 0 and 1 depending on their boolean truthiness.
+ /// </para>
/// Operation (raw):
/// /// for (int i = 0; i < 8; i++) { @@ -33,19 +35,15 @@ public static class IntrinsicUtility /// ///
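/// <example>
/// Illustrative usage (hypothetical values; the result follows the operation above):
/// <code>
/// var raw = Vector64.Create((byte)0, 1, 0, 255, 3, 0, 128, 0);
/// var bits = IntrinsicUtility.CorrectBoolean(raw);
/// // bits = <0, 1, 0, 1, 1, 0, 1, 0>
/// </code>
/// </example>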
/// Vector of byte to correct. - /// A of which remapped back to 0 and 1 based on boolean truthiness. + /// + /// A of which remapped back to 0 and 1 based on boolean truthiness. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector64 CorrectBoolean(Vector64 vector) { - if (AdvSimd.IsSupported) - { - // Haven't tested since March 6th 2023 (Reason: Unavailable hardware). - var cmp = AdvSimd.CompareEqual(vector, Vector64.Zero); - var result = AdvSimd.BitwiseSelect(cmp, vector, Vector64.Zero); - - return result; - } + // TODO: AdvSimd implementation. + // TODO: WasmSimd implementation. (?) var output = GetUninitializedVector64(); @@ -64,7 +62,9 @@ public static Vector64 CorrectBoolean(Vector64 vector) } /// - ///
Correcting <see cref="Vector128{T}"/> of <see cref="byte"/> into 0 and 1 depend on their boolean truthiness.
+ /// <para>
+ /// Correcting <see cref="Vector128{T}"/> of <see cref="byte"/> into 0 and 1 depending on their boolean truthiness.
+ /// </para>
/// Operation (raw):
/// /// for (int i = 0; i < 16; i++) { @@ -79,7 +79,9 @@ public static Vector64 CorrectBoolean(Vector64 vector) /// ///
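/// <example>
/// Illustrative usage (hypothetical values): any non-zero byte becomes 1.
/// <code>
/// var raw = Vector128.Create((byte)200).WithElement(0, (byte)0);
/// var bits = IntrinsicUtility.CorrectBoolean(raw);
/// // bits = <0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>
/// </code>
/// </example>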
/// Vector of byte to correct. - /// A of which remapped back to 0 and 1 based on boolean truthiness. + /// + /// A of which remapped back to 0 and 1 based on boolean truthiness. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 CorrectBoolean(Vector128 vector) @@ -91,33 +93,25 @@ public static Vector128 CorrectBoolean(Vector128 vector) return result; } - if (AdvSimd.IsSupported) - { - // Haven't tested since March 6th 2023 (Reason: Unavailable hardware). - var cmp = AdvSimd.CompareEqual(vector, Vector128.Zero); - var result = AdvSimd.BitwiseSelect(cmp, vector, Vector128.Zero); - return result; - } + // TODO: AdvSimd implementation. + // TODO: WasmSimd implementation. var output = GetUninitializedVector128(); for (int i = 0; i < Vector128.Count; i++) { - ref var writeElement = ref Unsafe.Add(ref Unsafe.As, byte>(ref output), i); -#if NET7_0_OR_GREATER - writeElement = vector[i] == 0 ? (byte)0 : (byte)1; -#else - var element = Unsafe.Add(ref Unsafe.As, byte>(ref vector), i); - writeElement = element == 0 ? (byte)0 : (byte)1; -#endif + Unsafe.Add(ref Unsafe.As, byte>(ref output), i) = + Unsafe.Add(ref Unsafe.As, byte>(ref vector), i) == 0 ? (byte)0 : (byte)1; } return output; } /// - ///
Correcting <see cref="Vector256{T}"/> of <see cref="byte"/> into 0 and 1 depend on their boolean truthiness.
+ /// <para>
+ /// Correcting <see cref="Vector256{T}"/> of <see cref="byte"/> into 0 and 1 depending on their boolean truthiness.
+ /// </para>
/// Operation (raw):
/// /// for (int i = 0; i < 32; i++) { @@ -132,7 +126,9 @@ public static Vector128 CorrectBoolean(Vector128 vector) /// ///
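/// <example>
/// Illustrative usage (hypothetical values):
/// <code>
/// var raw = Vector256.Create((byte)7).WithElement(31, (byte)0);
/// var bits = IntrinsicUtility.CorrectBoolean(raw);
/// // bits = <1, 1, ..., 1, 0> (only the zeroed last element maps to 0)
/// </code>
/// </example>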
/// Vector of byte to correct. - /// A of which remapped back to 0 and 1 based on boolean truthiness. + /// + /// A of which remapped back to 0 and 1 based on boolean truthiness. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector256 CorrectBoolean(Vector256 vector) @@ -149,20 +145,17 @@ public static Vector256 CorrectBoolean(Vector256 vector) for (int i = 0; i < Vector256.Count; i++) { - ref var writeElement = ref Unsafe.Add(ref Unsafe.As, byte>(ref output), i); -#if NET7_0_OR_GREATER - writeElement = vector[i] == 0 ? (byte)0 : (byte)1; -#else - var element = Unsafe.Add(ref Unsafe.As, byte>(ref vector), i); - writeElement = element == 0 ? (byte)0 : (byte)1; -#endif + Unsafe.Add(ref Unsafe.As, byte>(ref output), i) = + Unsafe.Add(ref Unsafe.As, byte>(ref vector), i) == 0 ? (byte)0 : (byte)1; } return output; } /// - ///
Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.
+ /// <para>
+ /// Multiply packed 64-bit unsigned integer elements in lhs and rhs and truncate the results to 64-bit integers.
+ /// </para>
/// Operation:
/// /// dest[0] = lhs[0] * rhs[0]; @@ -171,7 +164,9 @@ public static Vector256 CorrectBoolean(Vector256 vector) ///
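/// <example>
/// Illustrative usage (hypothetical values); note the wrapping, truncating semantics:
/// <code>
/// var lhs = Vector128.Create(2UL, 3UL);
/// var rhs = Vector128.Create(10UL, ulong.MaxValue);
/// var product = IntrinsicUtility.Multiply(lhs, rhs);
/// // product = <20, 18446744073709551613> (3 * ulong.MaxValue wraps to ulong.MaxValue - 2)
/// </code>
/// </example>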
/// Left vector. /// Right vector. - /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// + /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] @@ -191,32 +186,26 @@ public static Vector128 Multiply(Vector128 lhs, Vector128 r return Sse2.Add(high, ac); } - if (AdvSimd.IsSupported) - { - // https://stackoverflow.com/questions/60236627/facing-problem-in-implementing-multiplication-of-64-bit-variables-using-arm-neon - // Hasn't been tested since March 7th 2023 (Reason: Unavailable hardware) - var a = AdvSimd.ExtractNarrowingLower(lhs); - var b = AdvSimd.ExtractNarrowingLower(rhs); - - var mul = AdvSimd.Multiply(rhs.AsUInt32(), AdvSimd.ReverseElement32(lhs).AsUInt32()); - - return AdvSimd.MultiplyWideningLowerAndAdd(AdvSimd.ShiftLeftLogical(mul.AsUInt64(), 32), a, b); - } + // TODO: AdvSimd implementation. + // TODO: WasmSimd implementation. var output = GetUninitializedVector128(); Unsafe.As, ulong>(ref output) = Unsafe.As, ulong>(ref lhs) * Unsafe.As, ulong>(ref rhs); - Unsafe.Add(ref Unsafe.As, ulong>(ref output), 1) = - Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), 1) * Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), 1); + Unsafe.Add(ref Unsafe.As, ulong>(ref output), 1) = + Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), 1) * + Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), 1); return output; } /// - ///
Multiply packed 64-bit unsigned integer elements in a and b and truncate the results to 64-bit integer.
+ /// <para>
+ /// Multiply packed 64-bit unsigned integer elements in lhs and rhs and truncate the results to 64-bit integers.
+ /// </para>
/// Operation:
/// /// dest[0] = lhs[0] * rhs[0]; @@ -227,7 +216,9 @@ public static Vector128 Multiply(Vector128 lhs, Vector128 r ///
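/// <example>
/// Illustrative usage (hypothetical values):
/// <code>
/// var lhs = Vector256.Create(1UL, 2UL, 3UL, 4UL);
/// var rhs = Vector256.Create(10UL, 10UL, 10UL, 10UL);
/// // IntrinsicUtility.Multiply(lhs, rhs) = <10, 20, 30, 40>
/// </code>
/// </example>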
/// Left vector. /// Right vector. - /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// + /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] @@ -253,14 +244,17 @@ public static Vector256 Multiply(Vector256 lhs, Vector256 r for (int i = 0; i < Vector256.Count; i++) { Unsafe.Add(ref Unsafe.As, ulong>(ref output), i) = - Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), i) * Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), i); + Unsafe.Add(ref Unsafe.As, ulong>(ref lhs), i) * + Unsafe.Add(ref Unsafe.As, ulong>(ref rhs), i); } return output; } /// - ///
Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.
+ /// <para>
+ /// Multiply packed 64-bit signed integer elements in lhs and rhs and truncate the results to 64-bit integers.
+ /// </para>
/// Operation:
/// /// dest[0] = lhs[0] * rhs[0]; @@ -269,7 +263,9 @@ public static Vector256 Multiply(Vector256 lhs, Vector256 r ///
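/// <example>
/// Illustrative usage (hypothetical values); two's-complement wrap-around applies on overflow:
/// <code>
/// var lhs = Vector128.Create(-2L, 3L);
/// var rhs = Vector128.Create(5L, -7L);
/// // IntrinsicUtility.Multiply(lhs, rhs) = <-10, -21>
/// </code>
/// </example>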
/// Left vector. /// Right vector. - /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// + /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 Multiply(Vector128 lhs, Vector128 rhs) @@ -278,7 +274,9 @@ public static Vector128 Multiply(Vector128 lhs, Vector128 rhs) } /// - ///
Multiply packed 64-bit signed integer elements in a and b and truncate the results to 64-bit integer.
+ /// <para>
+ /// Multiply packed 64-bit signed integer elements in lhs and rhs and truncate the results to 64-bit integers.
+ /// </para>
/// Operation:
/// /// dest[0] = lhs[0] * rhs[0]; @@ -289,7 +287,9 @@ public static Vector128 Multiply(Vector128 lhs, Vector128 rhs) ///
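/// <example>
/// Illustrative usage (hypothetical values):
/// <code>
/// var lhs = Vector256.Create(-1L, 2L, -3L, 4L);
/// var rhs = Vector256.Create(8L, 8L, 8L, 8L);
/// // IntrinsicUtility.Multiply(lhs, rhs) = <-8, 16, -24, 32>
/// </code>
/// </example>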
/// Left vector. /// Right vector. - /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// + /// A of whose elements is 64-bit truncated product of lhs and rhs. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector256 Multiply(Vector256 lhs, Vector256 rhs) @@ -298,7 +298,10 @@ public static Vector256 Multiply(Vector256 lhs, Vector256 rhs) } /// - ///
Horizontally apply OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in lhs and rhs.
+ /// <para>
+ /// Horizontally apply the OR operation on adjacent pairs of single-precision (32-bit) floating-point elements in
+ /// lhs and rhs.
+ /// </para>
/// Operation:
/// Left vector. /// Right vector. - /// A of with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs. + /// + /// A of with all elements is result of OR operation on adjacent pairs of + /// elements in lhs and rhs. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) @@ -321,34 +327,35 @@ public static Vector128 HorizontalOr(Vector128 lhs, Vector128 output = GetUninitializedVector128(); - Unsafe.As, uint>(ref output) = - Unsafe.As, uint>(ref lhs) | Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 1); + Unsafe.As, uint>(ref output) = + Unsafe.As, uint>(ref lhs) | + Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 1); Unsafe.Add(ref Unsafe.As, uint>(ref output), 1) = - Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 2) | Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 3); + Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 2) | + Unsafe.Add(ref Unsafe.As, uint>(ref lhs), 3); Unsafe.Add(ref Unsafe.As, uint>(ref output), 2) = - Unsafe.As, uint>(ref rhs) | Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 1); + Unsafe.As, uint>(ref rhs) | + Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 1); Unsafe.Add(ref Unsafe.As, uint>(ref output), 3) = - Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 2) | Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 3); + Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 2) | + Unsafe.Add(ref Unsafe.As, uint>(ref rhs), 3); return output; } /// - ///
Horizontally apply OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.
+ /// <para>
+ /// Horizontally apply the OR operation on adjacent pairs of 32-bit integer elements in lhs and rhs.
+ /// </para>
/// Operation:
/// /// dest[0] = lhs[0] | lhs[1]; @@ -359,9 +366,10 @@ public static Vector128 HorizontalOr(Vector128 lhs, Vector128 /// Left vector. /// Right vector. - /// A of with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs. - /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. - /// Hardware doesn't support ARM64 NEON or SSE instruction set. + /// + /// A of with all elements is result of OR operation on adjacent pairs of + /// elements in lhs and rhs. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs) @@ -370,7 +378,9 @@ public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs } /// - ///
Horizontally apply OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.
+ /// <para>
+ /// Horizontally apply the OR operation on adjacent pairs of 32-bit unsigned integer elements in lhs and rhs.
+ /// </para>
/// Operation:
/// /// dest[0] = lhs[0] | lhs[1]; @@ -381,9 +391,10 @@ public static Vector128 HorizontalOr(Vector128 lhs, Vector128 rhs ///
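/// <example>
/// Illustrative usage (hypothetical values; pairs within each operand are OR'd together):
/// <code>
/// var lhs = Vector128.Create(0x1u, 0x2u, 0x4u, 0x8u);
/// var rhs = Vector128.Create(0x10u, 0x20u, 0x40u, 0x80u);
/// // IntrinsicUtility.HorizontalOr(lhs, rhs) = <0x3, 0xC, 0x30, 0xC0>
/// </code>
/// </example>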
/// Left vector. /// Right vector. - /// A of with all elements is result of OR operation on adjacent pairs of elements in lhs and rhs. - /// API avaliable on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, ARM64 NEON (untested) hardwares. - /// Hardware doesn't support ARM64 NEON or SSE2 instruction set. + /// + /// A of with all elements is result of OR operation on adjacent pairs of + /// elements in lhs and rhs. + /// [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] [CLSCompliant(false)] @@ -402,9 +413,10 @@ public static Vector128 HorizontalOr(Vector128 lhs, Vector128 /// ///
/// Input vector. - /// A of with elements the same as input vector except their positions/indices are reversed. - /// API available on SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 hardwares. - /// Hardware doesn't support SSE2 instruction set. + /// + /// A of with elements the same as input vector except their positions + /// (or indices) are reversed. + /// [Pure] [CLSCompliant(false)] [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] diff --git a/X10D/src/Core/SpanExtensions.cs b/X10D/src/Core/SpanExtensions.cs index 68bd05af7..e4ecf0799 100644 --- a/X10D/src/Core/SpanExtensions.cs +++ b/X10D/src/Core/SpanExtensions.cs @@ -72,13 +72,14 @@ private static Vector256 IntegerPackingMagicV256 public static bool Contains(this ReadOnlySpan span, T value) where T : struct, Enum { #if NET6_0_OR_GREATER - // Use MemoryMarshal.CreateSpan instead of using creating new Span instance from pointer will trim down a lot of instructions - // on Release mode. - // https://sharplab.io/#v2:EYLgxg9gTgpgtADwGwBYA0AXEBDAzgWwB8ABABgAJiBGAOgCUBXAOwwEt8YaBJFmKCAA4BlPgDdWYGLgDcAWABQZSrUYt2nAMIR8A1gBs+IqOMkyFxAExVzFIQAtsUAQBlsweszYc588wGZyGCYGfHIAFSkMAFFg0JByVhZyAG8FcnTyAEE0cgAhHI0cgBE0BQBfBX9KC3INFLSMgG0AKVYMAHEgvgkACgwATwEYCAAzHojcaNiASmmAXQb0xoBZGAw7CAATLh09HtX1rZ2BPQB5ATYIJlwaTIBzO9hcXFZRGB49RMS78kJyA4221250u11uDyeLzeIPYrAAXthQfNFpQAtQkORmLhsCMYORgBAIHp/mtAVQADxhAB8PSEAmwTEpVPIuHpTByYXIomwegYMGm5AA7nY+HjOfEYiF6vIMrLyLARgkkkEQrhyABeeUwRUAVWuOM4mVwlJyiQwNIVJPw0H6y0cuAcehonQwdG1oqYkh6rIZsx8coyxAA7FabXaoA6eTQNLBETA6QyepaVfhcDkfUwaM4gnd1tNo1cMNhErgenrsbjbsawqaWBbtVyeXy/SiKjKMiiWm1OkxumA+oNhmMJlMQrMFu2lgCjrt9qSZycYVcbvdHlIoe8mJ8mN9fiTDkDFxdWMvwWvnq8YDD8PDESemMjJ6jlBisQb8YTidPNhYmbS2UyLJshyja8vyQoirA4TkBKsTSgG6TBuQvaCuQCaMmaNLlgaVYAAoQGafBJg2qzWlAtr2o6zprG6uKwJ6MDemyszpmyWY5nmBYsMW1xlvqlZGiaSrmsRircmBLZPm2ZRAA=== + // Use MemoryMarshal.CreateSpan instead of using creating new Span instance from pointer will trim down a lot of + // instructions on Release mode. - // Also use reference instead of MemoryMarshal.Cast to remove boundary check (or something, it just result in something like that). + // Also use reference instead of MemoryMarshal.Cast to remove boundary check (or something, it just result in something + // like that). - // TODO: Figure out some kind of way to directly pass the Span directly into Contains call, which make method smaller and more prone to inlining... + // TODO: Figure out some kind of way to directly pass the Span directly into Contains call, which make method smaller and + // more prone to inlining... unsafe { #pragma warning disable CS8500 // This takes the address of, gets the size of, or declares a pointer to a managed type @@ -176,6 +177,10 @@ public static unsafe byte PackByte(this ReadOnlySpan source) return unchecked((byte)(IntegerPackingMagic * correct.AsUInt64().GetElement(0) >> 56)); } + + // Probably should remove this piece of code because it is untested, but I see no reason why it should fail + // unless vld1_u8 reverse positions of 8 bytes for some reason. + if (AdvSimd.IsSupported) { // Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware). @@ -240,12 +245,12 @@ public static unsafe short PackInt16(this ReadOnlySpan source) goto default; } - fixed (bool* pSource = source) - { - // TODO: .NET 8.0 Wasm support. - // TODO: Implement a replacement for UInt64 vector multiplication (there are no instruction for this built-in). + // TODO: AdvSimd implementation. + // TODO: WasmSimd implementation. 
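// For reference, a sketch of the NEON path removed below; it mirrors the SSE2 branch and
// could be restored once it can be validated on real hardware:
//
// if (AdvSimd.IsSupported)
// {
//     fixed (bool* pSource = source)
//     {
//         var load = AdvSimd.LoadVector128((byte*)pSource);
//         var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64();
//         var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct);
//         var shift = AdvSimd.ShiftRightLogical(multiply, 56);
//
//         return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8));
//     }
// }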
- if (Sse2.IsSupported) + if (Sse2.IsSupported) + { + fixed (bool* pSource = source) { var load = Sse2.LoadVector128((byte*)pSource); var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); @@ -254,21 +259,9 @@ public static unsafe short PackInt16(this ReadOnlySpan source) return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8)); } - if (AdvSimd.IsSupported) - { - // Hasn't been tested since March 6th 2023 (Reason: Unavailable hardware). - var load = AdvSimd.LoadVector128((byte*)pSource); - var correct = IntrinsicUtility.CorrectBoolean(load).AsUInt64(); - var multiply = IntrinsicUtility.Multiply(IntegerPackingMagicV128, correct); - var shift = AdvSimd.ShiftRightLogical(multiply, 56); - - return (short)(shift.GetElement(0) | (shift.GetElement(1) << 8)); - } - else - { - goto default; - } } + + goto default; #endif default: @@ -324,9 +317,6 @@ public static unsafe int PackInt32(this ReadOnlySpan source) fixed (bool* pSource = source) { - // TODO: .NET 8.0 Wasm support. - // TODO: Implement a replacement for UInt64 vector multiplication (there are no instruction for this built-in). - if (Avx2.IsSupported) { var load = Avx.LoadVector256((byte*)pSource);