From 502318845f3efcb566e716732eefc41979678a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20H=C3=A4ger?= Date: Fri, 5 Mar 2021 15:28:03 +0100 Subject: [PATCH 1/6] Implemented ~90% faster matrix invert functions --- src/OpenTK.Graphics/OpenTK.Graphics.csproj | 2 +- src/OpenTK.Mathematics/Matrix/Matrix2.cs | 9 +- src/OpenTK.Mathematics/Matrix/Matrix2d.cs | 9 +- src/OpenTK.Mathematics/Matrix/Matrix3.cs | 138 ++----- src/OpenTK.Mathematics/Matrix/Matrix3d.cs | 138 ++----- src/OpenTK.Mathematics/Matrix/Matrix4.cs | 384 +++++++++++++----- src/OpenTK.Mathematics/Matrix/Matrix4d.cs | 174 ++++---- .../OpenTK.Mathematics.csproj | 4 +- .../OpenTK.Windowing.Common.csproj | 2 +- 9 files changed, 437 insertions(+), 423 deletions(-) diff --git a/src/OpenTK.Graphics/OpenTK.Graphics.csproj b/src/OpenTK.Graphics/OpenTK.Graphics.csproj index bf41f765eb..b48b6ee442 100644 --- a/src/OpenTK.Graphics/OpenTK.Graphics.csproj +++ b/src/OpenTK.Graphics/OpenTK.Graphics.csproj @@ -1,7 +1,7 @@  - netstandard2.1 + netcoreapp3.1 true OpenTK.Graphics diff --git a/src/OpenTK.Mathematics/Matrix/Matrix2.cs b/src/OpenTK.Mathematics/Matrix/Matrix2.cs index a332e6e63f..ec86c1592c 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix2.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix2.cs @@ -551,7 +551,7 @@ public static Matrix2 Subtract(Matrix2 left, Matrix2 right) /// Thrown if the Matrix2 is singular. public static void Invert(in Matrix2 mat, out Matrix2 result) { - var det = mat.Determinant; + var det = (mat.Row0.X * mat.Row1.Y) - (mat.Row0.Y * mat.Row1.X); if (det == 0) { @@ -560,10 +560,15 @@ public static void Invert(in Matrix2 mat, out Matrix2 result) var invDet = 1f / det; + // Because the c# jit assumes noalias for byref types we need to + // save this value as the write to result.Row0.X could change the + // value of mat.Row0.X. + var row0x = mat.Row0.X; + result.Row0.X = mat.Row1.Y * invDet; result.Row0.Y = -mat.Row0.Y * invDet; result.Row1.X = -mat.Row1.X * invDet; - result.Row1.Y = mat.Row0.X * invDet; + result.Row1.Y = row0x * invDet; } /// diff --git a/src/OpenTK.Mathematics/Matrix/Matrix2d.cs b/src/OpenTK.Mathematics/Matrix/Matrix2d.cs index 0465d05abb..08d2f04664 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix2d.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix2d.cs @@ -551,7 +551,7 @@ public static Matrix2d Subtract(Matrix2d left, Matrix2d right) /// Thrown if the Matrix2d is singular. public static void Invert(in Matrix2d mat, out Matrix2d result) { - var det = mat.Determinant; + var det = (mat.Row0.X * mat.Row1.Y) - (mat.Row0.Y * mat.Row1.X); if (det == 0) { @@ -560,10 +560,15 @@ public static void Invert(in Matrix2d mat, out Matrix2d result) var invDet = 1f / det; + // Because the c# jit assumes noalias for byref types we need to + // save this value as the write to result.Row0.X could change the + // value of mat.Row0.X. + var row0x = mat.Row0.X; + result.Row0.X = mat.Row1.Y * invDet; result.Row0.Y = -mat.Row0.Y * invDet; result.Row1.X = -mat.Row1.X * invDet; - result.Row1.Y = mat.Row0.X * invDet; + result.Row1.Y = row0x * invDet; } /// diff --git a/src/OpenTK.Mathematics/Matrix/Matrix3.cs b/src/OpenTK.Mathematics/Matrix/Matrix3.cs index 7558359b5d..88c8b4cd85 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix3.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix3.cs @@ -804,116 +804,52 @@ public static void Mult(in Matrix3 left, in Matrix3 right, out Matrix3 result) /// /// Calculate the inverse of the given matrix. /// - /// The matrix to invert. + /// The matrix to invert. /// The inverse of the given matrix if it has one, or the input if it is singular. /// Thrown if the Matrix3 is singular. - public static void Invert(in Matrix3 mat, out Matrix3 result) + public static void Invert(in Matrix3 matrix, out Matrix3 result) { - int[] colIdx = { 0, 0, 0 }; - int[] rowIdx = { 0, 0, 0 }; - int[] pivotIdx = { -1, -1, -1 }; - - float[,] inverse = - { - { mat.Row0.X, mat.Row0.Y, mat.Row0.Z }, - { mat.Row1.X, mat.Row1.Y, mat.Row1.Z }, - { mat.Row2.X, mat.Row2.Y, mat.Row2.Z } - }; - - var icol = 0; - var irow = 0; - for (var i = 0; i < 3; i++) - { - var maxPivot = 0.0f; - for (var j = 0; j < 3; j++) - { - if (pivotIdx[j] != 0) - { - for (var k = 0; k < 3; ++k) - { - if (pivotIdx[k] == -1) - { - var absVal = Math.Abs(inverse[j, k]); - if (absVal > maxPivot) - { - maxPivot = absVal; - irow = j; - icol = k; - } - } - else if (pivotIdx[k] > 0) - { - result = mat; - return; - } - } - } - } - - ++pivotIdx[icol]; - - if (irow != icol) - { - for (var k = 0; k < 3; ++k) - { - var f = inverse[irow, k]; - inverse[irow, k] = inverse[icol, k]; - inverse[icol, k] = f; - } - } - - rowIdx[i] = irow; - colIdx[i] = icol; + // Original implementation can be found here: + // https://github.com/niswegmann/small-matrix-inverse/blob/6eac02b84ad06870692abaf828638a391548502c/invert3x3_c.h + Matrix3 mat = matrix; - var pivot = inverse[icol, icol]; + // Compute the elements needed to calculate the determinant + // so that we can throw without writing anything to the out parameter. + float invRow0X = (+mat.Row1.Y * mat.Row2.Z) - (mat.Row1.Z * mat.Row2.Y); + float invRow1X = (-mat.Row1.X * mat.Row2.Z) + (mat.Row1.Z * mat.Row2.X); + float invRow2X = (+mat.Row1.X * mat.Row2.Y) - (mat.Row1.Y * mat.Row2.X); - if (pivot == 0.0f) - { - throw new InvalidOperationException("Matrix is singular and cannot be inverted."); - } - - var oneOverPivot = 1.0f / pivot; - inverse[icol, icol] = 1.0f; - for (var k = 0; k < 3; ++k) - { - inverse[icol, k] *= oneOverPivot; - } + // Compute determinant: + float det = (mat.Row0.X * invRow0X) + (mat.Row0.Y * invRow1X) + (mat.Row0.Z * invRow2X); - for (var j = 0; j < 3; ++j) - { - if (icol != j) - { - var f = inverse[j, icol]; - inverse[j, icol] = 0.0f; - for (var k = 0; k < 3; ++k) - { - inverse[j, k] -= inverse[icol, k] * f; - } - } - } - } - - for (var j = 2; j >= 0; --j) + if (det == 0f) { - var ir = rowIdx[j]; - var ic = colIdx[j]; - for (var k = 0; k < 3; ++k) - { - var f = inverse[k, ir]; - inverse[k, ir] = inverse[k, ic]; - inverse[k, ic] = f; - } + throw new InvalidOperationException("Matrix is singular and cannot be inverted."); } - result.Row0.X = inverse[0, 0]; - result.Row0.Y = inverse[0, 1]; - result.Row0.Z = inverse[0, 2]; - result.Row1.X = inverse[1, 0]; - result.Row1.Y = inverse[1, 1]; - result.Row1.Z = inverse[1, 2]; - result.Row2.X = inverse[2, 0]; - result.Row2.Y = inverse[2, 1]; - result.Row2.Z = inverse[2, 2]; + // Compute adjoint: + result.Row0.X = invRow0X; + result.Row0.Y = (-mat.Row0.Y * mat.Row2.Z) + (mat.Row0.Z * mat.Row2.Y); + result.Row0.Z = (+mat.Row0.Y * mat.Row1.Z) - (mat.Row0.Z * mat.Row1.Y); + result.Row1.X = invRow1X; + result.Row1.Y = (+mat.Row0.X * mat.Row2.Z) - (mat.Row0.Z * mat.Row2.X); + result.Row1.Z = (-mat.Row0.X * mat.Row1.Z) + (mat.Row0.Z * mat.Row1.X); + result.Row2.X = invRow2X; + result.Row2.Y = (-mat.Row0.X * mat.Row2.Y) + (mat.Row0.Y * mat.Row2.X); + result.Row2.Z = (+mat.Row0.X * mat.Row1.Y) - (mat.Row0.Y * mat.Row1.X); + + // Multiply adjoint with reciprocal of determinant: + det = 1.0f / det; + + result.Row0.X *= det; + result.Row0.Y *= det; + result.Row0.Z *= det; + result.Row1.X *= det; + result.Row1.Y *= det; + result.Row1.Z *= det; + result.Row2.X *= det; + result.Row2.Y *= det; + result.Row2.Z *= det; } /// diff --git a/src/OpenTK.Mathematics/Matrix/Matrix3d.cs b/src/OpenTK.Mathematics/Matrix/Matrix3d.cs index 491b5393fa..f49b6effe4 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix3d.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix3d.cs @@ -800,116 +800,52 @@ public static void Mult(in Matrix3d left, in Matrix3d right, out Matrix3d result /// /// Calculate the inverse of the given matrix. /// - /// The matrix to invert. + /// The matrix to invert. /// The inverse of the given matrix if it has one, or the input if it is singular. /// Thrown if the Matrix3d is singular. - public static void Invert(in Matrix3d mat, out Matrix3d result) + public static void Invert(in Matrix3d matrix, out Matrix3d result) { - int[] colIdx = { 0, 0, 0 }; - int[] rowIdx = { 0, 0, 0 }; - int[] pivotIdx = { -1, -1, -1 }; - - double[,] inverse = - { - { mat.Row0.X, mat.Row0.Y, mat.Row0.Z }, - { mat.Row1.X, mat.Row1.Y, mat.Row1.Z }, - { mat.Row2.X, mat.Row2.Y, mat.Row2.Z } - }; - - var icol = 0; - var irow = 0; - for (var i = 0; i < 3; i++) - { - var maxPivot = 0.0; - for (var j = 0; j < 3; j++) - { - if (pivotIdx[j] != 0) - { - for (var k = 0; k < 3; ++k) - { - if (pivotIdx[k] == -1) - { - var absVal = Math.Abs(inverse[j, k]); - if (absVal > maxPivot) - { - maxPivot = absVal; - irow = j; - icol = k; - } - } - else if (pivotIdx[k] > 0) - { - result = mat; - return; - } - } - } - } - - ++pivotIdx[icol]; - - if (irow != icol) - { - for (var k = 0; k < 3; ++k) - { - var f = inverse[irow, k]; - inverse[irow, k] = inverse[icol, k]; - inverse[icol, k] = f; - } - } - - rowIdx[i] = irow; - colIdx[i] = icol; + // Original implementation can be found here: + // https://github.com/niswegmann/small-matrix-inverse/blob/6eac02b84ad06870692abaf828638a391548502c/invert3x3_c.h + Matrix3d mat = matrix; - var pivot = inverse[icol, icol]; + // Compute the elements needed to calculate the determinant + // so that we can throw without writing anything to the out parameter. + double invRow0X = (+mat.Row1.Y * mat.Row2.Z) - (mat.Row1.Z * mat.Row2.Y); + double invRow1X = (-mat.Row1.X * mat.Row2.Z) + (mat.Row1.Z * mat.Row2.X); + double invRow2X = (+mat.Row1.X * mat.Row2.Y) - (mat.Row1.Y * mat.Row2.X); - if (pivot == 0.0) - { - throw new InvalidOperationException("Matrix is singular and cannot be inverted."); - } - - var oneOverPivot = 1.0 / pivot; - inverse[icol, icol] = 1.0; - for (var k = 0; k < 3; ++k) - { - inverse[icol, k] *= oneOverPivot; - } + // Compute determinant: + double det = (mat.Row0.X * invRow0X) + (mat.Row0.Y * invRow1X) + (mat.Row0.Z * invRow2X); - for (var j = 0; j < 3; ++j) - { - if (icol != j) - { - var f = inverse[j, icol]; - inverse[j, icol] = 0.0; - for (var k = 0; k < 3; ++k) - { - inverse[j, k] -= inverse[icol, k] * f; - } - } - } - } - - for (var j = 2; j >= 0; --j) + if (det == 0f) { - var ir = rowIdx[j]; - var ic = colIdx[j]; - for (var k = 0; k < 3; ++k) - { - var f = inverse[k, ir]; - inverse[k, ir] = inverse[k, ic]; - inverse[k, ic] = f; - } + throw new InvalidOperationException("Matrix is singular and cannot be inverted."); } - result.Row0.X = inverse[0, 0]; - result.Row0.Y = inverse[0, 1]; - result.Row0.Z = inverse[0, 2]; - result.Row1.X = inverse[1, 0]; - result.Row1.Y = inverse[1, 1]; - result.Row1.Z = inverse[1, 2]; - result.Row2.X = inverse[2, 0]; - result.Row2.Y = inverse[2, 1]; - result.Row2.Z = inverse[2, 2]; + // Compute adjoint: + result.Row0.X = invRow0X; + result.Row0.Y = (-mat.Row0.Y * mat.Row2.Z) + (mat.Row0.Z * mat.Row2.Y); + result.Row0.Z = (+mat.Row0.Y * mat.Row1.Z) - (mat.Row0.Z * mat.Row1.Y); + result.Row1.X = invRow1X; + result.Row1.Y = (+mat.Row0.X * mat.Row2.Z) - (mat.Row0.Z * mat.Row2.X); + result.Row1.Z = (-mat.Row0.X * mat.Row1.Z) + (mat.Row0.Z * mat.Row1.X); + result.Row2.X = invRow2X; + result.Row2.Y = (-mat.Row0.X * mat.Row2.Y) + (mat.Row0.Y * mat.Row2.X); + result.Row2.Z = (+mat.Row0.X * mat.Row1.Y) - (mat.Row0.Y * mat.Row1.X); + + // Multiply adjoint with reciprocal of determinant: + det = 1.0f / det; + + result.Row0.X *= det; + result.Row0.Y *= det; + result.Row0.Z *= det; + result.Row1.X *= det; + result.Row1.Y *= det; + result.Row1.Z *= det; + result.Row2.X *= det; + result.Row2.Y *= det; + result.Row2.Z *= det; } /// diff --git a/src/OpenTK.Mathematics/Matrix/Matrix4.cs b/src/OpenTK.Mathematics/Matrix/Matrix4.cs index 17850f5fce..0ae3a53938 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix4.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix4.cs @@ -23,7 +23,10 @@ using System; using System.Diagnostics.CodeAnalysis; using System.Diagnostics.Contracts; +using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace OpenTK.Mathematics { @@ -1500,125 +1503,284 @@ public static void Mult(in Matrix4 left, float right, out Matrix4 result) /// Thrown if the Matrix4 is singular. public static void Invert(in Matrix4 mat, out Matrix4 result) { - int[] colIdx = { 0, 0, 0, 0 }; - int[] rowIdx = { 0, 0, 0, 0 }; - int[] pivotIdx = { -1, -1, -1, -1 }; - - // convert the matrix to an array for easy looping - float[,] inverse = + if (Sse3.IsSupported) { - { mat.Row0.X, mat.Row0.Y, mat.Row0.Z, mat.Row0.W }, - { mat.Row1.X, mat.Row1.Y, mat.Row1.Z, mat.Row1.W }, - { mat.Row2.X, mat.Row2.Y, mat.Row2.Z, mat.Row2.W }, - { mat.Row3.X, mat.Row3.Y, mat.Row3.Z, mat.Row3.W } - }; - var icol = 0; - var irow = 0; - for (var i = 0; i < 4; i++) + InvertSse3(in mat, out result); + } + else { - // Find the largest pivot value - var maxPivot = 0.0f; - for (var j = 0; j < 4; j++) - { - if (pivotIdx[j] != 0) - { - for (var k = 0; k < 4; ++k) - { - if (pivotIdx[k] == -1) - { - var absVal = Math.Abs(inverse[j, k]); - if (absVal > maxPivot) - { - maxPivot = absVal; - irow = j; - icol = k; - } - } - else if (pivotIdx[k] > 0) - { - result = mat; - return; - } - } - } - } - - ++pivotIdx[icol]; - - // Swap rows over so pivot is on diagonal - if (irow != icol) - { - for (var k = 0; k < 4; ++k) - { - var f = inverse[irow, k]; - inverse[irow, k] = inverse[icol, k]; - inverse[icol, k] = f; - } - } - - rowIdx[i] = irow; - colIdx[i] = icol; - - var pivot = inverse[icol, icol]; - - // check for singular matrix - if (pivot == 0.0f) - { - throw new InvalidOperationException("Matrix is singular and cannot be inverted."); - } - - // Scale row so it has a unit diagonal - var oneOverPivot = 1.0f / pivot; - inverse[icol, icol] = 1.0f; - for (var k = 0; k < 4; ++k) - { - inverse[icol, k] *= oneOverPivot; - } + InvertFallback(in mat, out result); + } + } - // Do elimination of non-diagonal elements - for (var j = 0; j < 4; ++j) - { - // check this isn't on the diagonal - if (icol != j) - { - var f = inverse[j, icol]; - inverse[j, icol] = 0.0f; - for (var k = 0; k < 4; ++k) - { - inverse[j, k] -= inverse[icol, k] * f; - } - } - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) + { +#pragma warning disable SA1114 // Parameter list should follow declaration +#pragma warning disable SA1312 // Variable names should begin with lower-case letter +#pragma warning disable SA1512 // Single-line comments should not be followed by blank line +#pragma warning disable SA1515 // Single-line comment should be preceded by blank line + + // Original derivation and implementation can be found here: + // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html + + float* m = (float*)Unsafe.AsPointer(ref Unsafe.AsRef(in mat)); + Vector128 row0 = Sse.LoadVector128(m); + Vector128 row1 = Sse.LoadVector128(m + 4); + Vector128 row2 = Sse.LoadVector128(m + 8); + Vector128 row3 = Sse.LoadVector128(m + 12); + + // __m128 A = VecShuffle_0101(inM.mVec[0], inM.mVec[1]); + var A = Sse.MoveLowToHigh(row0, row1); + // __m128 B = VecShuffle_2323(inM.mVec[0], inM.mVec[1]); + var B = Sse.MoveHighToLow(row1, row0); + // __m128 C = VecShuffle_0101(inM.mVec[2], inM.mVec[3]); + var C = Sse.MoveLowToHigh(row2, row3); + // __m128 D = VecShuffle_2323(inM.mVec[2], inM.mVec[3]); + var D = Sse.MoveHighToLow(row3, row2); + + const byte Shuffle_0202 = 0b1000_1000; + const byte Shuffle_1313 = 0b1101_1101; + // __m128 detSub = _mm_sub_ps( + var detSub = Sse.Subtract( + // _mm_mul_ps(VecShuffle(inM.mVec[0], inM.mVec[2], 0, 2, 0, 2), VecShuffle(inM.mVec[1], inM.mVec[3], 1, 3, 1, 3)), + Sse.Multiply( + Sse.Shuffle(row0, row2, Shuffle_0202), + Sse.Shuffle(row1, row3, Shuffle_1313)), + // _mm_mul_ps(VecShuffle(inM.mVec[0], inM.mVec[2], 1, 3, 1, 3), VecShuffle(inM.mVec[1], inM.mVec[3], 0, 2, 0, 2))); + Sse.Multiply( + Sse.Shuffle(row0, row2, Shuffle_1313), + Sse.Shuffle(row1, row3, Shuffle_0202))); + + const byte Shuffle_0000 = 0b0000_0000; + const byte Shuffle_1111 = 0b0101_0101; + const byte Shuffle_2222 = 0b1010_1010; + const byte Shuffle_3333 = 0b1111_1111; + + // VecSwizzle1(detSub, 0); + // #define VecSwizzle1(vec, x) VecSwizzleMask(vec, MakeShuffleMask(x,x,x,x)) + // #define VecSwizzleMask(vec, mask) _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vec), mask)) + + // __m128 detA = VecSwizzle1(detSub, 0); + var detA = Sse2.Shuffle(detSub.AsInt32(), Shuffle_0000).AsSingle(); + // __m128 detB = VecSwizzle1(detSub, 1); + var detB = Sse2.Shuffle(detSub.AsInt32(), Shuffle_1111).AsSingle(); + // __m128 detC = VecSwizzle1(detSub, 2); + var detC = Sse2.Shuffle(detSub.AsInt32(), Shuffle_2222).AsSingle(); + // __m128 detD = VecSwizzle1(detSub, 3); + var detD = Sse2.Shuffle(detSub.AsInt32(), Shuffle_3333).AsSingle(); + + const byte Shuffle_3300 = 0b0000_1111; + const byte Shuffle_1122 = 0b1010_0101; + const byte Shuffle_2301 = 0b0100_1110; + + // __m128 D_C = Mat2AdjMul(D, C); + // Mat2AdjMul(vec1, vec2): + // _mm_sub_ps( + var D_C = Sse.Subtract( + // _mm_mul_ps(VecSwizzle(vec1, 3,3,0,0), vec2), + Sse.Multiply(Sse2.Shuffle(D.AsInt32(), Shuffle_3300).AsSingle(), C), + // _mm_mul_ps( + Sse.Multiply( + // VecSwizzle(vec1, 1, 1, 2, 2), + Sse2.Shuffle(D.AsInt32(), Shuffle_1122).AsSingle(), + // VecSwizzle(vec2, 2, 3, 0, 1))); + Sse2.Shuffle(C.AsInt32(), Shuffle_2301).AsSingle())); + + // __m128 A_B = Mat2AdjMul(A, B); + var A_B = Sse.Subtract( + Sse.Multiply(Sse2.Shuffle(A.AsInt32(), Shuffle_3300).AsSingle(), B), + Sse.Multiply( + Sse2.Shuffle(A.AsInt32(), Shuffle_1122).AsSingle(), + Sse2.Shuffle(B.AsInt32(), Shuffle_2301).AsSingle())); + + const byte Shuffle_0303 = 0b1100_1100; + const byte Shuffle_1032 = 0b1011_0001; + const byte Shuffle_2121 = 0b0110_0110; + + // Mat2Mul(vec1, vec2): + //_mm_add_ps(_mm_mul_ps(vec1, VecSwizzle(vec2, 0, 3, 0, 3)), + // _mm_mul_ps(VecSwizzle(vec1, 1, 0, 3, 2), VecSwizzle(vec2, 2, 1, 2, 1))); + + // __m128 X_ = _mm_sub_ps( + var X_ = Sse.Subtract( + // _mm_mul_ps(detD, A), + Sse.Multiply(detD, A), + // Mat2Mul(B, D_C)); + // _mm_add_ps( + Sse.Add( + // _mm_mul_ps(vec1, VecSwizzle(vec2, 0, 3, 0, 3)), + Sse.Multiply(B, Sse2.Shuffle(D_C.AsInt32(), Shuffle_0303).AsSingle()), + // _mm_mul_ps( + Sse.Multiply( + // VecSwizzle(vec1, 1, 0, 3, 2), + Sse2.Shuffle(B.AsInt32(), Shuffle_1032).AsSingle(), + // VecSwizzle(vec2, 2, 1, 2, 1))); + Sse2.Shuffle(D_C.AsInt32(), Shuffle_2121).AsSingle()))); + + // __m128 W_ = _mm_sub_ps(_mm_mul_ps(detA, D), Mat2Mul(C, A_B)); + var W_ = Sse.Subtract( + Sse.Multiply(detA, D), + Sse.Add( + Sse.Multiply(C, Sse2.Shuffle(A_B.AsInt32(), Shuffle_0303).AsSingle()), + Sse.Multiply( + Sse2.Shuffle(C.AsInt32(), Shuffle_1032).AsSingle(), + Sse2.Shuffle(A_B.AsInt32(), Shuffle_2121).AsSingle()))); + + // __m128 detM = _mm_mul_ps(detA, detD); + var detM = Sse.Multiply(detA, detD); + + const byte Shuffle_3030 = 0b0011_0011; + + // Mat2MulAdj(vec1, vec2): + //_mm_sub_ps(_mm_mul_ps(vec1, VecSwizzle(vec2, 3, 0, 3, 0)), + // _mm_mul_ps(VecSwizzle(vec1, 1, 0, 3, 2), VecSwizzle(vec2, 2, 1, 2, 1))); + + // __m128 Y_ = _mm_sub_ps( + var Y_ = Sse.Subtract( + // _mm_mul_ps(detB, C), + Sse.Multiply(detB, C), + // Mat2MulAdj(D, A_B)); + // _mm_sub_ps( + Sse.Subtract( + // _mm_mul_ps(vec1, VecSwizzle(vec2, 3, 0, 3, 0)), + Sse.Multiply(D, Sse2.Shuffle(A_B.AsInt32(), Shuffle_3030).AsSingle()), + // _mm_mul_ps( + Sse.Multiply( + // VecSwizzle(vec1, 1, 0, 3, 2), + Sse2.Shuffle(D.AsInt32(), Shuffle_1032).AsSingle(), + // VecSwizzle(vec2, 2, 1, 2, 1))); + Sse2.Shuffle(A_B.AsInt32(), Shuffle_2121).AsSingle()))); + + // __m128 Z_ = _mm_sub_ps(_mm_mul_ps(detC, B), Mat2MulAdj(A, D_C)); + var Z_ = Sse.Subtract( + Sse.Multiply(detC, B), + Sse.Subtract( + Sse.Multiply(A, Sse2.Shuffle(D_C.AsInt32(), Shuffle_3030).AsSingle()), + Sse.Multiply( + Sse2.Shuffle(A.AsInt32(), Shuffle_1032).AsSingle(), + Sse2.Shuffle(D_C.AsInt32(), Shuffle_2121).AsSingle()))); + + // detM = _mm_add_ps(detM, _mm_mul_ps(detB, detC)); + detM = Sse.Add(detM, Sse.Multiply(detB, detC)); + + const byte Shuffle_0213 = 0b1101_1000; + + // __m128 tr = _mm_mul_ps(A_B, VecSwizzle(D_C, 0,2,1,3)); + var tr = Sse.Multiply(A_B, Sse2.Shuffle(D_C.AsInt32(), Shuffle_0213).AsSingle()); + // tr = _mm_hadd_ps(tr, tr); + tr = Sse3.HorizontalAdd(tr, tr); + // tr = _mm_hadd_ps(tr, tr); + tr = Sse3.HorizontalAdd(tr, tr); + + // detM = _mm_sub_ps(detM, tr); + detM = Sse.Subtract(detM, tr); + + if (detM.GetElement(0) < 0.000001f) + { + throw new InvalidOperationException("Matrix is singular and cannot be inverted."); } - for (var j = 3; j >= 0; --j) + // const __m128 adjSignMask = _mm_setr_ps(1.f, -1.f, -1.f, 1.f); + var adjSignMask = Vector128.Create(1.0f, -1.0f, -1.0f, 1.0f); + + // __m128 rDetM = _mm_div_ps(adjSignMask, detM); + var rDetM = Sse.Divide(adjSignMask, detM); + + // X_ = _mm_mul_ps(X_, rDetM); + X_ = Sse.Multiply(X_, rDetM); + // Y_ = _mm_mul_ps(Y_, rDetM); + Y_ = Sse.Multiply(Y_, rDetM); + // Z_ = _mm_mul_ps(Z_, rDetM); + Z_ = Sse.Multiply(Z_, rDetM); + // W_ = _mm_mul_ps(W_, rDetM); + W_ = Sse.Multiply(W_, rDetM); + + const byte Shuffle_3131 = 0b0111_0111; + const byte Shuffle_2020 = 0b0010_0010; + + Unsafe.SkipInit(out result); + + // r.mVec[0] = VecShuffle(X_, Y_, 3, 1, 3, 1); + Sse.Store((float*)Unsafe.AsPointer(ref result) + 0, Sse.Shuffle(X_, Y_, Shuffle_3131)); + // r.mVec[1] = VecShuffle(X_, Y_, 2, 0, 2, 0); + Sse.Store((float*)Unsafe.AsPointer(ref result) + 4, Sse.Shuffle(X_, Y_, Shuffle_2020)); + // r.mVec[2] = VecShuffle(Z_, W_, 3, 1, 3, 1); + Sse.Store((float*)Unsafe.AsPointer(ref result) + 8, Sse.Shuffle(Z_, W_, Shuffle_3131)); + // r.mVec[3] = VecShuffle(Z_, W_, 2, 0, 2, 0); + Sse.Store((float*)Unsafe.AsPointer(ref result) + 12, Sse.Shuffle(Z_, W_, Shuffle_2020)); +#pragma warning restore SA1114 // Parameter list should follow declaration +#pragma warning restore SA1312 // Variable names should begin with lower-case letter +#pragma warning restore SA1512 // Single-line comments should not be followed by blank lines +#pragma warning restore SA1515 // Single-line comment should be preceded by blank line + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void InvertFallback(in Matrix4 mat, out Matrix4 result) + { + // Original implementation can be found here: + // https://github.com/dotnet/runtime/blob/79ae74f5ca5c8a6fe3a48935e85bd7374959c570/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.cs#L1556 +#pragma warning disable SA1407 // Arithmetic expressions should declare precedence + float a = mat.M11, b = mat.M21, c = mat.M31, d = mat.M41; + float e = mat.M12, f = mat.M22, g = mat.M32, h = mat.M42; + float i = mat.M13, j = mat.M23, k = mat.M33, l = mat.M43; + float m = mat.M14, n = mat.M24, o = mat.M34, p = mat.M44; + + float kp_lo = k * p - l * o; + float jp_ln = j * p - l * n; + float jo_kn = j * o - k * n; + float ip_lm = i * p - l * m; + float io_km = i * o - k * m; + float in_jm = i * n - j * m; + + float a11 = +(f * kp_lo - g * jp_ln + h * jo_kn); + float a12 = -(e * kp_lo - g * ip_lm + h * io_km); + float a13 = +(e * jp_ln - f * ip_lm + h * in_jm); + float a14 = -(e * jo_kn - f * io_km + g * in_jm); + + float det = a * a11 + b * a12 + c * a13 + d * a14; + + if (Math.Abs(det) < float.Epsilon) { - var ir = rowIdx[j]; - var ic = colIdx[j]; - for (var k = 0; k < 4; ++k) - { - var f = inverse[k, ir]; - inverse[k, ir] = inverse[k, ic]; - inverse[k, ic] = f; - } + throw new InvalidOperationException("Matrix is singular and cannot be inverted."); } - result.Row0.X = inverse[0, 0]; - result.Row0.Y = inverse[0, 1]; - result.Row0.Z = inverse[0, 2]; - result.Row0.W = inverse[0, 3]; - result.Row1.X = inverse[1, 0]; - result.Row1.Y = inverse[1, 1]; - result.Row1.Z = inverse[1, 2]; - result.Row1.W = inverse[1, 3]; - result.Row2.X = inverse[2, 0]; - result.Row2.Y = inverse[2, 1]; - result.Row2.Z = inverse[2, 2]; - result.Row2.W = inverse[2, 3]; - result.Row3.X = inverse[3, 0]; - result.Row3.Y = inverse[3, 1]; - result.Row3.Z = inverse[3, 2]; - result.Row3.W = inverse[3, 3]; + float invDet = 1.0f / det; + + result.Row0 = new Vector4(a11, a12, a13, a14) * invDet; + + result.Row1 = new Vector4( + -(b * kp_lo - c * jp_ln + d * jo_kn), + +(a * kp_lo - c * ip_lm + d * io_km), + -(a * jp_ln - b * ip_lm + d * in_jm), + +(a * jo_kn - b * io_km + c * in_jm)) * invDet; + + float gp_ho = g * p - h * o; + float fp_hn = f * p - h * n; + float fo_gn = f * o - g * n; + float ep_hm = e * p - h * m; + float eo_gm = e * o - g * m; + float en_fm = e * n - f * m; + + result.Row2 = new Vector4( + +(b * gp_ho - c * fp_hn + d * fo_gn), + -(a * gp_ho - c * ep_hm + d * eo_gm), + +(a * fp_hn - b * ep_hm + d * en_fm), + -(a * fo_gn - b * eo_gm + c * en_fm)) * invDet; + + float gl_hk = g * l - h * k; + float fl_hj = f * l - h * j; + float fk_gj = f * k - g * j; + float el_hi = e * l - h * i; + float ek_gi = e * k - g * i; + float ej_fi = e * j - f * i; + + result.Row3 = new Vector4( + -(b * gl_hk - c * fl_hj + d * fk_gj), + +(a * gl_hk - c * el_hi + d * ek_gi), + -(a * fl_hj - b * el_hi + d * ej_fi), + +(a * fk_gj - b * ek_gi + c * ej_fi)) * invDet; +#pragma warning restore SA1407 // Arithmetic expressions should declare precedence } /// diff --git a/src/OpenTK.Mathematics/Matrix/Matrix4d.cs b/src/OpenTK.Mathematics/Matrix/Matrix4d.cs index 2f3a3aa439..b759bdacdf 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix4d.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix4d.cs @@ -1576,118 +1576,88 @@ public static void Mult(in Matrix4d left, double right, out Matrix4d result) /// Calculate the inverse of the given matrix. /// /// The matrix to invert. - /// The inverse of the given matrix. + /// The inverted matrix. /// Thrown if the Matrix4d is singular. [Pure] - public static Matrix4d Invert(Matrix4d mat) - { - int[] colIdx = { 0, 0, 0, 0 }; - int[] rowIdx = { 0, 0, 0, 0 }; - int[] pivotIdx = { -1, -1, -1, -1 }; - - // convert the matrix to an array for easy looping - double[,] inverse = - { - { mat.Row0.X, mat.Row0.Y, mat.Row0.Z, mat.Row0.W }, - { mat.Row1.X, mat.Row1.Y, mat.Row1.Z, mat.Row1.W }, - { mat.Row2.X, mat.Row2.Y, mat.Row2.Z, mat.Row2.W }, - { mat.Row3.X, mat.Row3.Y, mat.Row3.Z, mat.Row3.W } - }; - var icol = 0; - var irow = 0; - for (var i = 0; i < 4; i++) + public static void Invert(in Matrix4d mat, out Matrix4d result) + { + // Original implementation can be found here: + // https://github.com/dotnet/runtime/blob/79ae74f5ca5c8a6fe3a48935e85bd7374959c570/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.cs#L1556 +#pragma warning disable SA1407 // Arithmetic expressions should declare precedence + double a = mat.M11, b = mat.M21, c = mat.M31, d = mat.M41; + double e = mat.M12, f = mat.M22, g = mat.M32, h = mat.M42; + double i = mat.M13, j = mat.M23, k = mat.M33, l = mat.M43; + double m = mat.M14, n = mat.M24, o = mat.M34, p = mat.M44; + + double kp_lo = k * p - l * o; + double jp_ln = j * p - l * n; + double jo_kn = j * o - k * n; + double ip_lm = i * p - l * m; + double io_km = i * o - k * m; + double in_jm = i * n - j * m; + + double a11 = +(f * kp_lo - g * jp_ln + h * jo_kn); + double a12 = -(e * kp_lo - g * ip_lm + h * io_km); + double a13 = +(e * jp_ln - f * ip_lm + h * in_jm); + double a14 = -(e * jo_kn - f * io_km + g * in_jm); + + double det = a * a11 + b * a12 + c * a13 + d * a14; + + if (Math.Abs(det) < double.Epsilon) { - // Find the largest pivot value - var maxPivot = 0.0; - for (var j = 0; j < 4; j++) - { - if (pivotIdx[j] != 0) - { - for (var k = 0; k < 4; ++k) - { - if (pivotIdx[k] == -1) - { - var absVal = Math.Abs(inverse[j, k]); - if (absVal > maxPivot) - { - maxPivot = absVal; - irow = j; - icol = k; - } - } - else if (pivotIdx[k] > 0) - { - return mat; - } - } - } - } + throw new InvalidOperationException("Matrix is singular and cannot be inverted."); + } - ++pivotIdx[icol]; + double invDet = 1.0f / det; - // Swap rows over so pivot is on diagonal - if (irow != icol) - { - for (var k = 0; k < 4; ++k) - { - var f = inverse[irow, k]; - inverse[irow, k] = inverse[icol, k]; - inverse[icol, k] = f; - } - } + result.Row0 = new Vector4d(a11, a12, a13, a14) * invDet; - rowIdx[i] = irow; - colIdx[i] = icol; + result.Row1 = new Vector4d( + -(b * kp_lo - c * jp_ln + d * jo_kn), + +(a * kp_lo - c * ip_lm + d * io_km), + -(a * jp_ln - b * ip_lm + d * in_jm), + +(a * jo_kn - b * io_km + c * in_jm)) * invDet; - var pivot = inverse[icol, icol]; + double gp_ho = g * p - h * o; + double fp_hn = f * p - h * n; + double fo_gn = f * o - g * n; + double ep_hm = e * p - h * m; + double eo_gm = e * o - g * m; + double en_fm = e * n - f * m; - // check for singular matrix - if (pivot == 0.0) - { - throw new InvalidOperationException("Matrix is singular and cannot be inverted."); - } + result.Row2 = new Vector4d( + +(b * gp_ho - c * fp_hn + d * fo_gn), + -(a * gp_ho - c * ep_hm + d * eo_gm), + +(a * fp_hn - b * ep_hm + d * en_fm), + -(a * fo_gn - b * eo_gm + c * en_fm)) * invDet; - // Scale row so it has a unit diagonal - var oneOverPivot = 1.0 / pivot; - inverse[icol, icol] = 1.0; - for (var k = 0; k < 4; ++k) - { - inverse[icol, k] *= oneOverPivot; - } + double gl_hk = g * l - h * k; + double fl_hj = f * l - h * j; + double fk_gj = f * k - g * j; + double el_hi = e * l - h * i; + double ek_gi = e * k - g * i; + double ej_fi = e * j - f * i; - // Do elimination of non-diagonal elements - for (var j = 0; j < 4; ++j) - { - // check this isn't on the diagonal - if (icol != j) - { - var f = inverse[j, icol]; - inverse[j, icol] = 0.0; - for (var k = 0; k < 4; ++k) - { - inverse[j, k] -= inverse[icol, k] * f; - } - } - } - } - - for (var j = 3; j >= 0; --j) - { - var ir = rowIdx[j]; - var ic = colIdx[j]; - for (var k = 0; k < 4; ++k) - { - var f = inverse[k, ir]; - inverse[k, ir] = inverse[k, ic]; - inverse[k, ic] = f; - } - } + result.Row3 = new Vector4d( + -(b * gl_hk - c * fl_hj + d * fk_gj), + +(a * gl_hk - c * el_hi + d * ek_gi), + -(a * fl_hj - b * el_hi + d * ej_fi), + +(a * fk_gj - b * ek_gi + c * ej_fi)) * invDet; +#pragma warning restore SA1407 // Arithmetic expressions should declare precedence + } - mat.Row0 = new Vector4d(inverse[0, 0], inverse[0, 1], inverse[0, 2], inverse[0, 3]); - mat.Row1 = new Vector4d(inverse[1, 0], inverse[1, 1], inverse[1, 2], inverse[1, 3]); - mat.Row2 = new Vector4d(inverse[2, 0], inverse[2, 1], inverse[2, 2], inverse[2, 3]); - mat.Row3 = new Vector4d(inverse[3, 0], inverse[3, 1], inverse[3, 2], inverse[3, 3]); - return mat; + /// + /// Calculate the inverse of the given matrix. + /// + /// The matrix to invert. + /// The inverse of the given matrix. + /// Thrown if the Matrix4d is singular. + [Pure] + public static Matrix4d Invert(in Matrix4d mat) + { + Matrix4d result; + Invert(mat, out result); + return result; } /// @@ -1733,7 +1703,7 @@ public static void Transpose(in Matrix4d mat, out Matrix4d result) /// right-hand operand. /// A new Matrix4d which holds the result of the multiplication. [Pure] - public static Matrix4d operator *(Matrix4d left, float right) + public static Matrix4d operator *(Matrix4d left, double right) { return Mult(left, right); } diff --git a/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj b/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj index 263d95745d..0b993c176f 100644 --- a/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj +++ b/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj @@ -1,6 +1,6 @@  - netstandard2.1 + netcoreapp3.1 true true OpenTK.Mathematics @@ -11,7 +11,7 @@ - + diff --git a/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj b/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj index d3318e7fd4..8e8cc63dcd 100644 --- a/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj +++ b/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj @@ -1,7 +1,7 @@  - netstandard2.1 + netcoreapp3.1 OpenTK.Windowing.Common true From cf311f9e182d42bd840b781a24a13d21505783bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20H=C3=A4ger?= Date: Sun, 7 Mar 2021 18:03:47 +0100 Subject: [PATCH 2/6] Fixed pinning issue with the invert code --- src/OpenTK.Mathematics/Matrix/Matrix4.cs | 40 +++++++++++++++--------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/OpenTK.Mathematics/Matrix/Matrix4.cs b/src/OpenTK.Mathematics/Matrix/Matrix4.cs index 0ae3a53938..057ab0fa43 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix4.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix4.cs @@ -1524,11 +1524,18 @@ private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) // Original derivation and implementation can be found here: // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html - float* m = (float*)Unsafe.AsPointer(ref Unsafe.AsRef(in mat)); - Vector128 row0 = Sse.LoadVector128(m); - Vector128 row1 = Sse.LoadVector128(m + 4); - Vector128 row2 = Sse.LoadVector128(m + 8); - Vector128 row3 = Sse.LoadVector128(m + 12); + Vector128 row0; + Vector128 row1; + Vector128 row2; + Vector128 row3; + + fixed (float* m = &mat.Row0.X) + { + row0 = Sse.LoadVector128(m); + row1 = Sse.LoadVector128(m + 4); + row2 = Sse.LoadVector128(m + 8); + row3 = Sse.LoadVector128(m + 12); + } // __m128 A = VecShuffle_0101(inM.mVec[0], inM.mVec[1]); var A = Sse.MoveLowToHigh(row0, row1); @@ -1599,7 +1606,7 @@ private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) const byte Shuffle_2121 = 0b0110_0110; // Mat2Mul(vec1, vec2): - //_mm_add_ps(_mm_mul_ps(vec1, VecSwizzle(vec2, 0, 3, 0, 3)), + // _mm_add_ps(_mm_mul_ps(vec1, VecSwizzle(vec2, 0, 3, 0, 3)), // _mm_mul_ps(VecSwizzle(vec1, 1, 0, 3, 2), VecSwizzle(vec2, 2, 1, 2, 1))); // __m128 X_ = _mm_sub_ps( @@ -1633,7 +1640,7 @@ private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) const byte Shuffle_3030 = 0b0011_0011; // Mat2MulAdj(vec1, vec2): - //_mm_sub_ps(_mm_mul_ps(vec1, VecSwizzle(vec2, 3, 0, 3, 0)), + // _mm_sub_ps(_mm_mul_ps(vec1, VecSwizzle(vec2, 3, 0, 3, 0)), // _mm_mul_ps(VecSwizzle(vec1, 1, 0, 3, 2), VecSwizzle(vec2, 2, 1, 2, 1))); // __m128 Y_ = _mm_sub_ps( @@ -1701,14 +1708,17 @@ private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) Unsafe.SkipInit(out result); - // r.mVec[0] = VecShuffle(X_, Y_, 3, 1, 3, 1); - Sse.Store((float*)Unsafe.AsPointer(ref result) + 0, Sse.Shuffle(X_, Y_, Shuffle_3131)); - // r.mVec[1] = VecShuffle(X_, Y_, 2, 0, 2, 0); - Sse.Store((float*)Unsafe.AsPointer(ref result) + 4, Sse.Shuffle(X_, Y_, Shuffle_2020)); - // r.mVec[2] = VecShuffle(Z_, W_, 3, 1, 3, 1); - Sse.Store((float*)Unsafe.AsPointer(ref result) + 8, Sse.Shuffle(Z_, W_, Shuffle_3131)); - // r.mVec[3] = VecShuffle(Z_, W_, 2, 0, 2, 0); - Sse.Store((float*)Unsafe.AsPointer(ref result) + 12, Sse.Shuffle(Z_, W_, Shuffle_2020)); + fixed (float* r = &result.Row0.X) + { + // r.mVec[0] = VecShuffle(X_, Y_, 3, 1, 3, 1); + Sse.Store(r + 0, Sse.Shuffle(X_, Y_, Shuffle_3131)); + // r.mVec[1] = VecShuffle(X_, Y_, 2, 0, 2, 0); + Sse.Store(r + 4, Sse.Shuffle(X_, Y_, Shuffle_2020)); + // r.mVec[2] = VecShuffle(Z_, W_, 3, 1, 3, 1); + Sse.Store(r + 8, Sse.Shuffle(Z_, W_, Shuffle_3131)); + // r.mVec[3] = VecShuffle(Z_, W_, 2, 0, 2, 0); + Sse.Store(r + 12, Sse.Shuffle(Z_, W_, Shuffle_2020)); + } #pragma warning restore SA1114 // Parameter list should follow declaration #pragma warning restore SA1312 // Variable names should begin with lower-case letter #pragma warning restore SA1512 // Single-line comments should not be followed by blank lines From a432bf6968b9681a97402268d9f7f9ed855d7c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20H=C3=A4ger?= Date: Sun, 7 Mar 2021 18:05:18 +0100 Subject: [PATCH 3/6] Fix usage of wrong terminology --- src/OpenTK.Mathematics/Matrix/Matrix2.cs | 2 +- src/OpenTK.Mathematics/Matrix/Matrix2d.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/OpenTK.Mathematics/Matrix/Matrix2.cs b/src/OpenTK.Mathematics/Matrix/Matrix2.cs index ec86c1592c..c9430b65b5 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix2.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix2.cs @@ -560,7 +560,7 @@ public static void Invert(in Matrix2 mat, out Matrix2 result) var invDet = 1f / det; - // Because the c# jit assumes noalias for byref types we need to + // Because the c# jit assumes alias for byref types we need to // save this value as the write to result.Row0.X could change the // value of mat.Row0.X. var row0x = mat.Row0.X; diff --git a/src/OpenTK.Mathematics/Matrix/Matrix2d.cs b/src/OpenTK.Mathematics/Matrix/Matrix2d.cs index 08d2f04664..a676047d27 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix2d.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix2d.cs @@ -560,7 +560,7 @@ public static void Invert(in Matrix2d mat, out Matrix2d result) var invDet = 1f / det; - // Because the c# jit assumes noalias for byref types we need to + // Because the c# jit assumes alias for byref types we need to // save this value as the write to result.Row0.X could change the // value of mat.Row0.X. var row0x = mat.Row0.X; From 55a7baa3ec0d03404adaecbed5eb9e24e46642c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20H=C3=A4ger?= Date: Sun, 7 Mar 2021 18:16:49 +0100 Subject: [PATCH 4/6] Made OpenTK.Mathematics have multiple targets to have instrinsics support on .net core while still being able to run on .net standard --- src/OpenTK.Mathematics/Matrix/Matrix4.cs | 8 ++++++++ src/OpenTK.Mathematics/OpenTK.Mathematics.csproj | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/OpenTK.Mathematics/Matrix/Matrix4.cs b/src/OpenTK.Mathematics/Matrix/Matrix4.cs index 057ab0fa43..ed8dc492d4 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix4.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix4.cs @@ -25,8 +25,10 @@ using System.Diagnostics.Contracts; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +#if NETCOREAPP3_1_OR_GREATER using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +#endif namespace OpenTK.Mathematics { @@ -1503,6 +1505,7 @@ public static void Mult(in Matrix4 left, float right, out Matrix4 result) /// Thrown if the Matrix4 is singular. public static void Invert(in Matrix4 mat, out Matrix4 result) { +#if NETCOREAPP3_1_OR_GREATER if (Sse3.IsSupported) { InvertSse3(in mat, out result); @@ -1511,8 +1514,12 @@ public static void Invert(in Matrix4 mat, out Matrix4 result) { InvertFallback(in mat, out result); } +#else + InvertFallback(in mat, out result); +#endif } +#if NETCOREAPP3_1_OR_GREATER [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) { @@ -1724,6 +1731,7 @@ private static unsafe void InvertSse3(in Matrix4 mat, out Matrix4 result) #pragma warning restore SA1512 // Single-line comments should not be followed by blank lines #pragma warning restore SA1515 // Single-line comment should be preceded by blank line } +#endif [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe void InvertFallback(in Matrix4 mat, out Matrix4 result) diff --git a/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj b/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj index 0b993c176f..06a5141836 100644 --- a/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj +++ b/src/OpenTK.Mathematics/OpenTK.Mathematics.csproj @@ -1,6 +1,6 @@  - netcoreapp3.1 + netstandard2.1;netcoreapp3.1 true true OpenTK.Mathematics From 5e5ac0abb9ac2f74612c6bea2e0074421a031c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20H=C3=A4ger?= Date: Sun, 7 Mar 2021 18:39:51 +0100 Subject: [PATCH 5/6] Move Matrix3.Invert copy to local floats/doubles for better code gen. --- src/OpenTK.Mathematics/Matrix/Matrix3.cs | 24 ++++++++++++----------- src/OpenTK.Mathematics/Matrix/Matrix3d.cs | 24 ++++++++++++----------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/OpenTK.Mathematics/Matrix/Matrix3.cs b/src/OpenTK.Mathematics/Matrix/Matrix3.cs index 88c8b4cd85..291e34dc3b 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix3.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix3.cs @@ -811,16 +811,18 @@ public static void Invert(in Matrix3 matrix, out Matrix3 result) { // Original implementation can be found here: // https://github.com/niswegmann/small-matrix-inverse/blob/6eac02b84ad06870692abaf828638a391548502c/invert3x3_c.h - Matrix3 mat = matrix; + float row0X = matrix.Row0.X, row0Y = matrix.Row0.Y, row0Z = matrix.Row0.Z; + float row1X = matrix.Row1.X, row1Y = matrix.Row1.Y, row1Z = matrix.Row1.Z; + float row2X = matrix.Row2.X, row2Y = matrix.Row2.Y, row2Z = matrix.Row2.Z; // Compute the elements needed to calculate the determinant // so that we can throw without writing anything to the out parameter. - float invRow0X = (+mat.Row1.Y * mat.Row2.Z) - (mat.Row1.Z * mat.Row2.Y); - float invRow1X = (-mat.Row1.X * mat.Row2.Z) + (mat.Row1.Z * mat.Row2.X); - float invRow2X = (+mat.Row1.X * mat.Row2.Y) - (mat.Row1.Y * mat.Row2.X); + float invRow0X = (+row1Y * row2Z) - (row1Z * row2Y); + float invRow1X = (-row1X * row2Z) + (row1Z * row2X); + float invRow2X = (+row1X * row2Y) - (row1Y * row2X); // Compute determinant: - float det = (mat.Row0.X * invRow0X) + (mat.Row0.Y * invRow1X) + (mat.Row0.Z * invRow2X); + float det = (row0X * invRow0X) + (row0Y * invRow1X) + (row0Z * invRow2X); if (det == 0f) { @@ -829,14 +831,14 @@ public static void Invert(in Matrix3 matrix, out Matrix3 result) // Compute adjoint: result.Row0.X = invRow0X; - result.Row0.Y = (-mat.Row0.Y * mat.Row2.Z) + (mat.Row0.Z * mat.Row2.Y); - result.Row0.Z = (+mat.Row0.Y * mat.Row1.Z) - (mat.Row0.Z * mat.Row1.Y); + result.Row0.Y = (-row0Y * row2Z) + (row0Z * row2Y); + result.Row0.Z = (+row0Y * row1Z) - (row0Z * row1Y); result.Row1.X = invRow1X; - result.Row1.Y = (+mat.Row0.X * mat.Row2.Z) - (mat.Row0.Z * mat.Row2.X); - result.Row1.Z = (-mat.Row0.X * mat.Row1.Z) + (mat.Row0.Z * mat.Row1.X); + result.Row1.Y = (+row0X * row2Z) - (row0Z * row2X); + result.Row1.Z = (-row0X * row1Z) + (row0Z * row1X); result.Row2.X = invRow2X; - result.Row2.Y = (-mat.Row0.X * mat.Row2.Y) + (mat.Row0.Y * mat.Row2.X); - result.Row2.Z = (+mat.Row0.X * mat.Row1.Y) - (mat.Row0.Y * mat.Row1.X); + result.Row2.Y = (-row0X * row2Y) + (row0Y * row2X); + result.Row2.Z = (+row0X * row1Y) - (row0Y * row1X); // Multiply adjoint with reciprocal of determinant: det = 1.0f / det; diff --git a/src/OpenTK.Mathematics/Matrix/Matrix3d.cs b/src/OpenTK.Mathematics/Matrix/Matrix3d.cs index f49b6effe4..7183322334 100644 --- a/src/OpenTK.Mathematics/Matrix/Matrix3d.cs +++ b/src/OpenTK.Mathematics/Matrix/Matrix3d.cs @@ -807,16 +807,18 @@ public static void Invert(in Matrix3d matrix, out Matrix3d result) { // Original implementation can be found here: // https://github.com/niswegmann/small-matrix-inverse/blob/6eac02b84ad06870692abaf828638a391548502c/invert3x3_c.h - Matrix3d mat = matrix; + double row0X = matrix.Row0.X, row0Y = matrix.Row0.Y, row0Z = matrix.Row0.Z; + double row1X = matrix.Row1.X, row1Y = matrix.Row1.Y, row1Z = matrix.Row1.Z; + double row2X = matrix.Row2.X, row2Y = matrix.Row2.Y, row2Z = matrix.Row2.Z; // Compute the elements needed to calculate the determinant // so that we can throw without writing anything to the out parameter. - double invRow0X = (+mat.Row1.Y * mat.Row2.Z) - (mat.Row1.Z * mat.Row2.Y); - double invRow1X = (-mat.Row1.X * mat.Row2.Z) + (mat.Row1.Z * mat.Row2.X); - double invRow2X = (+mat.Row1.X * mat.Row2.Y) - (mat.Row1.Y * mat.Row2.X); + double invRow0X = (+row1Y * row2Z) - (row1Z * row2Y); + double invRow1X = (-row1X * row2Z) + (row1Z * row2X); + double invRow2X = (+row1X * row2Y) - (row1Y * row2X); // Compute determinant: - double det = (mat.Row0.X * invRow0X) + (mat.Row0.Y * invRow1X) + (mat.Row0.Z * invRow2X); + double det = (row0X * invRow0X) + (row0Y * invRow1X) + (row0Z * invRow2X); if (det == 0f) { @@ -825,14 +827,14 @@ public static void Invert(in Matrix3d matrix, out Matrix3d result) // Compute adjoint: result.Row0.X = invRow0X; - result.Row0.Y = (-mat.Row0.Y * mat.Row2.Z) + (mat.Row0.Z * mat.Row2.Y); - result.Row0.Z = (+mat.Row0.Y * mat.Row1.Z) - (mat.Row0.Z * mat.Row1.Y); + result.Row0.Y = (-row0Y * row2Z) + (row0Z * row2Y); + result.Row0.Z = (+row0Y * row1Z) - (row0Z * row1Y); result.Row1.X = invRow1X; - result.Row1.Y = (+mat.Row0.X * mat.Row2.Z) - (mat.Row0.Z * mat.Row2.X); - result.Row1.Z = (-mat.Row0.X * mat.Row1.Z) + (mat.Row0.Z * mat.Row1.X); + result.Row1.Y = (+row0X * row2Z) - (row0Z * row2X); + result.Row1.Z = (-row0X * row1Z) + (row0Z * row1X); result.Row2.X = invRow2X; - result.Row2.Y = (-mat.Row0.X * mat.Row2.Y) + (mat.Row0.Y * mat.Row2.X); - result.Row2.Z = (+mat.Row0.X * mat.Row1.Y) - (mat.Row0.Y * mat.Row1.X); + result.Row2.Y = (-row0X * row2Y) + (row0Y * row2X); + result.Row2.Z = (+row0X * row1Y) - (row0Y * row1X); // Multiply adjoint with reciprocal of determinant: det = 1.0f / det; From dd09253f8c96b02a8da4daaeac7a11f8142fc36a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julius=20H=C3=A4ger?= Date: Sun, 7 Mar 2021 20:30:23 +0100 Subject: [PATCH 6/6] Multitarget for the projects that depend on OpenTK.Mathematics --- src/OpenTK.Graphics/OpenTK.Graphics.csproj | 2 +- src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/OpenTK.Graphics/OpenTK.Graphics.csproj b/src/OpenTK.Graphics/OpenTK.Graphics.csproj index b48b6ee442..0b5dfd80b8 100644 --- a/src/OpenTK.Graphics/OpenTK.Graphics.csproj +++ b/src/OpenTK.Graphics/OpenTK.Graphics.csproj @@ -1,7 +1,7 @@  - netcoreapp3.1 + netstandard2.1;netcoreapp3.1 true OpenTK.Graphics diff --git a/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj b/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj index 8e8cc63dcd..58942a6755 100644 --- a/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj +++ b/src/OpenTK.Windowing.Common/OpenTK.Windowing.Common.csproj @@ -1,7 +1,7 @@  - netcoreapp3.1 + netstandard2.1;netcoreapp3.1 OpenTK.Windowing.Common true