First attempt for building for Arm64 on MSVC (#25)

microsoft · May 11, 2020 · 19c5d8d · 19c5d8d
1 parent 8ab5fd4
commit 19c5d8d
Show file tree

Hide file tree

Showing 4 changed files with 238 additions and 0 deletions.
diff --git a/patches_for_WebRTC_org/m80/6401-Arm64-is-a-thing-and-has-intrinsic-to-mul-two-64bit-.patch b/patches_for_WebRTC_org/m80/6401-Arm64-is-a-thing-and-has-intrinsic-to-mul-two-64bit-.patch
@@ -0,0 +1,44 @@
+From 19985a00f53fb5368c74d01f1dae5a9d67c8a4ae Mon Sep 17 00:00:00 2001
+From: Augusto Righetto <aurighet@microsoft.com>
+Date: Wed, 25 Mar 2020 23:26:53 -0700
+Subject: [PATCH] Arm64 is a thing and has intrinsic to mul two 64bit uint
+
+---
+ crypto/fipsmodule/bn/internal.h | 6 +++++-
+ include/openssl/base.h          | 2 +-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h
+index d58a2acce..7b4fba775 100644
+--- a/crypto/fipsmodule/bn/internal.h
++++ b/crypto/fipsmodule/bn/internal.h
+@@ -404,8 +404,12 @@ uint64_t bn_mont_n0(const BIGNUM *n);
+ int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
+                                 BN_CTX *ctx);
+
+-#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
++#if defined(_MSC_VER)
++#if defined(OPENSSL_X86_64)
+ #define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
++#elif defined(OPENSSL_AARCH64)  // low word: plain multiply; high word: __umulh
++#define BN_UMULT_LOHI(low, high, a, b) ((low) = (a) * (b), (high) = __umulh((a), (b)))
++#endif
+ #endif
+
+ #if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI)
+diff --git a/include/openssl/base.h b/include/openssl/base.h
+index e347c09ae..0265c5103 100644
+--- a/include/openssl/base.h
++++ b/include/openssl/base.h
+@@ -90,7 +90,7 @@ extern "C" {
+ #elif defined(__x86) || defined(__i386) || defined(__i386__) || defined(_M_IX86)
+ #define OPENSSL_32_BIT
+ #define OPENSSL_X86
+-#elif defined(__aarch64__)
++#elif defined(__aarch64__) || defined(_M_ARM64)
+ #define OPENSSL_64_BIT
+ #define OPENSSL_AARCH64
+ #elif defined(__arm) || defined(__arm__) || defined(_M_ARM)
+-- 
+2.17.1.windows.2
+
diff --git a/patches_for_WebRTC_org/m80/6401-Shift-operator-in-Arm-doesn-t-work-the-same-as-Intel.patch b/patches_for_WebRTC_org/m80/6401-Shift-operator-in-Arm-doesn-t-work-the-same-as-Intel.patch
@@ -0,0 +1,49 @@
+From 39dc26711df8784a015db2501a85fe92726b3cc9 Mon Sep 17 00:00:00 2001
+From: Augusto Righetto <aurighet@microsoft.com>
+Date: Wed, 25 Mar 2020 23:24:15 -0700
+Subject: [PATCH] Shift operator in Arm doesn't work the same as Intel
+
+---
+ modules/audio_processing/aec3/fft_data.h             | 3 +++
+ modules/audio_processing/legacy_noise_suppression.cc | 3 ++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/modules/audio_processing/aec3/fft_data.h b/modules/audio_processing/aec3/fft_data.h
+index 5e5adb62de..7707249704 100644
+--- a/modules/audio_processing/aec3/fft_data.h
++++ b/modules/audio_processing/aec3/fft_data.h
+@@ -44,6 +44,8 @@ struct FftData {
+   void Spectrum(Aec3Optimization optimization,
+                 rtc::ArrayView<float> power_spectrum) const {
+     RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
++#pragma warning(push)
++#pragma warning(disable:4065)
+     switch (optimization) {
+ #if defined(WEBRTC_ARCH_X86_FAMILY)
+       case Aec3Optimization::kSse2: {
+@@ -65,6 +67,7 @@ struct FftData {
+         std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(),
+                        [](float a, float b) { return a * a + b * b; });
+     }
++#pragma warning(pop)
+   }
+
+   // Copy the data from an interleaved array.
+diff --git a/modules/audio_processing/legacy_noise_suppression.cc b/modules/audio_processing/legacy_noise_suppression.cc
+index b2c88536ca..cc429397f8 100644
+--- a/modules/audio_processing/legacy_noise_suppression.cc
++++ b/modules/audio_processing/legacy_noise_suppression.cc
+@@ -151,8 +151,9 @@ std::vector<float> NoiseSuppression::NoiseEstimate() {
+     int q_noise;
+     const uint32_t* noise =
+         WebRtcNsx_noise_estimate(suppressor->state(), &q_noise);
++    const int32_t noise_factor = int32_t{1} << q_noise;
+     const float kNormalizationFactor =
+-        1.f / ((1 << q_noise) * suppressors_.size());
++        1.f / (noise_factor * suppressors_.size());
+     for (size_t i = 0; i < noise_estimate.size(); ++i) {
+       noise_estimate[i] += kNormalizationFactor * noise[i];
+     }
+-- 
+2.17.1.windows.2
+
diff --git a/patches_for_WebRTC_org/m80/6401-cl-aligns-differently-and-hack-for-extracting-first-.patch b/patches_for_WebRTC_org/m80/6401-cl-aligns-differently-and-hack-for-extracting-first-.patch
@@ -0,0 +1,139 @@
+From 42d3d0b999c03d4250bb0cbc235c3b9ec5ac4b38 Mon Sep 17 00:00:00 2001
+From: Augusto Righetto <aurighet@microsoft.com>
+Date: Wed, 25 Mar 2020 23:29:10 -0700
+Subject: [PATCH] cl aligns differently and hack for extracting first component
+ of a SIMD vector
+
+---
+ simd/arm/common/jidctint-neon.c | 33 +++++++++++++++++++--------------
+ simd/arm/common/jidctred-neon.c | 25 ++++++++++++++++---------
+ 2 files changed, 35 insertions(+), 23 deletions(-)
+
+diff --git a/simd/arm/common/jidctint-neon.c b/simd/arm/common/jidctint-neon.c
+index 7fb683b..038bdd2 100644
+--- a/simd/arm/common/jidctint-neon.c
++++ b/simd/arm/common/jidctint-neon.c
+@@ -75,15 +75,20 @@
+ #define F_2_053_MINUS_2_562  (F_2_053 - F_2_562)
+ #define F_0_541_PLUS_0_765   (F_0_541 + F_0_765)
+
+-__attribute__ ((aligned(8))) static int16_t jsimd_idct_islow_neon_consts[] = {
+-                              F_0_899,             F_0_541,
+-                              F_2_562,             F_0_298_MINUS_0_899,
+-                              F_1_501_MINUS_0_899, F_2_053_MINUS_2_562,
+-                              F_0_541_PLUS_0_765,  F_1_175,
+-                              F_1_175_MINUS_0_390, F_0_541_MINUS_1_847,
+-                              F_3_072_MINUS_2_562, F_1_175_MINUS_1_961,
+-                              0, 0, 0, 0
+-                            };
++#if defined(_MSC_VER)
++__declspec(align(8))
++#else
++__attribute__ ((aligned(8)))
++#endif
++static int16_t jsimd_idct_islow_neon_consts[] = {
++    F_0_899,             F_0_541,
++    F_2_562,             F_0_298_MINUS_0_899,
++    F_1_501_MINUS_0_899, F_2_053_MINUS_2_562,
++    F_0_541_PLUS_0_765,  F_1_175,
++    F_1_175_MINUS_0_390, F_0_541_MINUS_1_847,
++    F_3_072_MINUS_2_562, F_1_175_MINUS_1_961,
++    0, 0, 0, 0
++};
+
+ /* Forward declaration of regular and sparse IDCT helper functions. */
+
+@@ -214,13 +219,13 @@ void jsimd_idct_islow_neon(void *dct_table,
+   int16x4_t bitmap = vorr_s16(row7, row6);
+   bitmap = vorr_s16(bitmap, row5);
+   bitmap = vorr_s16(bitmap, row4);
+-  int64_t bitmap_rows_4567 = vreinterpret_s64_s16(bitmap);
++  int64_t bitmap_rows_4567 = vget_lane_s64(vreinterpret_s64_s16(bitmap), 0);
+
+   if (bitmap_rows_4567 == 0) {
+     bitmap = vorr_s16(bitmap, row3);
+     bitmap = vorr_s16(bitmap, row2);
+     bitmap = vorr_s16(bitmap, row1);
+-    int64_t left_ac_bitmap = vreinterpret_s64_s16(bitmap);
++    int64_t left_ac_bitmap = vget_lane_s64(vreinterpret_s64_s16(bitmap), 0);
+
+     if (left_ac_bitmap == 0) {
+       int16x4_t dcval = vshl_n_s16(vmul_s16(row0, quant_row0), PASS1_BITS);
+@@ -266,18 +271,18 @@ void jsimd_idct_islow_neon(void *dct_table,
+   bitmap = vorr_s16(row7, row6);
+   bitmap = vorr_s16(bitmap, row5);
+   bitmap = vorr_s16(bitmap, row4);
+-  bitmap_rows_4567 = vreinterpret_s64_s16(bitmap);
++  bitmap_rows_4567 = vget_lane_s64(vreinterpret_s64_s16(bitmap), 0);
+   bitmap = vorr_s16(bitmap, row3);
+   bitmap = vorr_s16(bitmap, row2);
+   bitmap = vorr_s16(bitmap, row1);
+-  int64_t right_ac_bitmap = vreinterpret_s64_s16(bitmap);
++  int64_t right_ac_bitmap = vget_lane_s64(vreinterpret_s64_s16(bitmap), 0);
+
+   /* Initialise to non-zero value: defaults to regular second pass. */
+   int64_t right_ac_dc_bitmap = 1;
+
+   if (right_ac_bitmap == 0) {
+     bitmap = vorr_s16(bitmap, row0);
+-    right_ac_dc_bitmap = vreinterpret_s64_s16(bitmap);
++    right_ac_dc_bitmap = vget_lane_s64(vreinterpret_s64_s16(bitmap), 0);
+
+     if (right_ac_dc_bitmap != 0) {
+       int16x4_t dcval = vshl_n_s16(vmul_s16(row0, quant_row0), PASS1_BITS);
+diff --git a/simd/arm/common/jidctred-neon.c b/simd/arm/common/jidctred-neon.c
+index aa10799..2de38b8 100644
+--- a/simd/arm/common/jidctred-neon.c
++++ b/simd/arm/common/jidctred-neon.c
+@@ -72,7 +72,7 @@
+
+ void jsimd_idct_2x2_neon(void *dct_table,
+                          JCOEFPTR coef_block,
+-                         JSAMPARRAY restrict output_buf,
++                         JSAMPARRAY /* restrict */ output_buf,
+                          JDIMENSION output_col)
+ {
+   ISLOW_MULT_TYPE *quantptr = dct_table;
+@@ -183,15 +183,20 @@ void jsimd_idct_2x2_neon(void *dct_table,
+  *       exact compatibility with jpeg-6b.
+  */
+
+-__attribute__ ((aligned(8))) static int16_t jsimd_idct_4x4_neon_consts[] = {
+-                                        F_1_847, -F_0_765, -F_0_211,  F_1_451,
+-                                       -F_2_172,  F_1_061, -F_0_509, -F_0_601,
+-                                        F_0_899,  F_2_562,        0,        0
+-                                      };
++#if defined(_MSC_VER)
++__declspec(align(8))
++#else
++__attribute__ ((aligned(8)))
++#endif
++static int16_t jsimd_idct_4x4_neon_consts[] = {
++    F_1_847, -F_0_765, -F_0_211,  F_1_451,
++    -F_2_172,  F_1_061, -F_0_509, -F_0_601,
++    F_0_899,  F_2_562,        0,        0
++};
+
+ void jsimd_idct_4x4_neon(void *dct_table,
+                          JCOEFPTR coef_block,
+-                         JSAMPARRAY restrict output_buf,
++                         JSAMPARRAY /* restrict */ output_buf,
+                          JDIMENSION output_col)
+ {
+   ISLOW_MULT_TYPE *quantptr = dct_table;
+@@ -217,8 +222,10 @@ void jsimd_idct_4x4_neon(void *dct_table,
+   bitmap = vorrq_s16(bitmap, row6);
+   bitmap = vorrq_s16(bitmap, row7);
+
+-  int64_t left_ac_bitmap = vreinterpret_s64_s16(vget_low_s16(bitmap));
+-  int64_t right_ac_bitmap = vreinterpret_s64_s16(vget_high_s16(bitmap));
++  int16x4_t low_s16 = vget_low_s16(bitmap);
++  int64_t left_ac_bitmap = vget_lane_s64(vreinterpret_s64_s16(low_s16), 0);
++  int16x4_t high_s16 = vget_high_s16(bitmap);
++  int64_t right_ac_bitmap = vget_lane_s64(vreinterpret_s64_s16(high_s16), 0);
+
+   /* Load constants for IDCT computation. */
+   const int16x4x3_t consts = vld1_s16_x3(jsimd_idct_4x4_neon_consts);
+-- 
+2.17.1.windows.2
+
diff --git a/patches_for_WebRTC_org/m80/patchWebRTCM80.cmd b/patches_for_WebRTC_org/m80/patchWebRTCM80.cmd
@@ -17,6 +17,11 @@ popd
 
 pushd %WEBRTCM80_ROOT%\third_party\boringssl\src
 git.exe am "%PATCH_DIR%0003-Replacing-deprecated-and-non-UWP-supported-RtlGenRan.patch"
+git.exe am "%PATCH_DIR%6401-Arm64-is-a-thing-and-has-intrinsic-to-mul-two-64bit-.patch"
+popd
+
+pushd %WEBRTCM80_ROOT%\third_party\libjpeg_turbo
+git.exe am "%PATCH_DIR%6401-cl-aligns-differently-and-hack-for-extracting-first-.patch"
 popd
 
 pushd %WEBRTCM80_ROOT%
@@ -25,4 +30,5 @@ git.exe am "%PATCH_DIR%0005-Fixing-UWP-build-for-time_utils.cc.patch"
 git.exe am "%PATCH_DIR%0006-Fixing-UWP-build-for-file_rotating_stream.cc.patch"
 git.exe am "%PATCH_DIR%0007-Fixing-UWP-build-for-modules-video_capture.patch"
 git.exe am "%PATCH_DIR%0008-Fixing-UWP-build-for-modules-audio_device.patch"
+git.exe am "%PATCH_DIR%6401-Shift-operator-in-Arm-doesn-t-work-the-same-as-Intel.patch"
 popd