Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First attempt at building for Arm64 on MSVC (#25)
- Loading branch information
1 parent
8ab5fd4
commit 19c5d8d
Showing
4 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
44 changes: 44 additions & 0 deletions
44
patches_for_WebRTC_org/m80/6401-Arm64-is-a-thing-and-has-intrinsic-to-mul-two-64bit-.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
From 19985a00f53fb5368c74d01f1dae5a9d67c8a4ae Mon Sep 17 00:00:00 2001 | ||
From: Augusto Righetto <aurighet@microsoft.com> | ||
Date: Wed, 25 Mar 2020 23:26:53 -0700 | ||
Subject: [PATCH] Arm64 is a thing and has intrinsic to mul two 64bit uint | ||
|
||
--- | ||
crypto/fipsmodule/bn/internal.h | 6 +++++- | ||
include/openssl/base.h | 2 +- | ||
2 files changed, 6 insertions(+), 2 deletions(-) | ||
|
||
diff --git a/crypto/fipsmodule/bn/internal.h b/crypto/fipsmodule/bn/internal.h | ||
index d58a2acce..7b4fba775 100644 | ||
--- a/crypto/fipsmodule/bn/internal.h | ||
+++ b/crypto/fipsmodule/bn/internal.h | ||
@@ -404,8 +404,12 @@ uint64_t bn_mont_n0(const BIGNUM *n); | ||
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n, | ||
BN_CTX *ctx); | ||
|
||
-#if defined(OPENSSL_X86_64) && defined(_MSC_VER) | ||
+#if defined(_MSC_VER) | ||
+#if defined(OPENSSL_X86_64) | ||
#define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high))) | ||
+#elif defined(OPENSSL_AARCH64) | ||
+#define BN_UMULT_LOHI(low, high, a, b) ((low) = (a) * (b), high = __umulh((a), (b))) | ||
+#endif | ||
#endif | ||
|
||
#if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI) | ||
diff --git a/include/openssl/base.h b/include/openssl/base.h | ||
index e347c09ae..0265c5103 100644 | ||
--- a/include/openssl/base.h | ||
+++ b/include/openssl/base.h | ||
@@ -90,7 +90,7 @@ extern "C" { | ||
#elif defined(__x86) || defined(__i386) || defined(__i386__) || defined(_M_IX86) | ||
#define OPENSSL_32_BIT | ||
#define OPENSSL_X86 | ||
-#elif defined(__aarch64__) | ||
+#elif defined(__aarch64__) || defined(_M_ARM64) | ||
#define OPENSSL_64_BIT | ||
#define OPENSSL_AARCH64 | ||
#elif defined(__arm) || defined(__arm__) || defined(_M_ARM) | ||
-- | ||
2.17.1.windows.2 | ||
|
49 changes: 49 additions & 0 deletions
49
patches_for_WebRTC_org/m80/6401-Shift-operator-in-Arm-doesn-t-work-the-same-as-Intel.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
From 39dc26711df8784a015db2501a85fe92726b3cc9 Mon Sep 17 00:00:00 2001 | ||
From: Augusto Righetto <aurighet@microsoft.com> | ||
Date: Wed, 25 Mar 2020 23:24:15 -0700 | ||
Subject: [PATCH] Shift operator in Arm doesn't work the same as Intel | ||
|
||
--- | ||
modules/audio_processing/aec3/fft_data.h | 3 +++ | ||
modules/audio_processing/legacy_noise_suppression.cc | 3 ++- | ||
2 files changed, 5 insertions(+), 1 deletion(-) | ||
|
||
diff --git a/modules/audio_processing/aec3/fft_data.h b/modules/audio_processing/aec3/fft_data.h | ||
index 5e5adb62de..7707249704 100644 | ||
--- a/modules/audio_processing/aec3/fft_data.h | ||
+++ b/modules/audio_processing/aec3/fft_data.h | ||
@@ -44,6 +44,8 @@ struct FftData { | ||
void Spectrum(Aec3Optimization optimization, | ||
rtc::ArrayView<float> power_spectrum) const { | ||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size()); | ||
+#pragma warning(push) | ||
+#pragma warning(disable:4065) | ||
switch (optimization) { | ||
#if defined(WEBRTC_ARCH_X86_FAMILY) | ||
case Aec3Optimization::kSse2: { | ||
@@ -65,6 +67,7 @@ struct FftData { | ||
std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(), | ||
[](float a, float b) { return a * a + b * b; }); | ||
} | ||
+#pragma warning(pop) | ||
} | ||
|
||
// Copy the data from an interleaved array. | ||
diff --git a/modules/audio_processing/legacy_noise_suppression.cc b/modules/audio_processing/legacy_noise_suppression.cc | ||
index b2c88536ca..cc429397f8 100644 | ||
--- a/modules/audio_processing/legacy_noise_suppression.cc | ||
+++ b/modules/audio_processing/legacy_noise_suppression.cc | ||
@@ -151,8 +151,9 @@ std::vector<float> NoiseSuppression::NoiseEstimate() { | ||
int q_noise; | ||
const uint32_t* noise = | ||
WebRtcNsx_noise_estimate(suppressor->state(), &q_noise); | ||
+ const int32_t noise_factor = 1i32 << static_cast<int32_t>(q_noise); | ||
const float kNormalizationFactor = | ||
- 1.f / ((1 << q_noise) * suppressors_.size()); | ||
+ 1.f / (noise_factor * suppressors_.size()); | ||
for (size_t i = 0; i < noise_estimate.size(); ++i) { | ||
noise_estimate[i] += kNormalizationFactor * noise[i]; | ||
} | ||
-- | ||
2.17.1.windows.2 | ||
|
139 changes: 139 additions & 0 deletions
139
patches_for_WebRTC_org/m80/6401-cl-aligns-differently-and-hack-for-extracting-first-.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
From 42d3d0b999c03d4250bb0cbc235c3b9ec5ac4b38 Mon Sep 17 00:00:00 2001 | ||
From: Augusto Righetto <aurighet@microsoft.com> | ||
Date: Wed, 25 Mar 2020 23:29:10 -0700 | ||
Subject: [PATCH] cl aligns differently and hack for extracting first component | ||
of a SIMD vector | ||
|
||
--- | ||
simd/arm/common/jidctint-neon.c | 33 +++++++++++++++++++-------------- | ||
simd/arm/common/jidctred-neon.c | 25 ++++++++++++++++--------- | ||
2 files changed, 35 insertions(+), 23 deletions(-) | ||
|
||
diff --git a/simd/arm/common/jidctint-neon.c b/simd/arm/common/jidctint-neon.c | ||
index 7fb683b..038bdd2 100644 | ||
--- a/simd/arm/common/jidctint-neon.c | ||
+++ b/simd/arm/common/jidctint-neon.c | ||
@@ -75,15 +75,20 @@ | ||
#define F_2_053_MINUS_2_562 (F_2_053 - F_2_562) | ||
#define F_0_541_PLUS_0_765 (F_0_541 + F_0_765) | ||
|
||
-__attribute__ ((aligned(8))) static int16_t jsimd_idct_islow_neon_consts[] = { | ||
- F_0_899, F_0_541, | ||
- F_2_562, F_0_298_MINUS_0_899, | ||
- F_1_501_MINUS_0_899, F_2_053_MINUS_2_562, | ||
- F_0_541_PLUS_0_765, F_1_175, | ||
- F_1_175_MINUS_0_390, F_0_541_MINUS_1_847, | ||
- F_3_072_MINUS_2_562, F_1_175_MINUS_1_961, | ||
- 0, 0, 0, 0 | ||
- }; | ||
+#if defined(_MSC_VER) | ||
+__declspec(align(8)) | ||
+#else | ||
+__attribute__ ((aligned(8))) | ||
+#endif | ||
+static int16_t jsimd_idct_islow_neon_consts[] = { | ||
+ F_0_899, F_0_541, | ||
+ F_2_562, F_0_298_MINUS_0_899, | ||
+ F_1_501_MINUS_0_899, F_2_053_MINUS_2_562, | ||
+ F_0_541_PLUS_0_765, F_1_175, | ||
+ F_1_175_MINUS_0_390, F_0_541_MINUS_1_847, | ||
+ F_3_072_MINUS_2_562, F_1_175_MINUS_1_961, | ||
+ 0, 0, 0, 0 | ||
+}; | ||
|
||
/* Forward declaration of regular and sparse IDCT helper functions. */ | ||
|
||
@@ -214,13 +219,13 @@ void jsimd_idct_islow_neon(void *dct_table, | ||
int16x4_t bitmap = vorr_s16(row7, row6); | ||
bitmap = vorr_s16(bitmap, row5); | ||
bitmap = vorr_s16(bitmap, row4); | ||
- int64_t bitmap_rows_4567 = vreinterpret_s64_s16(bitmap); | ||
+ int64_t bitmap_rows_4567 = *((int64_t*)(&(vreinterpret_s64_s16(bitmap)))); | ||
|
||
if (bitmap_rows_4567 == 0) { | ||
bitmap = vorr_s16(bitmap, row3); | ||
bitmap = vorr_s16(bitmap, row2); | ||
bitmap = vorr_s16(bitmap, row1); | ||
- int64_t left_ac_bitmap = vreinterpret_s64_s16(bitmap); | ||
+ int64_t left_ac_bitmap = *((int64_t*)(&(vreinterpret_s64_s16(bitmap)))); | ||
|
||
if (left_ac_bitmap == 0) { | ||
int16x4_t dcval = vshl_n_s16(vmul_s16(row0, quant_row0), PASS1_BITS); | ||
@@ -266,18 +271,18 @@ void jsimd_idct_islow_neon(void *dct_table, | ||
bitmap = vorr_s16(row7, row6); | ||
bitmap = vorr_s16(bitmap, row5); | ||
bitmap = vorr_s16(bitmap, row4); | ||
- bitmap_rows_4567 = vreinterpret_s64_s16(bitmap); | ||
+ bitmap_rows_4567 = *((int64_t*)(&(vreinterpret_s64_s16(bitmap)))); | ||
bitmap = vorr_s16(bitmap, row3); | ||
bitmap = vorr_s16(bitmap, row2); | ||
bitmap = vorr_s16(bitmap, row1); | ||
- int64_t right_ac_bitmap = vreinterpret_s64_s16(bitmap); | ||
+ int64_t right_ac_bitmap = *((int64_t*)(&(vreinterpret_s64_s16(bitmap)))); | ||
|
||
/* Initialise to non-zero value: defaults to regular second pass. */ | ||
int64_t right_ac_dc_bitmap = 1; | ||
|
||
if (right_ac_bitmap == 0) { | ||
bitmap = vorr_s16(bitmap, row0); | ||
- right_ac_dc_bitmap = vreinterpret_s64_s16(bitmap); | ||
+ right_ac_dc_bitmap = *((int64_t*)(&(vreinterpret_s64_s16(bitmap)))); | ||
|
||
if (right_ac_dc_bitmap != 0) { | ||
int16x4_t dcval = vshl_n_s16(vmul_s16(row0, quant_row0), PASS1_BITS); | ||
diff --git a/simd/arm/common/jidctred-neon.c b/simd/arm/common/jidctred-neon.c | ||
index aa10799..2de38b8 100644 | ||
--- a/simd/arm/common/jidctred-neon.c | ||
+++ b/simd/arm/common/jidctred-neon.c | ||
@@ -72,7 +72,7 @@ | ||
|
||
void jsimd_idct_2x2_neon(void *dct_table, | ||
JCOEFPTR coef_block, | ||
- JSAMPARRAY restrict output_buf, | ||
+ JSAMPARRAY /* restrict */ output_buf, | ||
JDIMENSION output_col) | ||
{ | ||
ISLOW_MULT_TYPE *quantptr = dct_table; | ||
@@ -183,15 +183,20 @@ void jsimd_idct_2x2_neon(void *dct_table, | ||
* exact compatibility with jpeg-6b. | ||
*/ | ||
|
||
-__attribute__ ((aligned(8))) static int16_t jsimd_idct_4x4_neon_consts[] = { | ||
- F_1_847, -F_0_765, -F_0_211, F_1_451, | ||
- -F_2_172, F_1_061, -F_0_509, -F_0_601, | ||
- F_0_899, F_2_562, 0, 0 | ||
- }; | ||
+#if defined(_MSC_VER) | ||
+__declspec(align(8)) | ||
+#else | ||
+__attribute__ ((aligned(8))) | ||
+#endif | ||
+static int16_t jsimd_idct_4x4_neon_consts[] = { | ||
+ F_1_847, -F_0_765, -F_0_211, F_1_451, | ||
+ -F_2_172, F_1_061, -F_0_509, -F_0_601, | ||
+ F_0_899, F_2_562, 0, 0 | ||
+}; | ||
|
||
void jsimd_idct_4x4_neon(void *dct_table, | ||
JCOEFPTR coef_block, | ||
- JSAMPARRAY restrict output_buf, | ||
+ JSAMPARRAY /* restrict */ output_buf, | ||
JDIMENSION output_col) | ||
{ | ||
ISLOW_MULT_TYPE *quantptr = dct_table; | ||
@@ -217,8 +222,10 @@ void jsimd_idct_4x4_neon(void *dct_table, | ||
bitmap = vorrq_s16(bitmap, row6); | ||
bitmap = vorrq_s16(bitmap, row7); | ||
|
||
- int64_t left_ac_bitmap = vreinterpret_s64_s16(vget_low_s16(bitmap)); | ||
- int64_t right_ac_bitmap = vreinterpret_s64_s16(vget_high_s16(bitmap)); | ||
+ int16x4_t low_s16 = vget_low_s16(bitmap); | ||
+ int64_t left_ac_bitmap = *((int64_t*)(&(vreinterpret_s64_s16(low_s16)))); | ||
+ int16x4_t high_s16 = vget_high_s16(bitmap); | ||
+ int64_t right_ac_bitmap = *((int64_t*)(&(vreinterpret_s64_s16(high_s16)))); | ||
|
||
/* Load constants for IDCT computation. */ | ||
const int16x4x3_t consts = vld1_s16_x3(jsimd_idct_4x4_neon_consts); | ||
-- | ||
2.17.1.windows.2 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters