diff --git a/src/include/sof/math/exp_fcn.h b/src/include/sof/math/exp_fcn.h index d85be59260f6..abb324fc97df 100644 --- a/src/include/sof/math/exp_fcn.h +++ b/src/include/sof/math/exp_fcn.h @@ -10,6 +10,18 @@ #include +#if defined(__XCC__) +/* HiFi */ +#include +#if XCHAL_HAVE_HIFI4 == 1 +#define SOFM_EXPONENTIAL_HIFI4 1 +#endif +#else +/* !XCC */ +#define EXPONENTIAL_GENERIC 1 + +#endif + int32_t sofm_exp_int32(int32_t x); #endif diff --git a/src/math/CMakeLists.txt b/src/math/CMakeLists.txt index f03228b696f4..0a27e771d2b3 100644 --- a/src/math/CMakeLists.txt +++ b/src/math/CMakeLists.txt @@ -14,8 +14,8 @@ if(CONFIG_SQRT_FIXED) add_local_sources(sof sqrt_int16.c) endif() -if(CONFIG_EXP_FIXED) - add_local_sources(sof exp_fcn.c) +if(CONFIG_MATH_EXP) + add_local_sources(sof exp_fcn.c exp_fcn_hifi4.c) endif() if(CONFIG_MATH_DECIBELS) diff --git a/src/math/Kconfig b/src/math/Kconfig index 212059a11d81..78a67ac868d1 100644 --- a/src/math/Kconfig +++ b/src/math/Kconfig @@ -38,13 +38,13 @@ config SQRT_FIXED to calculate square root.square function having positive number y as input and return the positive number x multiplied by itself (squared) -config EXP_FIXED +config MATH_EXP bool "Exponential functions" default n help By selecting this, the 32-bit sofm_exp_int32() function can be used to calculate exponential values. With a maximum ulp of 5, an exponential function with - an input range of -5 to +5 y gives positive numbers between 0.00673794699908547 and + an input range of -5 to +5 gives positive numbers between 0.00673794699908547 and 148.413159102577. The precision of this function is 1e-4. config NATURAL_LOGARITHM_FIXED diff --git a/src/math/exp_fcn.c b/src/math/exp_fcn.c index 7aa6b8f58101..dd334268a9a6 100644 --- a/src/math/exp_fcn.c +++ b/src/math/exp_fcn.c @@ -6,7 +6,6 @@ * */ -/* Include Files */ #include #include #include @@ -14,12 +13,14 @@ #include #include -#define BIT_MASK_Q62P2 0x4000000000000000LL -#define CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884 -#define BIT_MASK_LOW_Q27P5 0x8000000 -#define QUOTIENT_SCALE BIT(30) -#define TERMS_Q23P9 8388608 -#define LSHIFT_BITS 8192 +#if defined(EXPONENTIAL_GENERIC) + +#define SOFM_BIT_MASK_Q62P2 0x4000000000000000LL +#define SOFM_CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884 +#define SOFM_BIT_MASK_LOW_Q27P5 0x8000000 +#define SOFM_QUOTIENT_SCALE BIT(30) +#define SOFM_TERMS_Q23P9 8388608 +#define SOFM_LSHIFT_BITS 8192 /* inv multiplication lookup table */ /* LUT = ceil(1/factorial(b_n) * 2 ^ 63) */ @@ -152,7 +153,7 @@ static inline int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b) uint64_t u64_rlo; mul_s64(a, b, &u64_rhi, &u64_rlo); - const bool roundup = (u64_rlo & BIT_MASK_LOW_Q27P5) != 0; + const bool roundup = (u64_rlo & SOFM_BIT_MASK_LOW_Q27P5) != 0; u64_rlo = (u64_rhi << 36 | u64_rlo >> 28) + (roundup ? 1 : 0); return u64_rlo; @@ -179,8 +180,8 @@ int32_t sofm_exp_int32(int32_t x) uint64_t ou0Lo; int64_t qt; int32_t b_n; - int32_t ts = TERMS_Q23P9; /* Q23.9 */ - int64_t dividend = (x + LSHIFT_BITS) >> 14; /* x in Q50.14 */ + int32_t ts = SOFM_TERMS_Q23P9; /* Q23.9 */ + int64_t dividend = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */ static const int32_t i_emin = -1342177280; /* Q4.28 */ static const int32_t o_emin = 56601; /* Q9.23 */ static const int32_t i_emax = 1342177280; /* Q4.28 */ @@ -195,22 +196,23 @@ int32_t sofm_exp_int32(int32_t x) return o_emax; /* 148.4131494760513306 in Q9.23 */ /* pre-computation of 1st & 2nd terms */ - mul_s64(dividend, BIT_MASK_Q62P2, &ou0Hi, &ou0Lo); + mul_s64(dividend, SOFM_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo); qt = (ou0Hi << 46) | (ou0Lo >> 18);/* Q6.26 */ - ts += (int32_t)((qt >> 35) + ((qt & QUOTIENT_SCALE) >> 18)); + ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) >> 18)); dividend = lomul_s64_sr_sat_near(dividend, x); for (b_n = 0; b_n < ARRAY_SIZE(exp_iv_ilookup); b_n++) { mul_s64(dividend, exp_iv_ilookup[b_n], &ou0Hi, &ou0Lo); qt = (ou0Hi << 45) | (ou0Lo >> 19); /* sum of the remaining terms */ - ts += (int32_t)((qt >> 35) + ((qt & QUOTIENT_SCALE) ? 1 : 0)); + ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) ? 1 : 0)); dividend = lomul_s64_sr_sat_near(dividend, x); qt = ABS(qt); - /* For inputs between -5 and 5, (qt < CONVERG_ERROR) is always true */ - if (qt < CONVERG_ERROR) + /* For inputs between -5 and 5, (qt < SOFM_CONVERG_ERROR) is always true */ + if (qt < SOFM_CONVERG_ERROR) break; } return ts; } +#endif diff --git a/src/math/exp_fcn_hifi4.c b/src/math/exp_fcn_hifi4.c new file mode 100644 index 000000000000..130dbd0cb8bb --- /dev/null +++ b/src/math/exp_fcn_hifi4.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + *Copyright(c) 2023 Intel Corporation. All rights reserved. + * + * Author: Shriram Shastry + * + */ + +#include +#include +#include +#include + +#if defined(SOFM_EXPONENTIAL_HIFI4) +#include + +#define SOFM_BIT_MASK_LOW_Q27P5 0x0000000008000000 +#define SOFM_BIT_MASK_Q62P2 0x4000000000000000 +#define SOFM_CONVERG_ERROR 0x1A36E2EB4000LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */ +#define SOFM_QUOTIENT_SCALE 0x400000000 +#define SOFM_TERMS_Q23P9 0x800000 +#define SOFM_LSHIFT_BITS BIT(13) + +/* + * Arguments : int64_t in_0 + * int64_t in_1 + * uint64_t *ptroutbitshi + * uint64_t *ptroutbitslo + * Return Type : void + * Description:Perform element-wise multiplication on in_0 and in_1 + * while keeping the required product word length and fractional + * length in mind. mul_s64 function divide the 64-bit quantities + * into two 32-bit words,multiply the low words to produce the + * lowest and second-lowest words in the result, then both pairs + * of low and high words from different numbers to produce the + * second and third lowest words in the result, and finally both + * high words to produce the two highest words in the outcome. + * Add them all up, taking carry into consideration. + * + * The 64 x 64 bit multiplication of operands in_0 and in_1 is + * shown in the image below. The 64-bit operand in_0,in_1 is + * represented by the notation in0_H, in1_H for the top 32 bits + * and in0_L, in1_L for the bottom 32 bits. + * + * in0_H : in0_L + * x in1_H : in1_L + * --------------------- + * P0 in0_L x in1_L + * P1 in0_H x in1_L 64 bit inner multiplication + * P2 in0_L x in1_H 64 bit inner multiplication + * P3 in0_H x in1_H + * -------------------- + * [64 x 64 bit multiplication] sum of inner products + * All combinations are multiplied by one another and then added. + * Each inner product is moved into its proper power location.given the names + * of the inner products, redoing the addition where 000 represents 32 zero + * bits.The inner products can be added together in 64 bit addition.The sum + * of two 64-bit numbers yields a 65-bit output. + * (P0H:P0L) + * P1H(P1L:000) + * P2H(P2L:000) + * P3H:P3L(000:000) + * .......(aaa:P0L) + * By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0. + * The partial result is then multiplied by P2L:000. + * We call it CRY1 because it has the potential to carry again. + * (CRY0 + CRY1)P0H:P0L + * ( P1H)P1L:000 + * ( P2H)P2L:000 + * (P3H: P3L)000:000 + * -------------------- + * (ccc:bbb)aaa:P0L + * P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1.This increase will + * not result in an overflow. + * + */ +static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *ptroutbitshi, ae_int64 *ptroutbitslo) +{ + ae_int64 producthihi, producthilo, productlolo; + ae_int64 producthi, carry, product_hl_lh_h, product_hl_lh_l; + + ae_int32x2 in0_32 = AE_MOVINT32X2_FROMINT64(in_0); + ae_int32x2 in1_32 = AE_MOVINT32X2_FROMINT64(in_1); + + ae_ep ep_lolo = AE_MOVEA(0); + ae_ep ep_hilo = AE_MOVEA(0); + ae_ep ep_HL_LH = AE_MOVEA(0); + + producthihi = AE_MUL32_HH(in0_32, in1_32); + + /* AE_MULZAAD32USEP.HL.LH - Unsigned lower parts and signed higher 32-bit parts dual */ + /* multiply and accumulate operation on two 32x2 operands with 72-bit output */ + /* Input-32x32-bit(in1_32xin0_32)into 72-bit multiplication operations */ + /* Output-lower 64 bits of the result are stored in producthilo */ + /* Output-upper eight bits are stored in ep_hilo */ + AE_MULZAAD32USEP_HL_LH(ep_hilo, producthilo, in1_32, in0_32); + productlolo = AE_MUL32U_LL(in0_32, in1_32); + + product_hl_lh_h = AE_SRAI72(ep_hilo, producthilo, 32); + product_hl_lh_l = AE_SLAI64(producthilo, 32); + + /* The AE_ADD72 procedure adds two 72-bit elements. The first 72-bit value is created */ + /* by concatenating the MSBs and LSBs of operands ep[7:0] and d[63:0]. Similarly, the */ + /* second value is created by concatenating bits from operands ep1[7:0] and d1[63:0]. */ + AE_ADD72(ep_lolo, productlolo, ep_HL_LH, product_hl_lh_l); + + carry = AE_SRAI72(ep_lolo, productlolo, 32); + + carry = AE_SRLI64(carry, 32); + producthi = AE_ADD64(producthihi, carry); + + producthi = AE_ADD64(producthi, product_hl_lh_h); + + *ptroutbitslo = productlolo; + *ptroutbitshi = producthi; +} + +/* + * Arguments : int64_t a + * int64_t b + * Return Type : int64_t + */ +static int64_t mul_s64_sr_sat_near(int64_t a, int64_t b) +{ + ae_int64 result; + ae_int64 u64_chi; + ae_int64 u64_clo; + ae_int64 temp; + + mul_s64(a, b, &u64_chi, &u64_clo); + + ae_int64 roundup = AE_AND64(u64_clo, SOFM_SOFM_BIT_MASK_LOW_Q27P5); + + roundup = AE_SRLI64(roundup, 27); + temp = AE_OR64(AE_SLAI64(u64_chi, 36), AE_SRLI64(u64_clo, 28)); + + return AE_ADD64(temp, roundup); +} + +static ae_int64 onebyfact_Q63[19] = { + 4611686018427387904LL, + 1537228672809129301LL, + 384307168202282325LL, + 76861433640456465LL, + 12810238940076077LL, + 1830034134296582LL, + 228754266787072LL, + 25417140754119LL, + 2541714075411LL, + 231064915946LL, + 19255409662LL, + 1481185358LL, + 105798954LL, + 7053264LL, + 440829LL, + 25931LL, + 1441LL, + 76LL, + 4LL +}; + +/* f(x) = a^x, x is variable and a is base + * + * Arguments : int32_t x(Q4.28) + * input range : -5 to 5 + * + * Return Type : int32_t ts(Q9.23) + * output range 0.0067465305 to 148.4131488800 + *+------------------+-----------------+--------+--------+ + *| x | ts (returntype) | x | ts | + *+----+-----+-------+----+----+-------+--------+--------+ + *|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat| + *+----+-----+-------+----+----+-------+--------+--------+ + *| 32 | 28 | 1 | 32 | 23 | 0 | 4.28 | 9.23 | + *+------------------+-----------------+--------+--------+ + */ +int32_t sofm_exp_int32(int32_t x) +{ + ae_int64 outhi; + ae_int64 outlo; + ae_int64 qt; + ae_int64 onebyfact; + ae_int64 temp; + + ae_int64 *ponebyfact_Q63 = &onebyfact_Q63[0]; + ae_int64 ts = SOFM_TERMS_Q23P9; + ae_int64 mp = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */; + xtbool flag; + + int64_t qt_temp; + int64_t b_n; + + mul_s64(mp, SOFM_BIT_MASK_Q62P2, &outhi, &outlo); + qt = AE_OR64(AE_SLAI64(outhi, 46), AE_SRLI64(outlo, 18)); + + temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35); + + ts = AE_ADD64(ts, temp); + + mp = mul_s64_sr_sat_near(mp, (int64_t)x); + + for (b_n = 0; b_n < 64;) { + AE_L64_IP(onebyfact, ponebyfact_Q63, 8); + + mul_s64(mp, onebyfact, &outhi, &outlo); + qt = AE_OR64(AE_SLAI64(outhi, 45), AE_SRLI64(outlo, 19)); + + temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35); + ts = AE_ADD64(ts, temp); + + mp = mul_s64_sr_sat_near(mp, (int64_t)x); + + const ae_int64 sign = AE_NEG64(qt); + + flag = AE_LT64(qt, 0); + AE_MOVT64(qt, sign, flag); + + if (!(qt < (ae_int64)SOFM_CONVERG_ERROR)) + b_n++; + else + b_n = 64; + } + + return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts)); +} +#endif diff --git a/test/cmocka/src/math/arithmetic/CMakeLists.txt b/test/cmocka/src/math/arithmetic/CMakeLists.txt index c580a40610bd..3b808b53ef8e 100644 --- a/test/cmocka/src/math/arithmetic/CMakeLists.txt +++ b/test/cmocka/src/math/arithmetic/CMakeLists.txt @@ -13,6 +13,7 @@ cmocka_test(base2_logarithm cmocka_test(exponential exponential.c ${PROJECT_SOURCE_DIR}/src/math/exp_fcn.c + ${PROJECT_SOURCE_DIR}/src/math/exp_fcn_hifi4.c ) cmocka_test(square_root square_root.c diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt index d6aff8b0f547..a1b8347e27a6 100644 --- a/zephyr/CMakeLists.txt +++ b/zephyr/CMakeLists.txt @@ -696,8 +696,9 @@ zephyr_library_sources_ifdef(CONFIG_SQRT_FIXED ${SOF_MATH_PATH}/sqrt_int16.c ) -zephyr_library_sources_ifdef(CONFIG_EXP_FIXED +zephyr_library_sources_ifdef(CONFIG_MATH_EXP ${SOF_MATH_PATH}/exp_fcn.c + ${SOF_MATH_PATH}/exp_fcn_hifi4.c ) zephyr_library_sources_ifdef(CONFIG_COMP_UP_DOWN_MIXER