From 6b936a37a14aa035ec6020afbe463092104b397a Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Fri, 3 Aug 2018 14:44:11 +0300 Subject: [PATCH 1/5] EQ FIR: Changes to support Xtensa optimized versions and updated IPC This patch adds the possibility to use a filter core that processes two samples per call. The use of the optimized version depends on the FIR core header file settings. Normally when compiled with xt-xcc the optimized filter version will be used. The set data and get data IPC is updated to use binary and enum control commands. A pass-through copy function is added to get a working pipeline with a non-configured EQ. The full-circular EQ copy function is removed since it is unnecessary. Normal SOF components work with buffers. EQ mute is removed since SOF components do not usually support it except volume. The IPC is updated to match ALSA binary and enum controls style. The previous approach was not practical for the kernel driver side. Code style is updated to pass the checks. Signed-off-by: Ranjani Sridharan Signed-off-by: Seppo Ingalsuo --- src/audio/eq_fir.c | 374 +++++++++++++++++++++++------------------- src/audio/eq_fir.h | 36 ---- src/include/uapi/eq.h | 38 ++++- 3 files changed, 240 insertions(+), 208 deletions(-) delete mode 100644 src/audio/eq_fir.h diff --git a/src/audio/eq_fir.c b/src/audio/eq_fir.c index 856681898843..bc4e5cda408c 100644 --- a/src/audio/eq_fir.c +++ b/src/audio/eq_fir.c @@ -30,24 +30,23 @@ * Keyon Jie */ -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include "fir_config.h" + +#if FIR_GENERIC #include "fir.h" -#include "eq_fir.h" +#endif + +#if FIR_HIFIEP +#include "fir_hifi2ep.h" +#endif + +#if FIR_HIFI3 +#include "fir_hifi3.h" +#endif #ifdef MODULE_TEST #include @@ -59,81 +58,56 @@ /* src component private data */ struct comp_data { + struct fir_state_32x16 fir[PLATFORM_MAX_CHANNELS]; struct sof_eq_fir_config *config; uint32_t period_bytes; - 
struct fir_state_32x16 fir[PLATFORM_MAX_CHANNELS]; - void (*eq_fir_func)(struct comp_dev *dev, - struct comp_buffer *source, - struct comp_buffer *sink, - uint32_t frames); + void (*eq_fir_func)(struct fir_state_32x16 fir[], + struct comp_buffer *source, + struct comp_buffer *sink, + int frames, int nch); + void (*eq_fir_func_odd)(struct fir_state_32x16 fir[], + struct comp_buffer *source, + struct comp_buffer *sink, + int frames, int nch); }; -/* - * EQ FIR algorithm code - */ - -static void eq_fir_s32_default(struct comp_dev *dev, - struct comp_buffer *source, struct comp_buffer *sink, uint32_t frames) +static void eq_fir_passthrough(struct fir_state_32x16 fir[], + struct comp_buffer *source, + struct comp_buffer *sink, + int frames, int nch) { - struct comp_data *cd = comp_get_drvdata(dev); - int ch; - int n; - int n_wrap_src; - int n_wrap_snk; - int n_wrap_min; - int32_t *src = (int32_t *) source->r_ptr; - int32_t *snk = (int32_t *) sink->w_ptr; - int nch = dev->params.channels; - int32_t *x = src + nch - 1; - int32_t *y = snk + nch - 1; - - for (ch = 0; ch < nch; ch++) { - n = frames * nch; - x = src++; - y = snk++; - while (n > 0) { - n_wrap_src = (int32_t *) source->end_addr - x; - n_wrap_snk = (int32_t *) sink->end_addr - y; - n_wrap_min = (n_wrap_src < n_wrap_snk) ? 
- n_wrap_src : n_wrap_snk; - if (n < n_wrap_min) { - /* No circular wrap need */ - while (n > 0) { - *y = fir_32x16(&cd->fir[ch], *x); - x += nch; - y += nch; - n -= nch; - } - } else { - /* Wrap in n_wrap_min/nch samples */ - while (n_wrap_min > 0) { - *y = fir_32x16(&cd->fir[ch], *x); - x += nch; - y += nch; - n_wrap_min -= nch; - n -= nch; - } - /* Check both source and destination for wrap */ - if (x > (int32_t *) source->end_addr) - x = (int32_t *) - ((size_t) x - source->size); - if (snk > (int32_t *) sink->end_addr) - y = (int32_t *) - ((size_t) y - sink->size); - } - } + int32_t *src = (int32_t *)source->r_ptr; + int32_t *dest = (int32_t *)sink->w_ptr; + int n = frames * nch; - } + memcpy(dest, src, n * sizeof(int32_t)); } +/* + * EQ control code is next. The processing is in fir_ C modules. + */ + static void eq_fir_free_parameters(struct sof_eq_fir_config **config) { - if (*config != NULL) + if (*config) rfree(*config); *config = NULL; } +static void eq_fir_clear_delaylines(struct fir_state_32x16 fir[]) +{ + int i = 0; + + /* 1st active EQ data is at beginning of the single allocated buffer */ + for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) { + if (fir[i].delay) { + memset(fir[i].delay, 0, + fir[i].length * sizeof(int32_t)); + } + } +} + static void eq_fir_free_delaylines(struct fir_state_32x16 fir[]) { int i = 0; @@ -141,15 +115,21 @@ static void eq_fir_free_delaylines(struct fir_state_32x16 fir[]) /* 1st active EQ data is at beginning of the single allocated buffer */ for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) { - if ((fir[i].delay != NULL) && (data == NULL)) - data = fir[i].delay; + if (fir[i].delay && !data) + data = (int32_t *)fir[i].delay; /* Set all to NULL to avoid duplicated free later */ fir[i].delay = NULL; } - if (data != NULL) + if (data) { + trace_eq("fr1"); + trace_value((uint32_t)data); + rfree(data); + + trace_eq("fr2"); + } } static int eq_fir_setup(struct fir_state_32x16 fir[], @@ -165,18 +145,25 @@ static int eq_fir_setup(struct 
fir_state_32x16 fir[], int response_index[PLATFORM_MAX_CHANNELS]; int length_sum = 0; - if ((nch > PLATFORM_MAX_CHANNELS) - || (config->channels_in_config > PLATFORM_MAX_CHANNELS)) + trace_eq("fse"); + trace_value(config->channels_in_config); + trace_value(config->number_of_responses); + if (nch > PLATFORM_MAX_CHANNELS || + config->channels_in_config > PLATFORM_MAX_CHANNELS) { + trace_eq_error("ech"); return -EINVAL; + } /* Collect index of respose start positions in all_coefficients[] */ j = 0; assign_response = &config->data[0]; coef_data = &config->data[config->channels_in_config]; + trace_eq("idx"); for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) { if (i < config->number_of_responses) { response_index[i] = j; - j += 3 + coef_data[j]; + trace_value(j); + j += SOF_EQ_FIR_COEF_NHEADER + coef_data[j]; } else { response_index[i] = 0; } @@ -186,36 +173,47 @@ static int eq_fir_setup(struct fir_state_32x16 fir[], eq_fir_free_delaylines(fir); /* Initialize 1st phase */ + trace_eq("asr"); for (i = 0; i < nch; i++) { resp = assign_response[i]; - if (resp > config->number_of_responses - 1) + trace_value(resp); + if (resp >= config->number_of_responses || resp < 0) { + trace_eq_error("eas"); + trace_value(resp); return -EINVAL; + } - if (resp < 0) { - /* Initialize EQ channel to bypass */ - fir_reset(&fir[i]); + /* Initialize EQ coefficients. Each channel EQ returns the + * number of samples it needs to store into the delay line. The + * sum is used to allocate storate for all EQs. 
+ */ + idx = response_index[resp]; + length = fir_init_coef(&fir[i], &coef_data[idx]); + if (length > 0) { + length_sum += length; } else { - /* Initialize EQ coefficients */ - idx = response_index[resp]; - length = fir_init_coef(&fir[i], &coef_data[idx]); - if (length > 0) - length_sum += length; + trace_eq_error("ecl"); + trace_value(length); + return -EINVAL; } - } + trace_eq("all"); /* Allocate all FIR channels data in a big chunk and clear it */ fir_data = rballoc(RZONE_SYS, SOF_MEM_CAPS_RAM, length_sum * sizeof(int32_t)); - if (fir_data == NULL) + if (!fir_data) return -ENOMEM; memset(fir_data, 0, length_sum * sizeof(int32_t)); /* Initialize 2nd phase to set EQ delay lines pointers */ + trace_eq("ini"); for (i = 0; i < nch; i++) { resp = assign_response[i]; if (resp >= 0) { + trace_value((uint32_t)fir_data); + trace_value(fir->length); fir_init_delay(&fir[i], &fir_data); } } @@ -228,8 +226,8 @@ static int eq_fir_switch_response(struct fir_state_32x16 fir[], { int ret; - /* Copy assign response from update and re-initilize EQ */ - if ((config == NULL) || (ch >= PLATFORM_MAX_CHANNELS)) + /* Copy assign response from update and re-initialize EQ */ + if (!config || ch >= PLATFORM_MAX_CHANNELS) return -EINVAL; config->data[ch] = response; @@ -247,7 +245,7 @@ static struct comp_dev *eq_fir_new(struct sof_ipc_comp *comp) struct comp_dev *dev; struct sof_ipc_comp_eq_fir *eq_fir; struct sof_ipc_comp_eq_fir *ipc_eq_fir - = (struct sof_ipc_comp_eq_fir *) comp; + = (struct sof_ipc_comp_eq_fir *)comp; struct comp_data *cd; int i; @@ -255,21 +253,22 @@ static struct comp_dev *eq_fir_new(struct sof_ipc_comp *comp) dev = rzalloc(RZONE_RUNTIME, SOF_MEM_CAPS_RAM, COMP_SIZE(struct sof_ipc_comp_eq_fir)); - if (dev == NULL) + if (!dev) return NULL; - eq_fir = (struct sof_ipc_comp_eq_fir *) &dev->comp; + eq_fir = (struct sof_ipc_comp_eq_fir *)&dev->comp; memcpy(eq_fir, ipc_eq_fir, sizeof(struct sof_ipc_comp_eq_fir)); cd = rzalloc(RZONE_RUNTIME, SOF_MEM_CAPS_RAM, sizeof(*cd)); - if 
(cd == NULL) { + if (!cd) { rfree(dev); return NULL; } comp_set_drvdata(dev, cd); - cd->eq_fir_func = eq_fir_s32_default; + cd->eq_fir_func = eq_fir_passthrough; + cd->eq_fir_func_odd = eq_fir_passthrough; cd->config = NULL; for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) fir_reset(&cd->fir[i]); @@ -309,7 +308,8 @@ static int eq_fir_params(struct comp_dev *dev) cd->period_bytes = dev->frames * dev->frame_bytes; /* configure downstream buffer */ - sink = list_first_item(&dev->bsink_list, struct comp_buffer, source_list); + sink = list_first_item(&dev->bsink_list, + struct comp_buffer, source_list); err = buffer_set_size(sink, cd->period_bytes * config->periods_sink); if (err < 0) { trace_eq_error("eSz"); @@ -323,41 +323,39 @@ static int eq_fir_params(struct comp_dev *dev) return 0; } -static int fir_cmd_set_value(struct comp_dev *dev, struct sof_ipc_ctrl_data *cdata) +static int fir_cmd_get_data(struct comp_dev *dev, + struct sof_ipc_ctrl_data *cdata) { struct comp_data *cd = comp_get_drvdata(dev); - int j; - uint32_t ch; - bool val; - - if (cdata->cmd == SOF_CTRL_CMD_SWITCH) { - trace_eq("mst"); - for (j = 0; j < cdata->num_elems; j++) { - ch = cdata->chanv[j].channel; - val = cdata->chanv[j].value; - tracev_value(ch); - tracev_value(val); - if (ch >= PLATFORM_MAX_CHANNELS) { - trace_eq_error("che"); - return -EINVAL; - } - if (val) - fir_unmute(&cd->fir[ch]); - else - fir_mute(&cd->fir[ch]); - } - } else { - trace_eq_error("ste"); - return -EINVAL; - } - return 0; + size_t bs; + int ret = 0; + + switch (cdata->cmd) { + case SOF_CTRL_CMD_BINARY: + trace_eq("gbi"); + + /* Copy back to user space */ + bs = cd->config->size; + if (bs > SOF_EQ_FIR_MAX_SIZE || bs < 1) + return -EINVAL; + if (!cd->config) + memcpy(cdata->data->data, cd->config, bs); + break; + default: + trace_eq_error("egt"); + ret = -EINVAL; + break; + } + return ret; } -static int fir_cmd_set_data(struct comp_dev *dev, struct sof_ipc_ctrl_data *cdata) +static int fir_cmd_set_data(struct comp_dev *dev, + 
struct sof_ipc_ctrl_data *cdata) { struct comp_data *cd = comp_get_drvdata(dev); struct sof_ipc_ctrl_value_comp *compv; + struct sof_eq_fir_config *cfg; size_t bs; int i; int ret = 0; @@ -365,51 +363,84 @@ static int fir_cmd_set_data(struct comp_dev *dev, struct sof_ipc_ctrl_data *cdat /* TODO: determine if data is DMAed or appended to cdata */ /* Check version from ABI header */ - if (cdata->data->comp_abi != SOF_EQ_FIR_ABI_VERSION) + if (cdata->data->comp_abi != SOF_EQ_FIR_ABI_VERSION) { + trace_eq_error("eab"); return -EINVAL; + } switch (cdata->cmd) { case SOF_CTRL_CMD_ENUM: - trace_eq("EFe"); + trace_eq("snu"); + compv = (struct sof_ipc_ctrl_value_comp *)cdata->data->data; if (cdata->index == SOF_EQ_FIR_IDX_SWITCH) { - trace_eq("EFs"); - compv = (struct sof_ipc_ctrl_value_comp *) cdata->data->data; - for (i = 0; i < (int) cdata->num_elems; i++) { + trace_eq("fsw"); + for (i = 0; i < (int)cdata->num_elems; i++) { tracev_value(compv[i].index); tracev_value(compv[i].svalue); - ret = eq_fir_switch_response(cd->fir, cd->config, - compv[i].index, compv[i].svalue); + ret = eq_fir_switch_response(cd->fir, + cd->config, + compv[i].index, + compv[i].svalue); if (ret < 0) { - trace_eq_error("swe"); + trace_eq_error("esw"); return -EINVAL; } } } else { - trace_eq_error("une"); + trace_eq_error("enu"); trace_error_value(cdata->index); return -EINVAL; } break; case SOF_CTRL_CMD_BINARY: - trace_eq("EFc"); + trace_eq("sbi"); /* Check and free old config */ eq_fir_free_parameters(&cd->config); - /* Copy new config, need to decode data to know the size */ - bs = cdata->data->size; - if ((bs > SOF_EQ_FIR_MAX_SIZE) || (bs < 1)) + /* Copy new config, find size from header */ + if (!cdata->data->data) { + trace_eq_error("edn"); + return -EINVAL; + } + + cfg = (struct sof_eq_fir_config *)cdata->data->data; + bs = cfg->size; + trace_value(bs); + if (bs > SOF_EQ_FIR_MAX_SIZE || bs < 1) return -EINVAL; cd->config = rzalloc(RZONE_RUNTIME, SOF_MEM_CAPS_RAM, bs); - if (cd->config == NULL) 
+ if (!cd->config) return -EINVAL; memcpy(cd->config, cdata->data->data, bs); ret = eq_fir_setup(cd->fir, cd->config, PLATFORM_MAX_CHANNELS); + if (ret == 0) { +#if 1 +#if FIR_GENERIC + cd->eq_fir_func = eq_fir_s32; + cd->eq_fir_func_odd = eq_fir_s32; +#endif +#if FIR_HIFIEP + cd->eq_fir_func = eq_fir_2x_s32_hifiep; + cd->eq_fir_func_odd = eq_fir_s32_hifiep; +#endif +#if FIR_HIFI3 + cd->eq_fir_func = eq_fir_2x_s32_hifi3; + cd->eq_fir_func_odd = eq_fir_s32_hifi3; +#endif +#endif + trace_eq("fok"); + } else { + cd->eq_fir_func = eq_fir_passthrough; + cd->eq_fir_func_odd = eq_fir_passthrough; + trace_eq_error("ept"); + return -EINVAL; + } break; default: - trace_eq_error("ec1"); + trace_eq_error("ecm"); ret = -EINVAL; break; } @@ -426,12 +457,12 @@ static int eq_fir_cmd(struct comp_dev *dev, int cmd, void *data) trace_eq("cmd"); switch (cmd) { - case COMP_CMD_SET_VALUE: - ret = fir_cmd_set_value(dev, cdata); - break; case COMP_CMD_SET_DATA: ret = fir_cmd_set_data(dev, cdata); break; + case COMP_CMD_GET_DATA: + ret = fir_cmd_get_data(dev, cdata); + break; } return ret; @@ -451,8 +482,10 @@ static int eq_fir_copy(struct comp_dev *dev) struct comp_buffer *source; struct comp_buffer *sink; int res; + int nch = dev->params.channels; + struct fir_state_32x16 *fir = sd->fir; - trace_comp("EqF"); + tracev_comp("fcp"); /* get source and sink buffers */ source = list_first_item(&dev->bsource_list, struct comp_buffer, @@ -462,14 +495,18 @@ static int eq_fir_copy(struct comp_dev *dev) /* make sure source component buffer has enough data available and that * the sink component buffer has enough free bytes for copy. Also - * check for XRUNs */ + * check for XRUNs. 
+ */ res = comp_buffer_can_copy_bytes(source, sink, sd->period_bytes); if (res) { trace_eq_error("xrn"); return -EIO; /* xrun */ } - sd->eq_fir_func(dev, source, sink, dev->frames); + if (dev->frames & 1) + sd->eq_fir_func_odd(fir, source, sink, dev->frames, nch); + else + sd->eq_fir_func(fir, source, sink, dev->frames, nch); /* calc new free and available */ comp_update_buffer_consume(source, sd->period_bytes); @@ -483,42 +520,47 @@ static int eq_fir_prepare(struct comp_dev *dev) struct comp_data *cd = comp_get_drvdata(dev); int ret; - trace_eq("EPp"); + trace_eq("pre"); ret = comp_set_state(dev, COMP_TRIGGER_PREPARE); if (ret < 0) return ret; - cd->eq_fir_func = eq_fir_s32_default; - /* Initialize EQ */ - if (cd->config == NULL) { - comp_set_state(dev, COMP_TRIGGER_RESET); - return -EINVAL; - } - - ret = eq_fir_setup(cd->fir, cd->config, dev->params.channels); - if (ret < 0) { - comp_set_state(dev, COMP_TRIGGER_RESET); - return ret; + cd->eq_fir_func = eq_fir_passthrough; + if (cd->config) { + ret = eq_fir_setup(cd->fir, cd->config, dev->params.channels); + if (ret < 0) { + comp_set_state(dev, COMP_TRIGGER_RESET); + return ret; + } +#if FIR_GENERIC + cd->eq_fir_func = eq_fir_s32; + cd->eq_fir_func_odd = eq_fir_s32; +#endif +#if FIR_HIFIEP + cd->eq_fir_func = eq_fir_2x_s32_hifiep; + cd->eq_fir_func_odd = eq_fir_s32_hifiep; +#endif +#if FIR_HIFI3 + cd->eq_fir_func = eq_fir_2x_s32_hifi3; + cd->eq_fir_func_odd = eq_fir_s32_hifi3; +#endif } + trace_eq("len"); + trace_value(cd->fir[0].length); + trace_value(cd->fir[1].length); return 0; } static int eq_fir_reset(struct comp_dev *dev) { - int i; struct comp_data *cd = comp_get_drvdata(dev); - trace_eq("ERe"); + trace_eq("res"); - eq_fir_free_delaylines(cd->fir); - eq_fir_free_parameters(&cd->config); - - cd->eq_fir_func = eq_fir_s32_default; - for (i = 0; i < PLATFORM_MAX_CHANNELS; i++) - fir_reset(&cd->fir[i]); + eq_fir_clear_delaylines(cd->fir); comp_set_state(dev, COMP_TRIGGER_RESET); return 0; diff --git 
a/src/audio/eq_fir.h b/src/audio/eq_fir.h deleted file mode 100644 index 4d2480cab362..000000000000 --- a/src/audio/eq_fir.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2017, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the Intel Corporation nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * Author: Seppo Ingalsuo - * Liam Girdwood - * Keyon Jie - */ - -#ifndef EQ_FIR_H -#define EQ_FIR_H - -#endif diff --git a/src/include/uapi/eq.h b/src/include/uapi/eq.h index 99b2770bf522..0be012c96842 100644 --- a/src/include/uapi/eq.h +++ b/src/include/uapi/eq.h @@ -42,37 +42,63 @@ #define SOF_EQ_FIR_MAX_SIZE 4096 /* Max size allowed for coef data in bytes */ +#define SOF_EQ_FIR_MAX_LENGTH 192 /* Max length for individual filter */ + /* * eq_fir_configuration data structure contains this information + * uint32_t size + * This is the number of bytes needed to store the received EQ + * configuration. * uint16_t channels_in_config * This describes the number of channels in this EQ config data. It * can be different from PLATFORM_MAX_CHANNELS. * uint16_t number_of_responses * 0=no responses, 1=one response defined, 2=two responses defined, etc. * int16_t data[] - * assign_response[STREAM_MAX_CHANNELS] - * -1 = not defined, 0 = use first response, 1 = use 2nd response, etc. - * E.g. {0, 0, 0, 0, -1, -1, -1, -1} would apply to channels 0-3 the - * same first defined response and leave channels 4-7 unequalized. + * assign_response[channels_in_config] + * 0 = use first response, 1 = use 2nd response, etc. + * E.g. {0, 0, 0, 0, 1, 1, 1, 1} would apply to channels 0-3 the + * same first defined response and to channels 4-7 the second. * coef_data[] - * Repeated data { filter_length, input_shift, output_shift, h[] } + * Repeated data + * { filter_length, output_shift, h[] } * for every EQ response defined where vector h has filter_length * number of coefficients. Coefficients in h[] are in Q1.15 format. * E.g. 16384 (Q1.15) = 0.5. The shifts are number of right shifts. + * + * NOTE: The channels_in_config must be even to have coef_data aligned to + * 32 bit word in RAM. Therefore a mono EQ assign must be duplicated to 2ch + * even if it would never be used. Similarly a 5ch EQ assign must be increased + * to 6ch. EQ init will return an error if this is not met. 
+ * + * NOTE: The filter_length must be a multiple of four. Therefore the filter must + * be padded from the end with zeros to have this condition met. */ struct sof_eq_fir_config { + uint32_t size; uint16_t channels_in_config; uint16_t number_of_responses; int16_t data[]; }; +struct sof_eq_fir_coef_data { + int16_t length; /* Number of FIR taps */ + int16_t out_shift; /* Amount of right shifts at output */ + int16_t coef[]; /* FIR coefficients */ +}; + +/* In the struct above there are two words (length, shift) before the actual + * FIR coefficients. This information is used in parsing of the config blob. + */ +#define SOF_EQ_FIR_COEF_NHEADER 2 + /* IIR EQ type */ /* Component will reject non-matching configuration. The version number need * to be incremented with any ABI changes in function fir_cmd(). */ -#define SOF_EQ_FIR_ABI_VERSION 1 +#define SOF_EQ_IIR_ABI_VERSION 1 #define SOF_EQ_IIR_IDX_SWITCH 0 From 639b4a8cd3da483b7d93e61279c65c3c039db434 Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Fri, 3 Aug 2018 14:44:54 +0300 Subject: [PATCH 2/5] EQ FIR: Makefile changes for HiFiEP and HiFi3 optimized equalizer Signed-off-by: Seppo Ingalsuo --- src/audio/Makefile.am | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/audio/Makefile.am b/src/audio/Makefile.am index 482c98211de7..1fb59d11caf3 100644 --- a/src/audio/Makefile.am +++ b/src/audio/Makefile.am @@ -4,9 +4,9 @@ include_HEADERS = \ eq_iir.h \ iir.h \ fir.h \ + fir_config.h \ src_config.h \ src.h \ - eq_fir.h \ volume.h COMP_SRC = \ @@ -1014,6 +1014,8 @@ libaudio_a_SOURCES = \ iir.c \ eq_fir.c \ fir.c \ + fir_hifi2ep.c \ + fir_hifi3.c \ tone.c \ src.c \ src_generic.c \ From 32e490229b42a5b8cc08cd1d5666ddc6daaa585c Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Fri, 3 Aug 2018 14:45:37 +0300 Subject: [PATCH 3/5] EQ FIR: Updates to generic C filter core Signed-off-by: Seppo Ingalsuo --- src/audio/fir.c | 57 +++++++++++++++++++++---------- src/audio/fir.h | 41 ++++++++--------------- 
src/audio/fir_config.h | 76 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 128 insertions(+), 46 deletions(-) create mode 100644 src/audio/fir_config.h diff --git a/src/audio/fir.c b/src/audio/fir.c index 9c9fbdd0c470..d64475ac7bed 100644 --- a/src/audio/fir.c +++ b/src/audio/fir.c @@ -33,29 +33,24 @@ #include #include #include +#include +#include +#include +#include "fir_config.h" -#ifdef MODULE_TEST -#include -#endif +#if FIR_GENERIC -#include #include "fir.h" -#ifdef MODULE_TEST -#include -#endif - /* * EQ FIR algorithm code */ void fir_reset(struct fir_state_32x16 *fir) { - fir->mute = 1; fir->rwi = 0; fir->length = 0; fir->delay_size = 0; - fir->in_shift = 0; fir->out_shift = 0; fir->coef = NULL; /* There may need to know the beginning of dynamic allocation after @@ -65,19 +60,20 @@ void fir_reset(struct fir_state_32x16 *fir) int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]) { - struct fir_coef_32x16 *setup; + struct sof_eq_fir_coef_data *setup; - setup = (struct fir_coef_32x16 *) config; - fir->mute = 0; + setup = (struct sof_eq_fir_coef_data *)config; fir->rwi = 0; - fir->length = (int) setup->length; - fir->in_shift = (int) setup->in_shift; - fir->out_shift = (int) setup->out_shift; - fir->coef = &setup->coef; + fir->length = (int)setup->length; + fir->out_shift = (int)setup->out_shift; + fir->coef = &setup->coef[0]; fir->delay = NULL; fir->delay_size = 0; - if ((fir->length > MAX_FIR_LENGTH) || (fir->length < 1)) + /* Check for sane FIR length. The length is constrained to be a + * multiple of 4 for optimized code. 
+ */ + if (fir->length > SOF_EQ_FIR_MAX_LENGTH || fir->length < 1) return -EINVAL; return fir->length; @@ -89,3 +85,28 @@ void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data) fir->delay_size = fir->length; *data += fir->delay_size; /* Point to next delay line start */ } + +void eq_fir_s32(struct fir_state_32x16 fir[], struct comp_buffer *source, + struct comp_buffer *sink, int frames, int nch) +{ + struct fir_state_32x16 *filter; + int32_t *src = (int32_t *)source->r_ptr; + int32_t *snk = (int32_t *)sink->w_ptr; + int32_t *x; + int32_t *y; + int ch; + int i; + + for (ch = 0; ch < nch; ch++) { + filter = &fir[ch]; + x = src++; + y = snk++; + for (i = 0; i < frames; i++) { + *y = fir_32x16(filter, *x); + x += nch; + y += nch; + } + } +} + +#endif diff --git a/src/audio/fir.h b/src/audio/fir.h index b80e0002b812..fab87ede47bc 100644 --- a/src/audio/fir.h +++ b/src/audio/fir.h @@ -30,25 +30,19 @@ * Keyon Jie */ -#include +#ifndef FIR_H +#define FIR_H -#define MAX_FIR_LENGTH 192 +#include "fir_config.h" -#define NHEADER_FIR_COEF_32x16 3 +#if FIR_GENERIC -struct fir_coef_32x16 { - int16_t length; /* Number of FIR taps */ - int16_t in_shift; /* Amount of right shifts at input */ - int16_t out_shift; /* Amount of right shifts at output */ - int16_t coef; /* FIR coefficients */ -}; +#include struct fir_state_32x16 { - int mute; /* Set to 1 to mute EQ output, 0 otherwise */ int rwi; /* Circular read and write index */ int length; /* Number of FIR taps */ int delay_size; /* Actual delay lentgh, must be >= length */ - int in_shift; /* Amount of right shifts at input */ int out_shift; /* Amount of right shifts at output */ int16_t *coef; /* Pointer to FIR coefficients */ int32_t *delay; /* Pointer to FIR delay line */ @@ -60,17 +54,8 @@ int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]); void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data); -/* The next trivial functions are inlined */ - -static inline void fir_mute(struct fir_state_32x16 
*fir) -{ - fir->mute = 1; -} - -static inline void fir_unmute(struct fir_state_32x16 *fir) -{ - fir->mute = 0; -} +void eq_fir_s32(struct fir_state_32x16 fir[], struct comp_buffer *source, + struct comp_buffer *sink, int frames, int nch); /* The next functions are inlined to optmize execution speed */ @@ -81,7 +66,7 @@ static inline void fir_part_32x16(int64_t *y, int taps, const int16_t c[], /* Data is Q8.24, coef is Q1.15, product is Q9.39 */ for (n = 0; n < taps; n++) { - *y += (int64_t) c[*ic] * d[*id]; + *y += (int64_t)c[*ic] * d[*id]; (*ic)++; (*id)--; } @@ -96,7 +81,7 @@ static inline int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x) int tmp_ri; /* Write sample to delay */ - fir->delay[fir->rwi] = x >> fir->in_shift; + fir->delay[fir->rwi] = x; /* Start FIR calculation. Calculate first number of taps possible to * calculate before circular wrap need. @@ -125,8 +110,8 @@ static inline int32_t fir_32x16(struct fir_state_32x16 *fir, int32_t x) /* Q9.39 -> Q9.24, saturate to Q8.24 */ y = sat_int32(y >> (15 + fir->out_shift)); - if (fir->mute) - return 0; - else - return (int32_t)y; + return (int32_t)y; } + +#endif +#endif diff --git a/src/audio/fir_config.h b/src/audio/fir_config.h new file mode 100644 index 000000000000..317e0bf2906a --- /dev/null +++ b/src/audio/fir_config.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Seppo Ingalsuo + * Liam Girdwood + * Keyon Jie + */ + +#ifndef FIR_CONFIG_H + +/* Get platforms configuration */ +#include + +/* If the next defines are set to 1 the EQ is configured automatically. Setting + * to zero temporarily is useful for testing needs. + * Setting FIR_AUTOARCH to 0 allows to manually set the code variant. + */ +#define FIR_AUTOARCH 1 + +/* Force manually some code variant when FIR_AUTOARCH is set to zero. These + * are useful in code debugging. 
+ */ +#if FIR_AUTOARCH == 0 +#define FIR_GENERIC 0 +#define FIR_HIFIEP 0 +#define FIR_HIFI3 1 +#endif + +/* Select optimized code variant when xt-xcc compiler is used */ +#if FIR_AUTOARCH == 1 +#if defined __XCC__ +#include +#define FIR_GENERIC 0 +#if XCHAL_HAVE_HIFI2EP == 1 +#define FIR_HIFIEP 1 +#define FIR_HIFI3 0 +#endif +#if XCHAL_HAVE_HIFI3 == 1 +#define FIR_HIFI3 1 +#define FIR_HIFIEP 0 +#endif +#else +/* GCC */ +#define FIR_GENERIC 1 +#define FIR_HIFIEP 0 +#define FIR_HIFI3 0 +#endif +#endif + +#define FIR_CONFIG_H + +#endif From 57f186193affba86d1777e71e0243082fcab9a9c Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Fri, 3 Aug 2018 14:46:08 +0300 Subject: [PATCH 4/5] EQ FIR: HiFi3 optimized version Signed-off-by: Seppo Ingalsuo --- src/audio/fir_hifi3.c | 192 ++++++++++++++++++++++++++++++++ src/audio/fir_hifi3.h | 248 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 440 insertions(+) create mode 100644 src/audio/fir_hifi3.c create mode 100644 src/audio/fir_hifi3.h diff --git a/src/audio/fir_hifi3.c b/src/audio/fir_hifi3.c new file mode 100644 index 000000000000..dad79e0af768 --- /dev/null +++ b/src/audio/fir_hifi3.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2017, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Seppo Ingalsuo + */ + +#include +#include +#include +#include +#include +#include +#include "fir_config.h" + +#if FIR_HIFI3 + +#include +#include +#include "fir_hifi3.h" + +/* + * EQ FIR algorithm code + */ + +void fir_reset(struct fir_state_32x16 *fir) +{ + fir->mute = 1; + fir->length = 0; + fir->out_shift = 0; + fir->rwp = NULL; + fir->delay = NULL; + fir->delay_end = NULL; + fir->coef = NULL; + /* There may need to know the beginning of dynamic allocation after + * reset so omitting setting also fir->delay to NULL. + */ +} + +int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]) +{ + struct sof_eq_fir_coef_data *setup; + + /* The length is taps plus two since the filter computes two + * samples per call. Length plus one would be minimum but the add + * must be even. The even length is needed for 64 bit loads from delay + * lines with 32 bit samples. 
+ */ + setup = (struct sof_eq_fir_coef_data *)config; + fir->mute = 0; + fir->rwp = NULL; + fir->taps = (int)setup->length; + fir->length = fir->taps + 2; + fir->out_shift = (int)setup->out_shift; + fir->coef = (ae_f16x4 *)&setup->coef[0]; + fir->delay = NULL; + fir->delay_end = NULL; + + /* Check FIR tap count for implementation specific constraints */ + if (fir->taps > SOF_EQ_FIR_MAX_LENGTH || fir->taps < 4) + return -EINVAL; + + if (fir->taps & 3) + return -EINVAL; + + return fir->length; +} + +void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data) +{ + fir->delay = (ae_int32 *) *data; + fir->delay_end = fir->delay + fir->length; + fir->rwp = (ae_int32 *)(fir->delay + fir->length - 1); + *data += fir->length; /* Point to next delay line start */ +} + +void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift, + int *rshift) +{ + if (fir->mute) { + *lshift = 0; + *rshift = 31; + } else { + *lshift = (fir->out_shift < 0) ? -fir->out_shift : 0; + *rshift = (fir->out_shift > 0) ? fir->out_shift : 0; + } +} + +/* For even frame lengths use FIR filter that processes two sequential + * sample per call. + */ +void eq_fir_2x_s32_hifi3(struct fir_state_32x16 fir[], + struct comp_buffer *source, struct comp_buffer *sink, + int frames, int nch) +{ + struct fir_state_32x16 *f; + int32_t *src = (int32_t *)source->r_ptr; + int32_t *snk = (int32_t *)sink->w_ptr; + int32_t *x0; + int32_t *y0; + int32_t *x1; + int32_t *y1; + int ch; + int i; + int rshift; + int lshift; + int shift; + int inc = nch << 1; + + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts to e.g. apply mute + * without overhead. 
+ */ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + shift = lshift - rshift; + + /* Setup circular buffer for FIR input data delay */ + fir_hifi3_setup_circular(f); + + x0 = src++; + y0 = snk++; + for (i = 0; i < (frames >> 1); i++) { + x1 = x0 + nch; + y1 = y0 + nch; + fir_32x16_2x_hifi3(f, x0, x1, y0, y1, shift); + x0 += inc; + y0 += inc; + } + } +} + +/* FIR for any number of frames */ +void eq_fir_s32_hifi3(struct fir_state_32x16 fir[], struct comp_buffer *source, + struct comp_buffer *sink, int frames, int nch) +{ + struct fir_state_32x16 *f; + int32_t *src = (int32_t *)source->r_ptr; + int32_t *snk = (int32_t *)sink->w_ptr; + int32_t *x; + int32_t *y; + int ch; + int i; + int rshift; + int lshift; + int shift; + + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts to e.g. apply mute + * without overhead. + */ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + shift = lshift - rshift; + + /* Setup circular buffer for FIR input data delay */ + fir_hifi3_setup_circular(f); + + x = src++; + y = snk++; + for (i = 0; i < frames; i++) { + fir_32x16_hifi3(f, x, y, shift); + x += nch; + y += nch; + } + } +} + +#endif diff --git a/src/audio/fir_hifi3.h b/src/audio/fir_hifi3.h new file mode 100644 index 000000000000..20896f099e34 --- /dev/null +++ b/src/audio/fir_hifi3.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2017, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Author: Seppo Ingalsuo
+ */
+
+#ifndef FIR_HIFI3_H
+#define FIR_HIFI3_H
+
+#include "fir_config.h"
+
+#if FIR_HIFI3
+
+#include
+#include
+#include
+
+struct fir_state_32x16 {
+	ae_int32 *rwp; /* Circular read and write pointer */
+	ae_int32 *delay; /* Pointer to FIR delay line */
+	ae_int32 *delay_end; /* Pointer to FIR delay line end */
+	ae_f16x4 *coef; /* Pointer to FIR coefficients */
+	int mute; /* Set to 1 to mute EQ output, 0 otherwise */
+	int taps; /* Number of FIR taps */
+	int length; /* Number of FIR taps plus input length (even) */
+	int in_shift; /* Amount of right shifts at input, not used by the
+		       * code in this header
+		       */
+	int out_shift; /* Amount of right shifts at output */
+};
+
+void fir_reset(struct fir_state_32x16 *fir);
+
+int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]);
+
+void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data);
+
+void eq_fir_2x_s32_hifi3(struct fir_state_32x16 fir[],
+			 struct comp_buffer *source, struct comp_buffer *sink,
+			 int frames, int nch);
+
+void eq_fir_s32_hifi3(struct fir_state_32x16 fir[], struct comp_buffer *source,
+		      struct comp_buffer *sink, int frames, int nch);
+
+/* The next trivial functions are inlined */
+
+static inline void fir_mute(struct fir_state_32x16 *fir)
+{
+	fir->mute = 1;
+}
+
+static inline void fir_unmute(struct fir_state_32x16 *fir)
+{
+	fir->mute = 0;
+}
+
+/* Setup circular buffer for FIR input data delay. The circular buffer
+ * begin and end are set to the delay line start and end of this FIR
+ * instance. The filter functions below access the delay line with the
+ * circular (_XC) loads and stores, so this needs to be called for the
+ * instance before them.
+ */
+static inline void fir_hifi3_setup_circular(struct fir_state_32x16 *fir)
+{
+	AE_SETCBEGIN0(fir->delay);
+	AE_SETCEND0(fir->delay_end);
+}
+
+void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
+		      int *rshift);
+
+/* The next functions are inlined to optimize execution speed */
+
+/* FIR filter core that computes one output sample per call.
+ *
+ * The input sample *x is written to the delay line at fir->rwp. The
+ * filter output is accumulated over fir->taps taps, four taps per loop
+ * iteration, so the tap count needs to be a multiple of four. The result
+ * is shifted with shift, rounded, and stored to *y.
+ *
+ * NOTE(review): The register budget below was written for HiFi EP,
+ * confirm if it is relevant for HiFi3.
+ * HiFi EP has the following number of registers that should not be exceeded
+ * 4x 56 bit registers in register file Q
+ * 8x 48 bit registers in register file P
+ */
+
+static inline void fir_32x16_hifi3(struct fir_state_32x16 *fir, int32_t *x,
+				   int32_t *y, int shift)
+{
+	/* This function uses
+	 * 1x 56 bit registers Q,
+	 * 4x 48 bit registers P
+	 * 3x integers
+	 * 2x address pointers,
+	 */
+	ae_f64 a;
+	ae_valign u;
+	ae_f32x2 data2;
+	ae_f16x4 coefs;
+	ae_f32x2 d0;
+	ae_f32x2 d1;
+	int i;
+	ae_int32 *dp = fir->rwp;
+	ae_int16x4 *coefp = (ae_int16x4 *)fir->coef;
+	const int taps_div_4 = fir->taps >> 2;
+	const int inc = sizeof(int32_t);
+
+	/* Write sample to delay. The data pointer dp was taken from
+	 * fir->rwp before this store, so the data loads below start from
+	 * the location of this sample (assuming the store updates
+	 * fir->rwp after the write - TODO confirm).
+	 */
+	AE_S32_L_XC((ae_int32)*x, fir->rwp, -sizeof(int32_t));
+
+	/* Prime the coefficients stream */
+	u = AE_LA64_PP(coefp);
+
+	/* Note: The two samples per call version fir_32x16_2x_hifi3()
+	 * loads a pair of data samples with a single AE_L32X2_XC
+	 * instruction instead of the two loads and select used here.
+	 */
+	a = AE_ZEROQ56();
+	for (i = 0; i < taps_div_4; i++) {
+		/* Load four coefficients. Coef_3 contains tap h[n],
+		 * coef_2 contains h[n+1], coef_1 contains h[n+2], and
+		 * coef_0 contains h[n+3];
+		 */
+		AE_LA16X4_IP(coefs, u, coefp);
+
+		/* Load two data samples and pack to d0 to data2_h and
+		 * d1 to data2_l.
+		 */
+		AE_L32_XC(d0, dp, inc);
+		AE_L32_XC(d1, dp, inc);
+		data2 = AE_SEL32_LL(d0, d1);
+
+		/* Accumulate
+		 * a += data2_h * coefs_3 + data2_l * coefs_2. The Q1.31
+		 * data and Q1.15 coefficients are used as 24 bits as
+		 * Q1.23 values.
+		 */
+		AE_MULAAFD32X16_H3_L2(a, data2, coefs);
+
+		/* Repeat the same for next two taps.
+		 * a += data2_h * coefs_1 + data2_l * coefs_0.
+		 */
+		AE_L32_XC(d0, dp, inc);
+		AE_L32_XC(d1, dp, inc);
+		data2 = AE_SEL32_LL(d0, d1);
+		AE_MULAAFD32X16_H1_L0(a, data2, coefs);
+		/* There is no explicit coefp increment. The coefficients
+		 * load AE_LA16X4_IP() is expected to advance coefp by
+		 * four coefficients (TODO confirm from HiFi3 ISA).
+		 */
+	}
+
+	/* Do scaling shifts and store sample. */
+	a = AE_SLAA64S(a, shift);
+	AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y, 0);
+}
+
+/* FIR filter core that computes two sequential output samples per call.
+ *
+ * The input samples *x0 and then *x1 are written to the delay line at
+ * fir->rwp. Two outputs are accumulated in parallel over fir->taps taps,
+ * four taps per loop iteration, so the tap count needs to be a multiple
+ * of four. The results are shifted with shift, rounded, and stored to
+ * *y0 and *y1.
+ *
+ * NOTE(review): The register budget below was written for HiFi EP,
+ * confirm if it is relevant for HiFi3.
+ * HiFi EP has the following number of registers that should not be exceeded
+ * 4x 56 bit registers in register file Q
+ * 8x 48 bit registers in register file P
+ */
+
+static inline void fir_32x16_2x_hifi3(struct fir_state_32x16 *fir, int32_t *x0,
+				      int32_t *x1, int32_t *y0, int32_t *y1,
+				      int shift)
+{
+	/* This function uses
+	 * 2x 56 bit registers Q,
+	 * 4x 48 bit registers P
+	 * 3x integers
+	 * 2x address pointers,
+	 */
+	ae_f64 a;
+	ae_f64 b;
+	ae_valign u;
+	ae_f32x2 d0;
+	ae_f32x2 d1;
+	ae_f16x4 coefs;
+	int i;
+	ae_f32x2 *dp;
+	ae_f16x4 *coefp = fir->coef;
+	const int taps_div_4 = fir->taps >> 2;
+	const int inc = 2 * sizeof(int32_t);
+
+	/* Write samples to delay. The data pointer dp is taken from
+	 * fir->rwp between the stores, so it presumably points to the
+	 * location where x1 gets written (TODO confirm that the store
+	 * updates fir->rwp after the write).
+	 */
+	AE_S32_L_XC((ae_int32)*x0, fir->rwp, -sizeof(int32_t));
+	dp = (ae_f32x2 *)fir->rwp;
+	AE_S32_L_XC((ae_int32)*x1, fir->rwp, -sizeof(int32_t));
+
+	/* Clear the accumulators. The accumulator a is for output y0 and
+	 * b is for output y1.
+	 */
+	a = AE_ZERO64();
+	b = AE_ZERO64();
+
+	/* Prime the coefficients stream */
+	u = AE_LA64_PP(coefp);
+
+	/* Load the first two data samples to d0. In the loop d0 holds the
+	 * previously loaded pair and d1 the next pair.
+	 */
+	AE_L32X2_XC(d0, dp, inc);
+	for (i = 0; i < taps_div_4; i++) {
+		/* Load four coefficients. Coef_3 contains tap h[n],
+		 * coef_2 contains h[n+1], coef_1 contains h[n+2], and
+		 * coef_0 contains h[n+3];
+		 */
+		AE_LA16X4_IP(coefs, u, coefp);
+
+		/* Load two data samples. Upper part d1_h is x[n+1] and
+		 * lower part d1_l is x[n].
+		 */
+		AE_L32X2_XC(d1, dp, inc);
+
+		/* Quad MAC (HH)
+		 * b += d0_h * coefs_3 + d0_l * coefs_2
+		 * a += d0_l * coefs_3 + d1_h * coefs_2
+		 */
+		AE_MULAFD32X16X2_FIR_HH(b, a, d0, d1, coefs);
+		d0 = d1;
+
+		/* Repeat the same for next two taps with the lower half
+		 * of the loaded coefficients, coefs_1 and coefs_0.
+		 */
+		AE_L32X2_XC(d1, dp, inc);
+
+		/* Quad MAC (HL)
+		 * b += d0_h * coefs_1 + d0_l * coefs_0
+		 * a += d0_l * coefs_1 + d1_h * coefs_0
+		 */
+		AE_MULAFD32X16X2_FIR_HL(b, a, d0, d1, coefs);
+		d0 = d1;
+	}
+
+	/* Do scaling shifts and store sample. */
+	b = AE_SLAA64S(b, shift);
+	a = AE_SLAA64S(a, shift);
+	AE_S32_L_I(AE_ROUND32F48SSYM(b), (ae_int32 *)y1, 0);
+	AE_S32_L_I(AE_ROUND32F48SSYM(a), (ae_int32 *)y0, 0);
+}
+
+#endif
+#endif
From 920dc98f95b5bbe3fc382b82390b2bc4ac63be0f Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo
Date: Fri, 3 Aug 2018 14:46:33 +0300
Subject: [PATCH 5/5] EQ FIR: HiFiEP optimized version

Signed-off-by: Seppo Ingalsuo
---
 src/audio/fir_hifi2ep.c | 189 ++++++++++++++++++++++++++++++
 src/audio/fir_hifi2ep.h | 248 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 437 insertions(+)
 create mode 100644 src/audio/fir_hifi2ep.c
 create mode 100644 src/audio/fir_hifi2ep.h

diff --git a/src/audio/fir_hifi2ep.c b/src/audio/fir_hifi2ep.c
new file mode 100644
index 000000000000..77cdba2262a3
--- /dev/null
+++ b/src/audio/fir_hifi2ep.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Intel Corporation nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * Author: Seppo Ingalsuo + */ + +#include +#include +#include +#include +#include +#include +#include "fir_config.h" + +#if FIR_HIFIEP + +#include +#include +#include "fir_hifi2ep.h" + +/* + * EQ FIR algorithm code + */ + +void fir_reset(struct fir_state_32x16 *fir) +{ + fir->mute = 1; + fir->length = 0; + fir->out_shift = 0; + fir->rwp = NULL; + fir->delay = NULL; + fir->delay_end = NULL; + fir->coef = NULL; + /* There may need to know the beginning of dynamic allocation after + * reset so omitting setting also fir->delay to NULL. + */ +} + +int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]) +{ + struct sof_eq_fir_coef_data *setup; + + /* The length is taps plus two since the filter computes two + * samples per call. Length plus one would be minimum but the add + * must be even. The even length is needed for 64 bit loads from delay + * lines with 32 bit samples. 
+ */ + setup = (struct sof_eq_fir_coef_data *)config; + fir->mute = 0; + fir->rwp = NULL; + fir->taps = (int)setup->length; + fir->length = fir->taps + 2; + fir->out_shift = (int)setup->out_shift; + fir->coef = (ae_p16x2s *)&setup->coef[0]; + fir->delay = NULL; + fir->delay_end = NULL; + + /* Check FIR tap count for implementation specific constraints */ + if (fir->taps > SOF_EQ_FIR_MAX_LENGTH || fir->taps < 4) + return -EINVAL; + + if (fir->taps & 3) + return -EINVAL; + + return fir->length; +} + +void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data) +{ + fir->delay = (ae_p24f *) *data; + fir->delay_end = fir->delay + fir->length; + fir->rwp = (ae_p24x2f *)(fir->delay + fir->length - 1); + *data += fir->length; /* Point to next delay line start */ +} + +void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift, + int *rshift) +{ + if (fir->mute) { + *lshift = 0; + *rshift = 31; + } else { + *lshift = (fir->out_shift < 0) ? -fir->out_shift : 0; + *rshift = (fir->out_shift > 0) ? fir->out_shift : 0; + } +} + +/* For even frame lengths use FIR filter that processes two sequential + * sample per call. + */ +void eq_fir_2x_s32_hifiep(struct fir_state_32x16 fir[], + struct comp_buffer *source, + struct comp_buffer *sink, + int frames, int nch) +{ + struct fir_state_32x16 *f; + int32_t *src = (int32_t *)source->r_ptr; + int32_t *snk = (int32_t *)sink->w_ptr; + int32_t *x0; + int32_t *y0; + int32_t *x1; + int32_t *y1; + int ch; + int i; + int rshift; + int lshift; + int inc = nch << 1; + + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts to e.g. apply mute + * without overhead. 
+ */ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + + /* Setup circular buffer for FIR input data delay */ + fir_hifiep_setup_circular(f); + + x0 = src++; + y0 = snk++; + for (i = 0; i < (frames >> 1); i++) { + x1 = x0 + nch; + y1 = y0 + nch; + fir_32x16_2x_hifiep(f, x0, x1, y0, y1, lshift, rshift); + x0 += inc; + y0 += inc; + } + } +} + +/* FIR for any number of frames */ +void eq_fir_s32_hifiep(struct fir_state_32x16 fir[], struct comp_buffer *source, + struct comp_buffer *sink, int frames, int nch) +{ + struct fir_state_32x16 *f; + int32_t *src = (int32_t *)source->r_ptr; + int32_t *snk = (int32_t *)sink->w_ptr; + int32_t *x; + int32_t *y; + int ch; + int i; + int rshift; + int lshift; + + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts to e.g. apply mute + * without overhead. + */ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + + /* Setup circular buffer for FIR input data delay */ + fir_hifiep_setup_circular(f); + + x = src++; + y = snk++; + for (i = 0; i < frames; i++) { + fir_32x16_hifiep(f, x, y, lshift, rshift); + x += nch; + y += nch; + } + } +} + +#endif diff --git a/src/audio/fir_hifi2ep.h b/src/audio/fir_hifi2ep.h new file mode 100644 index 000000000000..f9aff3857140 --- /dev/null +++ b/src/audio/fir_hifi2ep.h @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2017, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Author: Seppo Ingalsuo
+ */
+
+#ifndef FIR_HIFI2EP_H
+#define FIR_HIFI2EP_H
+
+#include "fir_config.h"
+
+#if FIR_HIFIEP
+
+#include
+#include
+#include
+
+struct fir_state_32x16 {
+	ae_p24x2f *rwp; /* Circular read and write pointer */
+	ae_p24f *delay; /* Pointer to FIR delay line */
+	ae_p24f *delay_end; /* Pointer to FIR delay line end */
+	ae_p16x2s *coef; /* Pointer to FIR coefficients */
+	int mute; /* Set to 1 to mute EQ output, 0 otherwise */
+	int taps; /* Number of FIR taps */
+	int length; /* Number of FIR taps plus input length (even) */
+	int in_shift; /* Amount of right shifts at input, not used by the
+		       * code in this header
+		       */
+	int out_shift; /* Amount of right shifts at output */
+};
+
+void fir_reset(struct fir_state_32x16 *fir);
+
+int fir_init_coef(struct fir_state_32x16 *fir, int16_t config[]);
+
+void fir_init_delay(struct fir_state_32x16 *fir, int32_t **data);
+
+void eq_fir_2x_s32_hifiep(struct fir_state_32x16 fir[],
+			  struct comp_buffer *source,
+			  struct comp_buffer *sink,
+			  int frames, int nch);
+
+void eq_fir_s32_hifiep(struct fir_state_32x16 fir[], struct comp_buffer *source,
+		       struct comp_buffer *sink, int frames, int nch);
+
+/* The next trivial functions are inlined */
+
+static inline void fir_mute(struct fir_state_32x16 *fir)
+{
+	fir->mute = 1;
+}
+
+static inline void fir_unmute(struct fir_state_32x16 *fir)
+{
+	fir->mute = 0;
+}
+
+/* Setup circular buffer for FIR input data delay. The circular buffer
+ * begin and end are set to the delay line start and end of this FIR
+ * instance. The filter functions below access the delay line with the
+ * circular (_C) loads and stores, so this needs to be called for the
+ * instance before them.
+ */
+static inline void fir_hifiep_setup_circular(struct fir_state_32x16 *fir)
+{
+	AE_SETCBEGIN0(fir->delay);
+	AE_SETCEND0(fir->delay_end);
+}
+
+void fir_get_lrshifts(struct fir_state_32x16 *fir, int *lshift,
+		      int *rshift);
+
+/* The next functions are inlined to optimize execution speed */
+
+/* FIR filter core that computes one output sample per call.
+ *
+ * The input sample *x is written to the delay line at fir->rwp. The
+ * filter output is accumulated over fir->taps taps, four taps per loop
+ * iteration, so the tap count needs to be a multiple of four. The result
+ * is shifted left by lshift and then right by rshift, rounded, and
+ * stored to *y.
+ *
+ * HiFi EP has the following number of registers that should not be exceeded
+ * 4x 56 bit registers in register file Q
+ * 8x 48 bit registers in register file P
+ */
+
+static inline void fir_32x16_hifiep(struct fir_state_32x16 *fir, int32_t *x,
+				    int32_t *y, int lshift, int rshift)
+{
+	/* This function uses
+	 * 1x 56 bit registers Q,
+	 * 4x 48 bit registers P
+	 * 3x integers
+	 * 2x address pointers,
+	 */
+	ae_q56s a;
+	ae_p24x2f data2;
+	ae_p24x2f coef2;
+	ae_p24x2f d0;
+	ae_p24x2f d1;
+	int i;
+	ae_p24x2f *dp = fir->rwp;
+	ae_p16x2s *coefp = fir->coef;
+	const int taps_div_4 = fir->taps >> 2;
+	const int inc = sizeof(int32_t);
+
+	/* Write sample to delay. The data pointer dp was taken from
+	 * fir->rwp before this store, so the data loads below start from
+	 * the location of this sample (assuming the store updates
+	 * fir->rwp after the write - TODO confirm).
+	 */
+	a = AE_LQ32F_I((ae_q32s *)x, 0);
+	AE_SQ32F_C(a, (ae_q32s *)fir->rwp, -sizeof(int32_t));
+
+	/* Note: The two samples per call version fir_32x16_2x_hifiep()
+	 * loads a pair of data samples with a single AE_LP24X2F_C
+	 * instruction instead of the two loads and select used here.
+	 */
+	a = AE_ZEROQ56();
+	for (i = 0; i < taps_div_4; i++) {
+		/* Load two coefficients. Coef2_h contains tap coefp[n]
+		 * and coef2_l contains coef[n+1].
+		 */
+		coef2 = AE_LP16X2F_I(coefp, 0);
+
+		/* Load two data samples and pack to d0 to data2_h and
+		 * d1 to data2_l.
+		 */
+		AE_LP24F_C(d0, dp, inc);
+		AE_LP24F_C(d1, dp, inc);
+		data2 = AE_SELP24_LL(d0, d1);
+
+		/* Accumulate
+		 * data2_h * coef2_h + data2_l * coef2_l. The Q1.31
+		 * data and Q1.15 coefficients are used as 24 bits as
+		 * Q1.23 values.
+		 */
+		AE_MULAAFP24S_HH_LL(a, data2, coef2);
+
+		/* Repeat the same for next two taps and increase coefp. */
+		coef2 = AE_LP16X2F_I(coefp, sizeof(ae_p16x2s));
+		AE_LP24F_C(d0, dp, inc);
+		AE_LP24F_C(d1, dp, inc);
+		data2 = AE_SELP24_LL(d0, d1);
+		AE_MULAAFP24S_HH_LL(a, data2, coef2);
+		coefp += 2;
+	}
+
+	/* Do scaling shifts and store sample. */
+	a = AE_SRAAQ56(AE_SLLASQ56S(a, lshift), rshift);
+	AE_SQ32F_I(AE_ROUNDSQ32SYM(a), (ae_q32s *)y, 0);
+}
+
+/* FIR filter core that computes two sequential output samples per call.
+ *
+ * The input samples *x0 and then *x1 are written to the delay line at
+ * fir->rwp. Two outputs are accumulated in parallel over fir->taps taps,
+ * four taps per loop iteration, so the tap count needs to be a multiple
+ * of four. The results are shifted left by lshift and then right by
+ * rshift, rounded, and stored to *y0 and *y1.
+ *
+ * HiFi EP has the following number of registers that should not be exceeded
+ * 4x 56 bit registers in register file Q
+ * 8x 48 bit registers in register file P
+ */
+
+static inline void fir_32x16_2x_hifiep(struct fir_state_32x16 *fir, int32_t *x0,
+				       int32_t *x1, int32_t *y0, int32_t *y1,
+				       int lshift, int rshift)
+{
+	/* This function uses
+	 * 2x 56 bit registers Q,
+	 * 4x 48 bit registers P
+	 * 3x integers
+	 * 2x address pointers,
+	 */
+	ae_q56s a;
+	ae_q56s b;
+	ae_p24x2f d0;
+	ae_p24x2f d1;
+	ae_p24x2f d3;
+	ae_p24x2f coefs;
+	int i;
+	ae_p24x2f *dp;
+	ae_p16x2s *coefp = fir->coef;
+	const int taps_div_4 = fir->taps >> 2;
+	const int inc = 2 * sizeof(int32_t);
+
+	/* Write samples to delay. The data pointer dp is taken from
+	 * fir->rwp between the stores, so it presumably points to the
+	 * location where x1 gets written (TODO confirm that the store
+	 * updates fir->rwp after the write).
+	 */
+	a = AE_LQ32F_I((ae_q32s *)x0, 0);
+	AE_SQ32F_C(a, (ae_q32s *)fir->rwp, -sizeof(int32_t));
+	a = AE_LQ32F_I((ae_q32s *)x1, 0);
+	dp = fir->rwp;
+	AE_SQ32F_C(a, (ae_q32s *)fir->rwp, -sizeof(int32_t));
+
+	/* Clear the accumulators. The accumulator a is for output y0 and
+	 * b is for output y1.
+	 */
+	a = AE_ZEROQ56();
+	b = AE_ZEROQ56();
+	/* Load the first two data samples to d0. In the loop d0 holds the
+	 * previously loaded pair and d1 the next pair.
+	 */
+	AE_LP24X2F_C(d0, dp, inc);
+	for (i = 0; i < taps_div_4; i++) {
+		/* Load two coefficients. Coefs_h contains tap coefp[n]
+		 * and coefs_l contains coefp[n+1].
+		 */
+		coefs = AE_LP16X2F_I(coefp, 0);
+
+		/* Load two data samples. Upper part d1_h is x[n+1] and
+		 * lower part d1_l is x[n].
+		 */
+		AE_LP24X2F_C(d1, dp, inc);
+
+		/* Accumulate
+		 * b += d0_h * coefs_h + d0_l * coefs_l. The Q1.31 data
+		 * and Q1.15 coefficients are converted to 24 bits as
+		 * Q1.23 values.
+		 */
+		AE_MULAAFP24S_HH_LL(b, d0, coefs);
+
+		/* Pack d0_l and d1_h to d3. Then accumulate
+		 * a += d3_h * coefs_h + d3_l * coefs_l. Pass d1 to d0 for
+		 * next unrolled iteration.
+		 */
+		d3 = AE_SELP24_LH(d0, d1);
+		AE_MULAAFP24S_HH_LL(a, d3, coefs);
+		d0 = d1;
+
+		/* Repeat the same for next two taps and increase coefp. */
+		coefs = AE_LP16X2F_I(coefp, sizeof(ae_p16x2s));
+		AE_LP24X2F_C(d1, dp, inc);
+		AE_MULAAFP24S_HH_LL(b, d0, coefs);
+		d3 = AE_SELP24_LH(d0, d1);
+		AE_MULAAFP24S_HH_LL(a, d3, coefs);
+		d0 = d1;
+		coefp += 2;
+	}
+
+	/* Do scaling shifts and store sample. */
+	b = AE_SRAAQ56(AE_SLLASQ56S(b, lshift), rshift);
+	a = AE_SRAAQ56(AE_SLLASQ56S(a, lshift), rshift);
+	AE_SQ32F_I(AE_ROUNDSQ32SYM(b), (ae_q32s *)y1, 0);
+	AE_SQ32F_I(AE_ROUNDSQ32SYM(a), (ae_q32s *)y0, 0);
+}
+
+#endif
+#endif