diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bb2bcdea..a8eedf97 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,8 +28,6 @@ add_executable ( sync.c firdecim_q15.c - firdes_kaiser.c - resamp_q15.c math.c conv_dec.c diff --git a/src/acquire.c b/src/acquire.c index d71ce63a..36234480 100644 --- a/src/acquire.c +++ b/src/acquire.c @@ -27,16 +27,16 @@ void acquire_process(acquire_t *st) { float complex max_v = 0; float angle, max_mag = -1.0f; - unsigned int samperr = 0, i, j; + unsigned int samperr = 0, i, j, keep; unsigned int mink = 0, maxk = FFTCP; if (st->idx != FFTCP * (M + 1)) return; - if (st->ready && fabsf(st->slope) < 1 && st->samperr > 10) + if (st->input->sync.ready) { - mink = st->samperr - 10; - maxk = st->samperr + 10; + mink = FFTCP / 2 - 25; + maxk = FFTCP / 2 + 25; } memset(st->sums, 0, sizeof(float complex) * FFTCP); @@ -74,81 +74,30 @@ void acquire_process(acquire_t *st) angle = 0.5 * st->prev_angle + 0.5 * angle; } st->prev_angle = angle; - st->samperr = samperr; - // compare with previous timing offset - if (abs((int)samperr - (int)st->history[(st->history_size-1) % ACQ_HISTORY]) > FFT/2) + for (i = 0; i < M; ++i) { - // clear the history if we "rolled over" - st->history_size = 0; - } - - st->history[st->history_size % ACQ_HISTORY] = samperr; - if (++st->history_size > ACQ_HISTORY) - { - float avgerr, slope; - int sum = 0; - for (i = 0; i < ACQ_HISTORY; i++) - sum += st->history[i]; - avgerr = sum / (float)ACQ_HISTORY; - slope = ((float)samperr - avgerr) / (ACQ_HISTORY / 2 * SYMBOLS); - st->ready = 1; - st->slope = slope; - - if ((st->history_size % ACQ_HISTORY) == 0) - log_debug("Timing offset: %f, slope: %f", avgerr, slope); - - // avoid adjusting the rate too much - if (fabsf(slope) > 1.0) - { - log_info("Timing offset: %f, slope: %f (adjust)", avgerr, slope); - - input_rate_adjust(st->input, (-slope / BLKSZ) / FFTCP); - - // clear the history so we don't overadjust - st->history_size = 0; - } - // we don't want the samperr to go < 0 - else if (slope < 0) + int j; + for (j = 0; j < FFTCP; ++j) { - input_rate_adjust(st->input, (-slope / BLKSZ / 8) / FFTCP); - st->history_size = 0; + int n = i * FFTCP + j; + float complex sample = fast_cexpf(angle * n / FFT) * st->buffer[i * FFTCP + j + samperr]; + if (j < CP) + st->fftin[j] = st->shape[j] * sample; + else if (j < FFT) + st->fftin[j] = sample; + else + st->fftin[j - FFT] += st->shape[j] * sample; } - // skip samples instead of having samperr > FFT - // NB adjustment must be greater than FFT/2 - if (samperr > 7*FFT/8) - { - input_set_skip(st->input, 6*FFT/8); - st->samperr = 0; - } + fftwf_execute(st->fft); + fftshift(st->fftout, FFT); + sync_push(&st->input->sync, st->fftout); } - if (st->ready) - { - for (i = 0; i < M; ++i) - { - int j; - for (j = 0; j < FFTCP; ++j) - { - int n = i * FFTCP + j; - float complex sample = fast_cexpf(angle * n / FFT) * st->buffer[i * FFTCP + j + samperr]; - if (j < CP) - st->fftin[j] = st->shape[j] * sample; - else if (j < FFT) - st->fftin[j] = sample; - else - st->fftin[j - FFT] += st->shape[j] * sample; - } - - fftwf_execute(st->fft); - fftshift(st->fftout, FFT); - sync_push(&st->input->sync, st->fftout); - } - } - - memmove(&st->buffer[0], &st->buffer[st->idx - FFTCP], sizeof(float complex) * FFTCP); - st->idx = FFTCP; + keep = FFTCP * 3 / 2 - samperr; + memmove(&st->buffer[0], &st->buffer[st->idx - keep], sizeof(float complex) * keep); + st->idx = keep; } unsigned int acquire_push(acquire_t *st, float complex *buf, unsigned int length) @@ -172,9 +121,6 @@ void acquire_init(acquire_t *st, input_t *input) st->buffer = malloc(sizeof(float complex) * FFTCP * (M + 1)); st->sums = malloc(sizeof(float complex) * (FFTCP + CP)); st->idx = 0; - st->ready = 0; - st->samperr = 0; - st->slope = 0; st->prev_angle = 0; st->shape = malloc(sizeof(float) * FFTCP); @@ -189,10 +135,6 @@ void acquire_init(acquire_t *st, input_t *input) st->shape[i] = cosf(M_PI / 2 * (i - FFT) / CP); } - st->history_size = 0; - for (i = 0; i < ACQ_HISTORY; ++i) - st->history[i] = 0; - st->fftin = malloc(sizeof(float complex) * FFT); st->fftout = malloc(sizeof(float complex) * FFT); st->fft = fftwf_plan_dft_1d(FFT, st->fftin, st->fftout, FFTW_FORWARD, 0); diff --git a/src/acquire.h b/src/acquire.h index 246b9019..ac3388ef 100644 --- a/src/acquire.h +++ b/src/acquire.h @@ -3,8 +3,6 @@ #include #include -#define ACQ_HISTORY 16 - typedef struct { struct input_t *input; @@ -15,12 +13,7 @@ typedef struct float *shape; fftwf_plan fft; - float samperr; - float slope; unsigned int idx; - int ready; - int history[ACQ_HISTORY]; - unsigned int history_size; float prev_angle; } acquire_t; diff --git a/src/firdes.h b/src/firdes.h deleted file mode 100644 index 89a8ffd0..00000000 --- a/src/firdes.h +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -void firdes_kaiser(unsigned int _n, float _fc, float _As, float _mu, float *_h); diff --git a/src/firdes_kaiser.c b/src/firdes_kaiser.c deleted file mode 100644 index 09ed0724..00000000 --- a/src/firdes_kaiser.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * From the liquid-dsp project. Original copyright is below. - * - * Copyright (c) 2007 - 2015 Joseph Gaeddert - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include -#include "firdes.h" - -static float lngammaf(float _z) -{ - float g; - if (_z < 10.0f) { - // Use recursive formula: - // gamma(z+1) = z * gamma(z) - // therefore: - // log(Gamma(z)) = log(gamma(z+1)) - ln(z) - return lngammaf(_z + 1.0f) - logf(_z); - } else { - // high value approximation - g = 0.5*( logf(2*M_PI)-log(_z) ); - g += _z*( logf(_z+(1/(12.0f*_z-0.1f/_z)))-1); - } - return g; -} - -// I_0(z) : Modified bessel function of the first kind (order zero) -#define NUM_BESSELI0_ITERATIONS 32 -static float besseli0f(float _z) -{ - // TODO : use better low-signal approximation - if (_z == 0.0f) - return 1.0f; - - unsigned int k; - float t, y=0.0f; - for (k=0; k 50.0f) - beta = 0.1102f*(_As - 8.7f); - else if (_As > 21.0f) - beta = 0.5842*powf(_As - 21, 0.4f) + 0.07886f*(_As - 21); - else - beta = 0.0f; - - return beta; -} - -void firdes_kaiser(unsigned int _n, - float _fc, - float _As, - float _mu, - float *_h) -{ - // choose kaiser beta parameter (approximate) - float beta = kaiser_beta_As(_As); - - float t, h1, h2; - unsigned int i; - for (i=0; i<_n; i++) { - t = (float)i - (float)(_n-1)/2 + _mu; - - // sinc prototype - h1 = sincf(2.0f*_fc*t); - - // kaiser window - h2 = kaiser(i,_n,beta,_mu); - - //printf("t = %f, h1 = %f, h2 = %f\n", t, h1, h2); - - // composite - _h[i] = h1*h2; - } -} diff --git a/src/input.c b/src/input.c index 5efdc534..ca1ffb8c 100644 --- a/src/input.c +++ b/src/input.c @@ -24,12 +24,6 @@ #define INPUT_BUF_LEN (2160 * 512) -#ifdef USE_FAST_MATH -#define RESAMP_NUM_TAPS 8 -#else -#define RESAMP_NUM_TAPS 16 -#endif - static float filter_taps[] = { #ifdef USE_FAST_MATH /* @@ -143,11 +137,6 @@ void input_pdu_push(input_t *st, uint8_t *pdu, unsigned int len, unsigned int pr output_push(st->output, pdu, len, program); } -void input_rate_adjust(input_t *st, float adj) -{ - st->resamp_rate += adj; -} - void input_set_skip(input_t *st, unsigned int skip) { st->skip += skip; @@ -264,7 +253,6 @@ void input_cb(uint8_t *buf, uint32_t len, void *arg) } } new_avail = st->avail; - resamp_q15_set_rate(st->resamp, st->resamp_rate); #ifdef USE_THREADS pthread_mutex_unlock(&st->mutex); #endif @@ -278,7 +266,6 @@ void input_cb(uint8_t *buf, uint32_t len, void *arg) for (i = 0; i < cnt; i++) { - unsigned int nw; cint16_t x[2], y; x[0].r = U8_Q15(buf[i * 4 + 0]); @@ -287,9 +274,7 @@ void input_cb(uint8_t *buf, uint32_t len, void *arg) x[1].i = -U8_Q15(buf[i * 4 + 3]); firdecim_q15_execute(st->filter, x, &y); - resamp_q15_execute(st->resamp, &y, &st->buffer[new_avail], &nw); - - new_avail += nw; + st->buffer[new_avail++] = y.r / 32768.0 + _Complex_I * y.i / 32768.0; } #ifdef USE_THREADS @@ -318,7 +303,6 @@ void input_reset(input_t *st) st->avail = 0; st->used = 0; st->skip = 0; - st->resamp_rate = 1.0f; st->cfo = 0; st->cfo_idx = 0; st->cfo_used = 0; @@ -339,7 +323,6 @@ void input_init(input_t *st, output_t *output, double center, unsigned int progr st->snr_cb_arg = NULL; st->filter = firdecim_q15_create(2, filter_taps, sizeof(filter_taps) / sizeof(filter_taps[0])); - st->resamp = resamp_q15_create(RESAMP_NUM_TAPS / 2, 0.45f, 60.0f, 16); st->snr_fft = fftwf_plan_dft_1d(64, st->snr_fft_in, st->snr_fft_out, FFTW_FORWARD, 0); input_reset(st); diff --git a/src/input.h b/src/input.h index ac71ba12..10b5925b 100644 --- a/src/input.h +++ b/src/input.h @@ -13,7 +13,6 @@ #include "firdecim_q15.h" #include "frame.h" #include "output.h" -#include "resamp_q15.h" #include "sync.h" typedef int (*input_snr_cb_t) (void *, float, float, float); @@ -24,8 +23,6 @@ typedef struct input_t FILE *outfp; firdecim_q15 filter; - resamp_q15 resamp; - float resamp_rate; float complex *buffer; double center; unsigned int avail, used, skip; @@ -55,7 +52,6 @@ typedef struct input_t void input_init(input_t *st, output_t *output, double center, unsigned int program, FILE *outfp); void input_cb(uint8_t *, uint32_t, void *); void input_set_snr_callback(input_t *st, input_snr_cb_t cb, void *); -void input_rate_adjust(input_t *st, float adj); void input_cfo_adjust(input_t *st, int cfo); void input_set_skip(input_t *st, unsigned int skip); void input_wait(input_t *st, int flush); diff --git a/src/resamp_q15.c b/src/resamp_q15.c deleted file mode 100644 index 31a45e7c..00000000 --- a/src/resamp_q15.c +++ /dev/null @@ -1,425 +0,0 @@ -#include "config.h" - -#include - -#ifdef HAVE_NEON -#include -#endif - -#ifdef HAVE_SSE2 -#include -#endif - -#include "firdes.h" -#include "resamp_q15.h" - -#ifdef USE_FAST_MATH -#define NUM_TAPS 8 -#else -#define NUM_TAPS 16 -#endif - -#define WINDOW_SIZE 2048 - -typedef struct { - int32_t r, i; -} cint32_t; - -static inline cint32_t cf_to_cq31(float complex x) -{ - cint32_t cq31; - cq31.r = crealf(x) * 2147483647.0f; - cq31.i = cimagf(x) * 2147483647.0f; - return cq31; -} - -static inline float complex cq31_to_cf(cint32_t cq31) -{ - return CMPLXF((float)cq31.r / 2147483647.0f, (float)cq31.i / 2147483647.0f); -} - -typedef struct { - // number of filters - unsigned int nf; - - // coefficients - int32_t * h; - unsigned int h_len; - unsigned int h_sub_len; - - // input window - cint32_t * window; - unsigned int idx; -} *firpfb_q31; - -typedef struct { - // number of filters - unsigned int nf; - - // coefficients - int16_t * h; - unsigned int h_len; - unsigned int h_sub_len; - - // input window - cint16_t * window; - unsigned int idx; -} *firpfb_q15; - -struct resamp_q15 { - // resampling properties/states - float rate; // resampling rate (ouput/input) - float del; // fractional delay step - - // floating-point phase - float tau; // accumulated timing phase, 0 <= tau < 1 - float bf; // soft filterbank index, bf = tau*npfb = b + mu - unsigned int b; // base filterbank index, 0 <= b < npfb - float mu; // fractional filterbank interpolation value, 0 <= mu < 1 - cint32_t y0; // filterbank output at index b - cint32_t y1; // filterbank output at index b+1 - - // polyphase filter bank - unsigned int npfb; - firpfb_q31 pfb; - - enum { - RESAMP_STATE_BOUNDARY, // boundary between input samples - RESAMP_STATE_INTERP, // regular interpolation - } state; -}; - -firpfb_q31 firpfb_q31_create(unsigned int nf, const float *_h, unsigned int h_len) -{ - firpfb_q31 q; - - q = malloc(sizeof(*q)); - q->nf = nf; - q->h_len = h_len; - q->h_sub_len = h_len / nf; - assert(q->h_sub_len * q->nf == q->h_len); - assert(q->h_sub_len == NUM_TAPS); - - q->h = malloc(sizeof(int32_t) * 2 * q->h_len); - q->window = calloc(sizeof(cint32_t), WINDOW_SIZE); - q->idx = q->h_sub_len - 1; - - // reverse order so we can push into the window - // duplicate for neon - for (unsigned int i = 0; i < nf; ++i) - { - for (unsigned int j = 0; j < q->h_sub_len; ++j) - { - q->h[(i * q->h_sub_len + j) * 2] = roundf(_h[(q->h_sub_len - 1 - j) * q->nf + i] * 2147483647.0); - q->h[(i * q->h_sub_len + j) * 2 + 1] = roundf(_h[(q->h_sub_len - 1 - j) * q->nf + i] * 2147483647.0); - } - } - - return q; -} - -void firpfb_q31_push(firpfb_q31 q, cint32_t x) -{ - if (q->idx == WINDOW_SIZE) - { - for (unsigned int i = 0; i < q->h_sub_len - 1; i++) - q->window[i] = q->window[q->idx - q->h_sub_len + i]; - q->idx = q->h_sub_len - 1; - } - q->window[q->idx++] = x; -} - -#if defined(HAVE_NEON) -static cint32_t dotprod_q31(cint32_t *a, int32_t *b, int n) -{ - int32x4_t s1 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[0]), vld1q_s32(&b[0*2])); - int32x4_t s2 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[2]), vld1q_s32(&b[2*2])); - int32x4_t s3 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[4]), vld1q_s32(&b[4*2])); - int32x4_t s4 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[6]), vld1q_s32(&b[6*2])); - int32x4_t sum = vqaddq_s32(vqaddq_s32(s1, s2), vqaddq_s32(s3, s4)); - -#if NUM_TAPS == 16 - s1 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[8]), vld1q_s32(&b[8*2])); - s2 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[10]), vld1q_s32(&b[10*2])); - s3 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[12]), vld1q_s32(&b[12*2])); - s4 = vqrdmulhq_s32(vld1q_s32((int32_t *)&a[14]), vld1q_s32(&b[14*2])); - sum = vqaddq_s32(vqaddq_s32(s1, s2), sum); - sum = vqaddq_s32(vqaddq_s32(s3, s4), sum); -#endif - - cint32_t result[2]; - vst1q_s32((int32_t*)&result, sum); - result[0].r += result[1].r; - result[0].i += result[1].i; - - return result[0]; -} -#elif defined(HAVE_SSE2) -static cint32_t dotprod_q31(cint32_t *a, int32_t *b, int n) -{ - float shiftf = (float)(1 << 31); - __m128 s1, s2, s3, s4, h1, h2, h3, h4, p1, p2, p3, p4, sum; - __m128 shift = _mm_load_ps1(&shiftf); - - s1 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[0])); - s2 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[2])); - s3 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[4])); - s4 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[6])); - h1 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[0*2])); - h2 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[2*2])); - h3 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[4*2])); - h4 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[6*2])); - s1 = _mm_div_ps(s1, shift); - s2 = _mm_div_ps(s2, shift); - s3 = _mm_div_ps(s3, shift); - s4 = _mm_div_ps(s4, shift); - h1 = _mm_div_ps(h1, shift); - h2 = _mm_div_ps(h2, shift); - h3 = _mm_div_ps(h3, shift); - h4 = _mm_div_ps(h4, shift); - p1 = _mm_mul_ps(s1, h1); - p2 = _mm_mul_ps(s2, h2); - p3 = _mm_mul_ps(s3, h3); - p4 = _mm_mul_ps(s4, h4); - sum = _mm_add_ps(_mm_add_ps(p1, p2), _mm_add_ps(p3, p4)); - -#if NUM_TAPS == 16 - s1 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[8])); - s2 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[10])); - s3 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[12])); - s4 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&a[14])); - h1 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[8*2])); - h2 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[10*2])); - h3 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[12*2])); - h4 = _mm_cvtepi32_ps(_mm_loadu_si128((__m128i*)&b[14*2])); - s1 = _mm_div_ps(s1, shift); - s2 = _mm_div_ps(s2, shift); - s3 = _mm_div_ps(s3, shift); - s4 = _mm_div_ps(s4, shift); - h1 = _mm_div_ps(h1, shift); - h2 = _mm_div_ps(h2, shift); - h3 = _mm_div_ps(h3, shift); - h4 = _mm_div_ps(h4, shift); - p1 = _mm_mul_ps(s1, h1); - p2 = _mm_mul_ps(s2, h2); - p3 = _mm_mul_ps(s3, h3); - p4 = _mm_mul_ps(s4, h4); - sum = _mm_add_ps(_mm_add_ps(p1, p2), sum); - sum = _mm_add_ps(_mm_add_ps(p3, p4), sum); -#endif - - sum = _mm_mul_ps(sum, shift); - cint32_t result[2]; - _mm_storeu_si128((__m128i*)result, _mm_cvtps_epi32(sum)); - result[0].r += result[1].r; - result[0].i += result[1].i; - - return result[0]; -} -#else -static cint32_t dotprod_q31(cint32_t *a, int32_t *b, int n) -{ - float complex sum = 0; - for (int i = 0; i < n; ++i) - sum += cq31_to_cf(a[i]) * ((float)b[i * 2] / 2147483647.0f); - return cf_to_cq31(sum); -} -#endif - -void firpfb_q31_execute(firpfb_q31 q, unsigned int f, cint32_t *y) -{ - *y = dotprod_q31(&q->window[q->idx - q->h_sub_len], &q->h[f * 2 * q->h_sub_len], q->h_sub_len); -} - -firpfb_q15 firpfb_q15_create(unsigned int nf, const float *_h, unsigned int h_len) -{ - firpfb_q15 q; - - q = malloc(sizeof(*q)); - q->nf = nf; - q->h_len = h_len; - q->h_sub_len = h_len / nf; - assert(q->h_sub_len * q->nf == q->h_len); - assert(q->h_sub_len == 16); - - q->h = malloc(sizeof(int16_t) * 2 * q->h_len); - q->window = calloc(sizeof(cint16_t), WINDOW_SIZE); - q->idx = q->h_sub_len - 1; - - // reverse order so we can push into the window - // duplicate for neon - for (unsigned int i = 0; i < nf; ++i) - { - for (unsigned int j = 0; j < q->h_sub_len; ++j) - { - q->h[(i * q->h_sub_len + j) * 2] = _h[(q->h_sub_len - 1 - j) * q->nf + i] * 32767.0f; - q->h[(i * q->h_sub_len + j) * 2 + 1] = _h[(q->h_sub_len - 1 - j) * q->nf + i] * 32767.0f; - } - } - - return q; -} - -void firpfb_q15_push(firpfb_q15 q, cint16_t x) -{ - if (q->idx == WINDOW_SIZE) - { - for (unsigned int i = 0; i < q->h_sub_len - 1; i++) - q->window[i] = q->window[q->idx - q->h_sub_len+ i]; - q->idx = q->h_sub_len - 1; - } - q->window[q->idx++] = x; -} - -#ifdef HAVE_NEON -static cint16_t dotprod_q15(cint16_t *a, int16_t *b, int n) -{ - int16x8_t s1 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[0]), vld1q_s16(&b[0*2])); - int16x8_t s2 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[4]), vld1q_s16(&b[4*2])); - int16x8_t s3 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[8]), vld1q_s16(&b[8*2])); - int16x8_t s4 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[12]), vld1q_s16(&b[12*2])); - int16x8_t sum = vqaddq_s16(vqaddq_s16(s1, s2), vqaddq_s16(s3, s4)); - -#ifndef USE_FAST_MATH - s1 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[16]), vld1q_s16(&b[16*2])); - s2 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[20]), vld1q_s16(&b[20*2])); - s3 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[24]), vld1q_s16(&b[24*2])); - s4 = vqrdmulhq_s16(vld1q_s16((int16_t *)&a[28]), vld1q_s16(&b[28*2])); - sum = vqaddq_s16(vqaddq_s16(s1, s2), sum); - sum = vqaddq_s16(vqaddq_s16(s3, s4), sum); -#endif - - int16x4x2_t sum2 = vuzp_s16(vget_high_s16(sum), vget_low_s16(sum)); - int16x4_t sum3 = vpadd_s16(sum2.val[0], sum2.val[1]); - sum3 = vpadd_s16(sum3, sum3); - - cint16_t result[2]; - vst1_s16((int16_t*)&result, sum3); - - return result[0]; -} -#else -static cint16_t dotprod_q15(cint16_t *a, int16_t *b, int n) -{ - cint16_t sum = { 0 }; - for (int i = 0; i < n; ++i) - { - sum.r += (a[i].r * b[i * 2]) >> 15; - sum.i += (a[i].i * b[i * 2]) >> 15; - } - return sum; -} -#endif - -void firpfb_q15_execute(firpfb_q15 q, unsigned int f, cint16_t *y) -{ - *y = dotprod_q15(&q->window[q->idx - q->h_sub_len], &q->h[f * 2 * q->h_sub_len], q->h_sub_len); -} - -resamp_q15 resamp_q15_create(unsigned int m, float fc, float As, unsigned npfb) -{ - resamp_q15 q; - - q = malloc(sizeof(*q)); - q->rate = 1.0; - q->del = 1.0; - q->tau = 0; - q->b = 0; - q->mu = 0; - q->npfb = npfb; - q->state = RESAMP_STATE_INTERP; - - // design filter - unsigned int n = 2 * m * npfb + 1; - float hf[n]; - firdes_kaiser(n, fc / npfb, As, 0.0f, hf); - - // normalize filter coefficients by DC gain - float gain=0.0f; - for (unsigned int i = 0; i < n; ++i) - gain += hf[i]; - gain = q->npfb / gain; - - // apply gain - for (unsigned int i = 0; i < n; ++i) - hf[i] *= gain; - - // construct pfb - q->pfb = firpfb_q31_create(q->npfb, hf, n - 1); - - return q; -} - -void resamp_q15_set_rate(resamp_q15 q, float rate) -{ - // set internal rate - q->rate = rate; - - // set output stride - q->del = 1.0f / rate; -} - -static void update_timing_state(resamp_q15 q) -{ - // update high-resolution timing phase - q->tau += q->del; - - // convert to high-resolution filterbank index - q->bf = q->tau * (float)(q->npfb); - - // split into integer filterbank index and fractional interpolation - q->b = floorf(q->bf); // base index - q->mu = q->bf - (float)(q->b); // fractional index -} - -void resamp_q15_execute(resamp_q15 q, const cint16_t * _x, float complex * y, unsigned int * pn) -{ - cint32_t x; - x.r = (int32_t)_x->r << 16; - x.i = (int32_t)_x->i << 16; - // push new sample - firpfb_q31_push(q->pfb, x); - - // number of output samples - unsigned int n = 0; - - while (q->b < q->npfb) - { - if (q->state == RESAMP_STATE_INTERP) - { - // compute output at base index - firpfb_q31_execute(q->pfb, q->b, &q->y0); - - // check to see if base index is last filter in the bank - if (q->b == q->npfb - 1) - { - q->state = RESAMP_STATE_BOUNDARY; - q->b = q->npfb; - } - else - { - // compute output at incremented base index - firpfb_q31_execute(q->pfb, q->b + 1, &q->y1); - - // linear interpolation - y[n++] = (1.0f - q->mu)*cq31_to_cf(q->y0) + q->mu*cq31_to_cf(q->y1); - - update_timing_state(q); - } - } - else - { - firpfb_q31_execute(q->pfb, 0, &q->y1); - y[n++] = (1.0f - q->mu)*cq31_to_cf(q->y0) + q->mu*cq31_to_cf(q->y1); - update_timing_state(q); - q->state = RESAMP_STATE_INTERP; - } - } - - // decrement timing phase by one sample - q->tau -= 1.0f; - q->bf -= q->npfb; - q->b -= q->npfb; - - *pn = n; -} diff --git a/src/resamp_q15.h b/src/resamp_q15.h deleted file mode 100644 index 793f2b93..00000000 --- a/src/resamp_q15.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include "defines.h" - -typedef struct resamp_q15 * resamp_q15; - -resamp_q15 resamp_q15_create(unsigned int m, float fc, float As, unsigned npfb); -void resamp_q15_set_rate(resamp_q15 q, float rate); -void resamp_q15_execute(resamp_q15 q, const cint16_t * x, float complex * y, unsigned int * pn); diff --git a/src/sync.c b/src/sync.c index 23d382ad..c2aca40b 100644 --- a/src/sync.c +++ b/src/sync.c @@ -33,16 +33,15 @@ static void dump_ref(uint8_t *ref_buf) // log_debug("REF %08X", value); } -static void calc_phase(float complex *buf, unsigned int ref, float *out_phase, float *out_slope) +static void adjust_ref(float complex *buf, float *phases, unsigned int ref) { + // sync bits (after DBPSK) + static const signed char sync[] = { + -1, 1, -1, -1, -1, 1, 1 + }; float phase, slope; float complex sum; - sum = 0; - for (int r = 0; r < BLKSZ; r++) - sum += buf[ref * BLKSZ + r] * buf[ref * BLKSZ + r]; - phase = cargf(sum) * 0.5; - sum = 0; for (int r = 1; r < BLKSZ; r++) { @@ -51,26 +50,18 @@ static void calc_phase(float complex *buf, unsigned int ref, float *out_phase, f } slope = cargf(sum) * 0.5; - *out_phase = phase; - *out_slope = slope; -} - -static void adjust_ref(float complex *buf, float *phases, unsigned int ref) -{ - // sync bits (after DBPSK) - static const signed char sync[] = { - -1, 1, -1, -1, -1, 1, 1 - }; - float phase, slope; - calc_phase(buf, ref, &phase, &slope); - if (prev_slope[ref]) slope = slope * 0.1 + prev_slope[ref] * 0.9; prev_slope[ref] = slope; + sum = 0; + for (int r = 0; r < BLKSZ; r++) + sum += buf[ref * BLKSZ + r] * buf[ref * BLKSZ + r] * cexpf(-I * 2 * slope * r); + phase = cargf(sum) * 0.5; + for (int n = 0; n < BLKSZ; n++) { - float item_phase = phase + slope * (n - ((BLKSZ-1)/2)); + float item_phase = phase + slope * n; phases[ref * BLKSZ + n] = item_phase; buf[ref * BLKSZ + n] *= cexpf(-I * item_phase); }