Skip to content
This repository
tag: Version-3.5.1
Fetching contributors…

Cannot retrieve contributors at this time

file 390 lines (341 sloc) 12.669 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
/*
SC_fftlib.cpp
An interface to abstract over different FFT libraries, for SuperCollider 3.
(c) 2007-2008 Dan Stowell, incorporating code from
SuperCollider 3 Copyright (c) 2002 James McCartney.
All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

NOTE:
vDSP uses a "SplitBuf" as an intermediate representation of the data.
For speed we keep this global, although this makes the code non-thread-safe.
(This is not new to this refactoring. Just worth noting.)
*/

#include "clz.h"
#include <stdlib.h>
#include <cmath>
#include <stdio.h>
#include <cstring>
#include <cassert>

#include "SC_fftlib.h"


// We include vDSP even if not using for FFT, since we want to use some vectorised add/mul tricks
#if defined(__APPLE__) && !defined(SC_IPHONE)
#include "vecLib/vDSP.h"
#elif defined(SC_IPHONE)
#include <Accelerate/Accelerate.h>
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
// Constants and structs

// Decisions here about which FFT library to use - vDSP only exists on Mac BTW.
// We include the relevant libs but also ensure that one, and only one, of them is active...
#if SC_FFT_NONE
// "SC_FFT_NONE" allows compilation without FFT support; only expected to be used on very limited platforms
#define SC_FFT_FFTW 0
#define SC_FFT_VDSP 0
#define SC_FFT_GREEN 0
#elif SC_FFT_GREEN
#define SC_FFT_FFTW 0
#define SC_FFT_VDSP 0
#define SC_FFT_GREEN 1
#elif !SC_FFT_FFTW && defined(__APPLE__)
#define SC_FFT_FFTW 0
#define SC_FFT_VDSP 1
#define SC_FFT_GREEN 0
#else
//#elif SC_FFT_FFTW
#define SC_FFT_FFTW 1
#define SC_FFT_VDSP 0
#define SC_FFT_GREEN 0
#include <fftw3.h>
#endif

// Note that things like *fftWindow actually allow for other sizes, to be created on user request.
#define SC_FFT_ABSOLUTE_MAXSIZE 2147483648
#define SC_FFT_LOG2_ABSOLUTE_MAXSIZE 31
#define SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1 32

// This struct is a bit like FFTW's idea of a "plan": it represents an FFT operation that may be applied once or repeatedly.
// It should be possible for indata and outdata to be the same, for quasi-in-place operation.
typedef struct scfft {
unsigned int nfull, nwin, log2nfull, log2nwin; // Lengths of full FFT frame, and the (possibly shorter) windowed data frame
short wintype;
float *indata, *outdata, *trbuf;
float scalefac; // Used to rescale the data to unity gain
#if SC_FFT_FFTW
fftwf_plan plan;
#endif
} scfft;


static float *fftWindow[2][SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];

#if SC_FFT_VDSP
static FFTSetup fftSetup[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1]; // vDSP setups, one per FFT size
static COMPLEX_SPLIT splitBuf; // Temp buf for holding rearranged data
#endif

#if SC_FFT_GREEN
extern "C" {
#include "fftlib.h"
static float *cosTable[SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1];
}
#endif

#define pi 3.1415926535898f
#define twopi 6.28318530717952646f

////////////////////////////////////////////////////////////////////////////////////////////////////
// Functions

#if SC_FFT_GREEN
static float* create_cosTable(int log2n);
static float* create_cosTable(int log2n)
{
int size = 1 << log2n;
int size2 = size / 4 + 1;
float *win = (float*)malloc(size2 * sizeof(float));
double winc = twopi / size;
for (int i=0; i<size2; ++i) {
double w = i * winc;
win[i] = cos(w);
}
return win;
}
#endif

static float* scfft_create_fftwindow(int wintype, int log2n)
{
int size = 1 << log2n;
unsigned short i;
float *win = (float*)malloc(size * sizeof(float));

double winc;
switch(wintype) {
case kSineWindow:
winc = pi / size;
for (i=0; i<size; ++i) {
double w = i * winc;
win[i] = sin(w);
}
break;
case kHannWindow:
winc = twopi / size;
for (i=0; i<size; ++i) {
double w = i * winc;
win[i] = 0.5 - 0.5 * cos(w);
}
break;
}

return win;

}

static void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype);

static bool scfft_global_initialization (void)
{
for (int wintype=0; wintype<2; ++wintype) {
for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
fftWindow[wintype][i] = 0;
}
for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
fftWindow[wintype][i] = scfft_create_fftwindow(wintype, i);
}
}
#if SC_FFT_GREEN
for (int i=0; i< SC_FFT_LOG2_ABSOLUTE_MAXSIZE_PLUS1; ++i) {
cosTable[i] = 0;
}
for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
cosTable[i] = create_cosTable(i);
}
printf("SC FFT global init: cosTable initialised.\n");
#elif SC_FFT_VDSP
// vDSP inits its twiddle factors
for (int i= SC_FFT_LOG2_MINSIZE; i < SC_FFT_LOG2_MAXSIZE+1; ++i) {
fftSetup[i] = vDSP_create_fftsetup (i, FFT_RADIX2);
if(fftSetup[i] == NULL){
printf("FFT ERROR: Mac vDSP library could not allocate FFT setup for size %i\n", 1<<i);
}
}
// vDSP prepares its memory-aligned buffer for rearranging input data.
// Note max size here - meaning max input buffer size is these two sizes added together.
// vec_malloc used in API docs, but apparently that's deprecated and malloc is sufficient for aligned memory on OSX.
splitBuf.realp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
splitBuf.imagp = (float*) malloc ( SC_FFT_MAXSIZE * sizeof(float) / 2);
//printf("SC FFT global init: vDSP initialised.\n");
#elif SC_FFT_FFTW
//printf("SC FFT global init: FFTW, no init needed.\n");
#endif
return false;
}

static bool dummy = scfft_global_initialization();

// You need to provide an intermediate "transform buffer". Size will depend on which underlying lib is being used.
// "fullsize" is the number of samples in the input buffer (inc any padding), aka the number of "points" in the FFT.
// Often in an SC plugin you can get this number from buf->samples if you're grabbing an external buffer.
// The input value is given in samples.
// The return value is given in bytes.
static size_t scfft_trbufsize(unsigned int fullsize)
{
#if SC_FFT_FFTW
// Transform buf is two floats "too big" because of FFTWF's output ordering
return (fullsize+2) * sizeof(float);
#else
// vDSP packs the nyquist in with the DC, so size is same as input buffer (plus zeropadding)
// Green does this too
return (fullsize ) * sizeof(float);
#endif
}


scfft * scfft_create(size_t fullsize, size_t winsize, SCFFT_WindowFunction wintype,
float *indata, float *outdata, SCFFT_Direction forward, SCFFT_Allocator & alloc)
{
char * chunk = (char*) alloc.alloc(sizeof(scfft) + scfft_trbufsize(fullsize));
if (!chunk)
return NULL;

scfft * f = (scfft*)chunk;
float *trbuf = (float*)(chunk + sizeof(scfft));

f->nfull = fullsize;
f->nwin = winsize;
f->log2nfull = LOG2CEIL(fullsize);
f->log2nwin = LOG2CEIL( winsize);
f->wintype = wintype;
f->indata = indata;
f->outdata = outdata;
f->trbuf = trbuf;

// Buffer is larger than the range of sizes we provide for at startup; we can get ready just-in-time though
if (fullsize > SC_FFT_MAXSIZE)
scfft_ensurewindow(f->log2nfull, f->log2nwin, wintype);

#if SC_FFT_FFTW
if(forward)
f->plan = fftwf_plan_dft_r2c_1d(fullsize, trbuf, (fftwf_complex*) trbuf, FFTW_ESTIMATE);
else
f->plan = fftwf_plan_dft_c2r_1d(fullsize, (fftwf_complex*) trbuf, outdata, FFTW_ESTIMATE);
#endif

// The scale factors rescale the data to unity gain. The old Green lib did this itself, meaning scalefacs would here be 1...
if(forward){
#if SC_FFT_VDSP
f->scalefac = 0.5f;
#else // forward FFTW and Green factor
f->scalefac = 1.f;
#endif
} else { // backward FFTW and VDSP factor
#if SC_FFT_GREEN
f->scalefac = 1.f;
#else // fftw, vdsp
f->scalefac = 1.f / fullsize;
#endif
}

memset(trbuf, 0, scfft_trbufsize(fullsize));

return f;
}

static int largest_log2n = SC_FFT_LOG2_MAXSIZE;

// check the global list of windows incs ours; create if not.
// Note that expanding the table, if triggered, will cause a CPU hit as things are malloc'ed, realloc'ed, etc.
void scfft_ensurewindow(unsigned short log2_fullsize, unsigned short log2_winsize, short wintype)
{
// Ensure we have enough space to do our calcs
if(log2_fullsize > largest_log2n){
largest_log2n = log2_fullsize;
#if SC_FFT_VDSP
size_t newsize = (1 << largest_log2n) * sizeof(float) / 2;
splitBuf.realp = (float*) realloc (splitBuf.realp, newsize);
splitBuf.imagp = (float*) realloc (splitBuf.imagp, newsize);
#endif
}

// Ensure our window has been created
if((wintype != -1) && (fftWindow[wintype][log2_winsize] == 0)){
fftWindow[wintype][log2_winsize] = scfft_create_fftwindow(wintype, log2_winsize);
}

// Ensure our FFT twiddle factors (or whatever) have been created
#if SC_FFT_VDSP
if(fftSetup[log2_fullsize] == 0)
fftSetup[log2_fullsize] = vDSP_create_fftsetup (log2_fullsize, FFT_RADIX2);
#elif SC_FFT_GREEN
if(cosTable[log2_fullsize] == 0)
cosTable[log2_fullsize] = create_cosTable(log2_fullsize);
#endif
}

// these do the main jobs.
// Note: you DON"T need to call the windowing function yourself, it'll be applied by the _dofft and _doifft funcs.
static void scfft_dowindowing(float *data, unsigned int winsize, unsigned int fullsize, unsigned short log2_winsize,
short wintype, float scalefac)
{
int i;
if (wintype != kRectWindow) {
float *win = fftWindow[wintype][log2_winsize];
if (!win) return;
#if SC_FFT_VDSP
vDSP_vmul(data, 1, win, 1, data, 1, winsize);
#else
--win;
float *in = data - 1;
for (i=0; i< winsize ; ++i) {
*++in *= *++win;
}
#endif

}

// scale factor is different for different libs. But the compiler switch here is about using vDSP's fast multiplication method.
#if SC_FFT_VDSP
vDSP_vsmul(data, 1, &scalefac, data, 1, winsize);
#else
for(int i=0; i<winsize; ++i){
data[i] *= scalefac;
}
#endif

// Zero-padding:
memset(data + winsize, 0, (fullsize - winsize) * sizeof(float));
}

void scfft_dofft(scfft * f)
{
// Data goes to transform buf
memcpy(f->trbuf, f->indata, f->nwin * sizeof(float));
scfft_dowindowing(f->trbuf, f->nwin, f->nfull, f->log2nfull, f->wintype, f->scalefac);
#if SC_FFT_FFTW
fftwf_execute(f->plan);
// Rearrange output data onto public buffer
memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
f->outdata[1] = f->trbuf[f->nfull]; // Pack nyquist val in
#elif SC_FFT_VDSP
// Perform even-odd split
vDSP_ctoz((COMPLEX*) f->trbuf, 2, &splitBuf, 1, f->nfull >> 1);
// Now the actual FFT
vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_FORWARD);
// Copy the data to the public output buf, transforming it back out of "split" representation
vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
#elif SC_FFT_GREEN
// Green FFT is in-place
rffts(f->trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
// Copy to public buffer
memcpy(f->outdata, f->trbuf, f->nfull * sizeof(float));
#endif
}

void scfft_doifft(scfft * f)
{
#if SC_FFT_FFTW
float *trbuf = f->trbuf;
size_t bytesize = f->nfull * sizeof(float);
memcpy(trbuf, f->indata, bytesize);
trbuf[1] = 0.f;
trbuf[f->nfull] = f->indata[1]; // Nyquist goes all the way down to the end of the line...
trbuf[f->nfull+1] = 0.f;

fftwf_execute(f->plan);
// NB the plan already includes copying data to f->outbuf

#elif SC_FFT_VDSP
vDSP_ctoz((COMPLEX*) f->indata, 2, &splitBuf, 1, f->nfull >> 1);
vDSP_fft_zrip(fftSetup[f->log2nfull], &splitBuf, 1, f->log2nfull, FFT_INVERSE);
vDSP_ztoc(&splitBuf, 1, (DSPComplex*) f->outdata, 2, f->nfull >> 1);
#elif SC_FFT_GREEN
float *trbuf = f->trbuf;
size_t bytesize = f->nfull * sizeof(float);
memcpy(trbuf, f->indata, bytesize);
// Green FFT is in-place
riffts(trbuf, f->log2nfull, 1, cosTable[f->log2nfull]);
// Copy to public buffer
memcpy(f->outdata, trbuf, f->nwin * sizeof(float));
#endif
scfft_dowindowing(f->outdata, f->nwin, f->nfull, f->log2nfull, f->wintype, f->scalefac);
}

void scfft_destroy(scfft *f, SCFFT_Allocator & alloc)
{
if (f == NULL)
return;
#if SC_FFT_FFTW
fftwf_destroy_plan(f->plan);
#endif
alloc.free(f);
}
Something went wrong with that request. Please try again.