/
AudioProcessor.java
256 lines (233 loc) · 6.99 KB
/
AudioProcessor.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
package edu.mit.d54.plugins.audio;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.TargetDataLine;
import edu.emory.mathcs.jtransforms.fft.FloatFFT_1D;
/**
 * The AudioProcessor reads data from the computer's line input to facilitate audio visualization plugins.
 * A plugin should call {@link #openChannel()} once and then {@link #frameUpdate()} on every frame to
 * refresh the AudioProcessor with new data. The raw audio samples as well as the binned FFT magnitudes
 * are available via the accessor methods.
 * <p>
 * All instances share a single capture line (a static field), and this class performs no synchronization.
 */
public class AudioProcessor {
    /** Number of samples in the analysis window (and FFT length). */
    private static final int FFT_LEN=2048;
    /** Capture sample rate in Hz. */
    private static final float SAMPLE_RATE=44100;
    /** Bytes per captured frame: one channel of 16-bit PCM. */
    private static final int BYTES_PER_SAMPLE=2;

    /** Capture line shared by every AudioProcessor instance. */
    private static final TargetDataLine line=acquireLine();

    private AudioInputStream input;

    private final int fftNumBins;
    private final double fftMaxFreq;
    private final boolean fftBinLog;
    private final double fftScaleDecay;
    private final double freqScalePower;

    /** Sliding window of the most recent little-endian 16-bit samples. */
    private final byte[] frameRaw=new byte[FFT_LEN*BYTES_PER_SAMPLE];
    /** Decoded view of {@link #frameRaw}; handed out by {@link #getFrameSamples()}. */
    private final int[] frameSamples=new int[FFT_LEN];
    /** Scratch space used only to throw away stale audio. */
    private final byte[] discard=new byte[FFT_LEN*BYTES_PER_SAMPLE];
    /** In-place FFT work buffer; fully overwritten with samples before every transform. */
    private final float[] fftWork=new float[FFT_LEN];
    /** The transform is created once because it is reused unchanged for every frame. */
    private final FloatFFT_1D fftOp=new FloatFFT_1D(FFT_LEN);

    private float[] fftMagBinned;
    private float fftMaxValue;

    /**
     * Look up a capture line for 44.1 kHz, 16-bit, mono, signed, little-endian PCM.
     * @return the capture line (not yet opened)
     * @throws RuntimeException wrapping the LineUnavailableException if no line can be obtained
     */
    private static TargetDataLine acquireLine()
    {
        AudioFormat format=new AudioFormat(
                AudioFormat.Encoding.PCM_SIGNED,SAMPLE_RATE,16,1,BYTES_PER_SAMPLE,SAMPLE_RATE,false);
        try
        {
            return AudioSystem.getTargetDataLine(format);
        }
        catch (LineUnavailableException e)
        {
            throw new RuntimeException("No audio capture line available for "+format,e);
        }
    }

    /**
     * Create a new AudioProcessor around the line in device.
     * @param fftNumBins Number of FFT bins to provide
     * @param fftMaxFreq Maximum audio frequency (in Hz) to return through FFT. Values above the highest
     * frequency the FFT provides are limited to that frequency.
     * @param fftBinLog Scale the FFT bins to equal width in log frequency. Otherwise, FFT bins
     * will be equal width in frequency.
     * @param fftScaleDecay The exponential rate (ratio per frame) that the peak FFT amplitude is
     * reduced. This should be a number between 0 and 1.
     * @param freqScalePower The exponent used to scale amplitude versus frequency.
     * Amplitude is multiplied by (freq ^ freqScalePower). Typically this is used to de-emphasize
     * bass relative to the middle and high frequencies, which are perceived to be louder.
     */
    public AudioProcessor(int fftNumBins, double fftMaxFreq, boolean fftBinLog, double fftScaleDecay, double freqScalePower)
    {
        this.fftNumBins=fftNumBins;
        this.fftMaxFreq=fftMaxFreq;
        this.fftBinLog=fftBinLog;
        this.fftScaleDecay=fftScaleDecay;
        this.freqScalePower=freqScalePower;
    }

    /**
     * Create a new AudioProcessor around the line in device with generally useful defaults.
     * fftMaxFreq is set to 3500 Hz, fftBinLog is true, fftScaleDecay is 0.998, and freqScalePower
     * is 0.125.
     * @param fftNumBins Number of FFT bins to provide
     */
    public AudioProcessor(int fftNumBins)
    {
        this(fftNumBins,3500,true,0.998,0.125); //was 0.25
    }

    /**
     * Get the raw audio samples from the current frame. The returned array is the processor's own
     * buffer (not a copy) and is overwritten by the next call to {@link #frameUpdate()}.
     * @return the raw audio samples from the current frame.
     */
    public int[] getFrameSamples()
    {
        return frameSamples;
    }

    /**
     * Get the FFT magnitude bins from the current frame. The size of this array is determined
     * by the fftNumBins parameter. The frequency width and amplitude of the bins is affected by
     * the fftMaxFreq, fftBinLog, and freqScalePower parameters.
     * @return the FFT magnitude bins from the current frame, or null if {@link #frameUpdate()}
     * has not been called yet.
     */
    public float[] getFFTMagBins()
    {
        return fftMagBinned;
    }

    /**
     * Get the peak value seen by the FFT in the current frame, or the decaying previous higher peak.
     * The rate at which the previous peak decays is determined by fftScaleDecay.
     * @return the FFT peak value
     */
    public float getFFTMaxValue()
    {
        return fftMaxValue;
    }

    /**
     * Open the audio channel so data can be captured. This must be called once before frameUpdate or the
     * data accessors are called.
     * @throws RuntimeException wrapping the LineUnavailableException if the line cannot be opened
     */
    public void openChannel()
    {
        try
        {
            line.open();
        }
        catch (LineUnavailableException e)
        {
            throw new RuntimeException("Unable to open the audio capture line",e);
        }
        line.start();
        // NOTE(review): drain() is retained from the original code; flush() may have been the
        // intent (dropping stale audio) - confirm before changing.
        line.drain();
        input=new AudioInputStream(line);
    }

    /**
     * Update the AudioProcessor with new sample data. This method should be called every time the
     * DisplayPlugin is updated and before the audio data is accessed.
     * <p>
     * Audio older than one analysis window is thrown away, the newest audio is appended to the sliding
     * window, and the samples, binned FFT magnitudes and decaying peak are recomputed from that window.
     * @throws IllegalStateException if {@link #openChannel()} has never been called
     * @throws RuntimeException wrapping any IOException raised while reading the capture line
     */
    public void frameUpdate()
    {
        if (input==null)
        {
            throw new IllegalStateException("openChannel() must be called before frameUpdate()");
        }
        try
        {
            discardBacklog();
            readNewestAudio();
        }
        catch (IOException e)
        {
            throw new RuntimeException("Failed to read from the audio capture line",e);
        }

        ByteBuffer bb=ByteBuffer.wrap(frameRaw);
        bb.order(ByteOrder.LITTLE_ENDIAN);
        for (int i=0; i<FFT_LEN; i++)
        {
            frameSamples[i]=bb.getShort(i*BYTES_PER_SAMPLE);
            fftWork[i]=frameSamples[i];
        }
        fftOp.realForward(fftWork);

        //get fft bins
        float[] fftMag=fft2mag(fftWork);
        if (fftBinLog)
            fftMagBinned=rebinLog(fftMag,fftMaxFreq,fftNumBins,freqScalePower);
        else
            fftMagBinned=rebin(fftMag,fftMaxFreq,fftNumBins,freqScalePower);

        // Decay the previous peak first, then raise it to this frame's largest bin, so the reported
        // peak is never smaller than a bin of the current frame. The '>' test also skips NaN.
        float peak=(float)(fftMaxValue*fftScaleDecay);
        for (int i=0; i<fftMagBinned.length; i++)
        {
            if (fftMagBinned[i]>peak)
                peak=fftMagBinned[i];
        }
        fftMaxValue=peak;
    }

    /**
     * Number of buffered bytes that form complete samples. Rounding down keeps the 16-bit samples in
     * the sliding window aligned even if the line reports an odd byte count.
     */
    private int wholeSampleBytesAvailable() throws IOException
    {
        int avail=input.available();
        return avail-(avail%BYTES_PER_SAMPLE);
    }

    /** Throw away buffered audio until at most one analysis window of data remains unread. */
    private void discardBacklog() throws IOException
    {
        int excess=wholeSampleBytesAvailable()-frameRaw.length;
        while (excess>0)
        {
            int request=Math.min(discard.length,excess);
            if (input.read(discard,0,request)<=0)
                break; // stream ended or made no progress; do not spin
            excess=wholeSampleBytesAvailable()-frameRaw.length;
        }
    }

    /** Slide the window left by the amount of new audio and append that audio at the end. */
    private void readNewestAudio() throws IOException
    {
        int fresh=Math.min(wholeSampleBytesAvailable(),frameRaw.length);
        int kept=frameRaw.length-fresh;
        if (kept>0)
        {
            System.arraycopy(frameRaw,fresh,frameRaw,0,kept);
        }
        int done=0;
        while (done<fresh)
        {
            int n=input.read(frameRaw,kept+done,fresh-done);
            if (n<=0)
                break; // nothing more can be read right now
            done+=n;
        }
    }

    /**
     * Convert a packed real-FFT result into magnitudes. Element i of the result is computed from
     * data[2*i+2] and data[2*i+3], so the first pair of the input is skipped.
     * Assumes the JTransforms realForward layout in which elements 2k and 2k+1 hold the real and
     * imaginary parts of FFT bin k; under that layout result index i is FFT bin i+1 (DC is dropped).
     * @param data packed FFT output
     * @return magnitudes, of length data.length/2-1
     */
    private static float[] fft2mag(float[] data)
    {
        float[] ret=new float[(data.length/2)-1];
        for (int i=0; i<ret.length; i++)
        {
            float a=data[2*i+2];
            float b=data[2*i+3];
            ret[i]=(float)Math.sqrt(a*a+b*b);
        }
        return ret;
    }

    /**
     * Average magnitudes into nbins bins of equal width in frequency, covering magnitudes up to maxFreq.
     * @param data magnitudes as produced by fft2mag
     * @param maxFreq upper frequency limit in Hz; limited to the range covered by data
     * @param nbins number of output bins
     * @param freqScalePwr exponent of the per-frequency weighting
     * @return the nbins weighted averages
     */
    private static float[] rebin(float[] data, double maxFreq, int nbins, double freqScalePwr)
    {
        int count=Math.min(freqToIndex(maxFreq),data.length);
        float[] ret=new float[nbins];
        int high=0;
        int low=0;
        for (int i=0; i<nbins; i++)
        {
            low=high;
            high=(count*(i+1))/nbins;
            ret[i]=weightedMean(data,low,high,freqScalePwr);
        }
        return ret;
    }

    /**
     * Average magnitudes into nbins bins whose upper edges grow exponentially (expm1 spacing), so
     * that low bins are narrow and high bins are wide, covering magnitudes up to maxFreq.
     * @param data magnitudes as produced by fft2mag
     * @param maxFreq upper frequency limit in Hz; limited to the range covered by data
     * @param nbins number of output bins
     * @param freqScalePwr exponent of the per-frequency weighting
     * @return the nbins weighted averages
     */
    private static float[] rebinLog(float[] data, double maxFreq, int nbins, double freqScalePwr)
    {
        int count=Math.min(freqToIndex(maxFreq),data.length);
        float[] ret=new float[nbins];
        int high=0;
        int low=0;
        for (int i=0; i<nbins; i++)
        {
            low=high;
            // expm1(1)/(e-1) is 1, so the last bin ends exactly at count.
            high=(int)Math.round(count*Math.expm1((i+1.0)/nbins)/(Math.E-1));
            ret[i]=weightedMean(data,low,high,freqScalePwr);
        }
        return ret;
    }

    /**
     * Mean of data[low..high) with each entry multiplied by (frequency ^ freqScalePwr).
     * A range that is empty (a bin narrower than one FFT index) uses the single entry at low instead
     * of dividing zero by zero, and the range is kept inside the array.
     */
    private static float weightedMean(float[] data, int low, int high, double freqScalePwr)
    {
        if (data.length==0)
        {
            return 0;
        }
        int from=Math.max(0,Math.min(low,data.length-1));
        int to=Math.min(Math.max(high,from+1),data.length);
        float val=0;
        for (int j=from; j<to; j++)
        {
            // Magnitude index j is FFT bin j+1 because fft2mag drops the first (DC) pair.
            val+=data[j]*Math.pow(indexToFreq(j+1), freqScalePwr);
        }
        return val/(to-from);
    }

    /** Frequency in Hz at the given FFT bin index. */
    private static double indexToFreq(int index)
    {
        return (index*1.0/FFT_LEN)*SAMPLE_RATE;
    }

    /** FFT bin index nearest to the given frequency in Hz. */
    private static int freqToIndex(double freq)
    {
        return (int)Math.round(freq/SAMPLE_RATE*FFT_LEN);
    }

    /**
     * Stop collecting audio data from the line. After this is called, openChannel must be called before
     * audio data can be used again.
     */
    public void closeChannel()
    {
        line.stop();
    }
}