/
audioFeatures.py
153 lines (143 loc) · 4.71 KB
/
audioFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
--------------------
audioFeatures.py
--------------------
A set of functions for extracting audio features
--------------------
"""
from scipy import *
import numpy as np
from preProcessing import *
def RMS(x):
    """
    ----------------------
    A function calculating the root mean square of an array of data,
    i.e. sqrt(mean(|x|**2))
    ----------------------
    Variables :
    ----------------------
    x : the input array of data (real or complex, e.g. FFT bins)
    ----------------------
    Returns
    ----------------------
    the RMS of x as a real NumPy float; 0.0 when x is empty
    ----------------------
    """
    samples = np.asarray(x)
    if samples.size == 0:
        # the mean of nothing is undefined; report "no level" instead of failing
        return np.float64(0.0)
    # take magnitudes first so complex bins contribute |z|**2, and promote to
    # float64 so squaring small integer dtypes (e.g. int16 audio) cannot wrap
    magnitudes = np.abs(samples).astype(np.float64)
    return np.sqrt(np.mean(magnitudes ** 2))
def threeBandRMS(X):
    """
    -----------------------
    Cut FFT data into three consecutive bands and compare the RMS level of
    each band, giving nine features in total. Every band is len(X)//3 bins
    wide except the last, which also takes any leftover bins.
    -----------------------
    Variables :
    -----------------------
    X : input array of FFT data
    -----------------------
    Outputs :
    -----------------------
    low : RMS of the lower third of the frequency bins
    mid : RMS of the middle third of the frequency bins
    high : RMS of the upper third of the frequency bins (plus leftover bins)
    LV : low divided by the average of mid and high
    MV : mid divided by the average of low and high
    HV : high divided by the average of mid and low
    LVM : low divided by mid
    LVH : low divided by high
    MVH : mid divided by high
    -----------------------
    NOTE : the ratios are plain divisions, so a band whose RMS is zero ends
    up as a zero divisor.
    -----------------------
    """
    third = len(X) // 3
    upper_edge = third * 2

    low = RMS(X[:third])
    mid = RMS(X[third:upper_edge])
    high = RMS(X[upper_edge:])

    # average level of the two bands each single band is measured against
    others_for_low = (mid + high) / 2
    others_for_mid = (low + high) / 2
    others_for_high = (mid + low) / 2

    return (
        low,
        mid,
        high,
        low / others_for_low,
        mid / others_for_mid,
        high / others_for_high,
        low / mid,
        low / high,
        mid / high,
    )
def spectralFlux(x, hop=None):
    """
    ----------------------
    A function for detecting the amount of relative energy change in a signal
    ----------------------
    Variables :
    ----------------------
    x : the input array of data
    hop : the number of samples to analyze at once (defaults to 100)
    ----------------------
    Returns
    ----------------------
    flux : the spectral flux of the signal, computed as the mean of the
           per-window change values; 0.0 when there is no data
    ----------------------
    NOTE : the first window, and any samples at the end that the window loop
    does not reach, keep their envelope value instead of a change value.
    ----------------------
    """
    if hop is None:
        hop = 100
    # halfWave comes from preProcessing - presumably half-wave rectification
    # (TODO confirm against that module)
    x = halfWave(x)
    # smooth out the data using the envelope function
    flux = envelope(x, hop)
    if len(flux) == 0:
        return 0.0
    # overwrite each hop-sized window with the absolute change between its
    # first sample and the first sample of the following window
    for i in range(hop, len(x) - hop, hop):
        flux[i:i + hop] = np.abs(flux[i] - flux[i + hop])
    # plain mean: every element contributes exactly once
    return np.mean(flux)
def zeroCrossings(x, srate = 44100):
    """
    ----------------------
    function that returns the zero crossing rate of an array of audio data
    as well as an array corresponding to zero crossing points
    ----------------------
    Variables :
    ----------------------
    x : incoming 1-D array of audio data (or data in general)
    srate : the sample rate of the incoming data
    ----------------------
    Returns
    ----------------------
    crossings : an np array where 1's populate points of zero crossing and 0's are all other samples
    crossingRate : the number of crossings a second
    ----------------------
    A crossing is marked on the first non-zero sample whose sign differs from
    the previous non-zero sample. Samples that are exactly zero are skipped,
    so "1, 0, -1" is one crossing (in either direction) and "1, 0, 1" is none.
    ----------------------
    """
    samples = np.asarray(x)
    total = len(samples)
    crossings = np.zeros(total)
    if total == 0:
        # no samples means no duration to compute a rate over
        return crossings, 0.0

    positive = samples > 0
    negative = samples < 0
    # indices of samples that carry a sign (zeros and NaNs are left out)
    signed = np.flatnonzero(positive | negative)
    # True where a signed sample is positive, False where it is negative
    polarity = positive[signed]
    flipped = polarity[1:] != polarity[:-1]
    crossings[signed[1:][flipped]] = 1

    crossingCount = int(np.count_nonzero(flipped))
    # crossings per second = crossings / duration, with duration = total / srate
    crossingRate = crossingCount * srate / total
    return crossings, crossingRate
def envelope(x, binSize=None):
    """
    ----------------------
    function for determining the overall amplitude envelope of a signal
    ----------------------
    Every run of binSize consecutive samples is replaced by the largest value
    found in that run. The peak search starts from 0.0, so a run with no
    positive sample becomes 0.0. A shorter run left over at the end of the
    data is treated the same way.
    ----------------------
    Variables :
    ----------------------
    x : incoming array of audio data (or data in general); it is
        overwritten in place
    binSize : the amount of samples processed at a time (defaults to 2200)
    ----------------------
    Returns
    ----------------------
    x : the same object that was passed in, now containing the envelope data
    ----------------------
    """
    if binSize is None:
        binSize = 2200
    total = len(x)
    for start in range(0, total, binSize):
        # clamp the final bin so a trailing partial bin is processed too
        stop = start + binSize
        if stop > total:
            stop = total
        peak = 0.0
        for k in range(start, stop):
            if x[k] > peak:
                peak = x[k]
        for k in range(start, stop):
            x[k] = peak
    return x