
Merge pull request #91 from spokenlanguage/flake8
fix flake8 layout warnings
egpbos committed May 18, 2021
2 parents f607c46 + 896f0d2 commit 3800db6
Showing 29 changed files with 182 additions and 147 deletions.
2 changes: 2 additions & 0 deletions platalea/asr.py
@@ -82,6 +82,7 @@ def cost(self, item):

def experiment(net, data, config, slt=False):
_device = platalea.hardware.device()

def val_loss():
with torch.no_grad():
net.eval()
@@ -154,6 +155,7 @@ def val_loss():
torch.save(net, 'net.best.pt')
return results


def get_default_config(hidden_size_factor=1024):
fd = D.Flickr8KData
hidden_size = hidden_size_factor * 3 // 4
3 changes: 2 additions & 1 deletion platalea/attention.py
@@ -18,6 +18,7 @@ def forward(self, input):
# return the resulting embedding
return x


class MeanPool(nn.Module):
def __init__(self):
super(MeanPool, self).__init__()
@@ -75,7 +76,7 @@ def __init__(self, in_size_enc, in_size_state, hidden_size):
self.U_a = nn.Linear(in_size_enc, hidden_size, bias=False)
self.W_a = nn.Linear(in_size_state, hidden_size, bias=False)
self.v_a = nn.Linear(hidden_size, 1, bias=True)
self.prev_enc_out= None
self.prev_enc_out = None

def forward(self, hidden, encoder_outputs):
# Calculate energies for each encoder output
51 changes: 28 additions & 23 deletions platalea/audio/features.py
@@ -5,38 +5,40 @@
@author: danny
"""
from platalea.audio.preproc import four,pad,preemph, hamming, notch
from platalea.audio.filters import apply_filterbanks,filter_centers, create_filterbanks
from platalea.audio.preproc import four, pad, preemph, hamming, notch
from platalea.audio.filters import apply_filterbanks, filter_centers, create_filterbanks
from scipy.fftpack import dct
import numpy
import math

# this file contains the main bulk of the actuall feature creation functions

def delta (data, N):
# calculate delta features, n is the number of frames to look forward and backward

def delta(data, N):
# calculate delta features, n is the number of frames to look forward and backward

# create a delta array of the right shape
dt = numpy.zeros(data.shape)
# pad data with first and last frame for size of n
for n in range (N):
data = numpy.row_stack((data[0,:],data, data[-1,:]))
for n in range(N):
data = numpy.row_stack((data[0, :], data, data[-1, :]))
# calc n*c[x+n] + c[x-n] for n in Nand sum them
for n in range (1, N + 1):
dt += numpy.array([n * (data[x+n,:] - data[x-n,:]) for x in range (N, len(data) - N)])
for n in range(1, N + 1):
dt += numpy.array([n * (data[x+n, :] - data[x-n, :]) for x in range(N, len(data) - N)])
# normalise the deltas for the size of N
normalise = 2* sum([numpy.power(x,2) for x in range (1, N+1)])
normalise = 2 * sum([numpy.power(x, 2) for x in range(1, N+1)])

dt = dt/normalise

return (dt)


def raw_frames(data, frame_shift, window_size):
# this function cuts the data into frames and calculates each frames' accuracy
# this function cuts the data into frames and calculates each frames' accuracy

#determine the number of frames to be extracted
# determine the number of frames to be extracted
nframes = math.floor(data.size/frame_shift)
#apply notch filter
# apply notch filter
notched_data = notch(data)
# pad the data
data = pad(notched_data, window_size, frame_shift)
@@ -46,8 +48,8 @@ def raw_frames(data, frame_shift, window_size):
frames = []
energy = []

for f in range (0, nframes):
frame = data[f * frame_shift : f * frame_shift + window_size]
for f in range(0, nframes):
frame = data[(f * frame_shift):(f * frame_shift + window_size)]
energy.append(numpy.log(numpy.sum(numpy.square(frame), 0)))
frames.append(frame)

@@ -59,43 +61,46 @@

return (frames, energy)


def get_freqspectrum(frames, alpha, fs, window_size):
# this function prepares the raw frames for conversion to frequency spectrum
# and applies fft
# this function prepares the raw frames for conversion to frequency spectrum
# and applies fft

# apply preemphasis
frames = preemph(frames, alpha)
# apply hamming windowing
frames = hamming(frames)
# apply fft
freq_spectrum = four(frames,fs,window_size)
freq_spectrum = four(frames, fs, window_size)

return freq_spectrum


def get_fbanks(freq_spectrum, nfilters, fs):
# this function calculates the filters and creates filterbank features from
# the fft features
# this function calculates the filters and creates filterbank features from
# the fft features

# get the frequencies corresponding to the bins returned by the fft
xf = numpy.linspace(0.0, fs/2, numpy.shape(freq_spectrum)[1])
# get the filter frequencies
fc = filter_centers (nfilters,fs,xf)
fc = filter_centers(nfilters, fs, xf)
# create filterbanks
filterbanks = create_filterbanks(nfilters, xf, fc)
# apply filterbanks
fbanks = apply_filterbanks(freq_spectrum, filterbanks)

return fbanks


def get_mfcc(fbanks):
# this function creates mfccs from the fbank features
# this function creates mfccs from the fbank features

# apply discrete cosine transform to get mfccs. According to convention,
# we discard the first filterbank (which is roughly equal to the method
# where we only space filters from 1000hz onwards)
mfcc = dct(fbanks[:,1:])
mfcc = dct(fbanks[:, 1:])
# discard the first coefficient of the mffc as well and take the next 13
# coefficients.
mfcc = mfcc[:,1:13]
mfcc = mfcc[:, 1:13]

return mfcc
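
For orientation only (not part of this commit): a minimal sketch of how the feature functions touched above chain together. The sample rate, 25 ms window, 10 ms shift, pre-emphasis factor and filter count are illustrative assumptions.

```python
# Hypothetical usage of platalea.audio.features; parameter values are assumptions.
import numpy
from platalea.audio.features import raw_frames, get_freqspectrum, get_fbanks, get_mfcc, delta

fs = 16000                                 # assumed sample rate
data = numpy.random.randn(fs)              # one second of fake mono audio
window_size = int(fs * 0.025)              # 25 ms analysis window
frame_shift = int(fs * 0.010)              # 10 ms shift between frames

frames, energy = raw_frames(data, frame_shift, window_size)
spectrum = get_freqspectrum(frames, 0.97, fs, window_size)   # pre-emphasis, Hamming, fft
fbanks = get_fbanks(spectrum, 40, fs)                        # 40 log mel filterbank features
mfcc = get_mfcc(fbanks)                                      # 12 cepstral coefficients per frame
deltas = delta(mfcc, 2)                                      # delta features over +/- 2 frames
```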
35 changes: 19 additions & 16 deletions platalea/audio/filters.py
@@ -9,24 +9,25 @@
from platalea.audio.melfreq import freq2mel, mel2freq
import numpy

def create_filterbanks (nfilters,freqrange,fc):

def create_filterbanks(nfilters, freqrange, fc):
# function to create filter banks. takes as input
# the number of filters to be created, the frequency range and the
# filter centers
filterbank = []
# for the desired # of filters do
for n in range (0,nfilters):
for n in range(0, nfilters):
# set the begin center and end frequency of the filters
begin = fc[n]
center= fc[n+1]
center = fc[n+1]
end = fc[n+2]
f = []
# create triangular filters
for x in freqrange:
# 0 for f outside the filter
if x < begin:
f.append(0)
#increasing to 1 towards the center
# increasing to 1 towards the center
elif begin <= x and x <= center:
f.append((x-begin)/(center-begin))
# decreasing to 0 upwards from the center
@@ -36,27 +37,29 @@ def create_filterbanks (nfilters,freqrange,fc):
elif x > end:
f.append(0)
filterbank.append(f)

return filterbank



def filter_centers(nfilters, fs, xf):
# calculates the center frequencies for the mel filters
#space the filters equally in mels

# space the filters equally in mels
spacing = numpy.linspace(0, freq2mel(fs/2), nfilters+2)
#back from mels to frequency
# back from mels to frequency
spacing = mel2freq(spacing)
# round the filter frequencies to the nearest availlable fft bin frequencies
# and return the centers for the filters.
filters = [xf[numpy.argmin(numpy.abs(xf-x))] for x in spacing]
# and return the centers for the filters.
filters = [xf[numpy.argmin(numpy.abs(xf-x))] for x in spacing]

return filters



def apply_filterbanks(data, filters):
# function to apply the filterbanks and take the log of the filterbanks
filtered_freq = numpy.log(numpy.dot(data, numpy.transpose(filters)))
filtered_freq = numpy.log(numpy.dot(data, numpy.transpose(filters)))
# same as with energy, taking the log of a filter bank with 0 power results in -inf
# we approximate 0 power with -50 the log of 2e-22
filtered_freq[filtered_freq == numpy.log(0)] = -50
filtered_freq[filtered_freq == numpy.log(0)] = -50

return filtered_freq
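
Purely illustrative (bin count and filter count are assumptions, not values from this commit): the three helpers above build mel-spaced triangular filters over the fft bin frequencies and apply them to a power spectrum.

```python
# Hypothetical usage of platalea.audio.filters; sizes are assumptions.
import numpy
from platalea.audio.filters import filter_centers, create_filterbanks, apply_filterbanks

fs = 16000
xf = numpy.linspace(0.0, fs / 2, 257)              # fft bin frequencies (as in features.py)
fc = filter_centers(10, fs, xf)                    # 12 mel-spaced center frequencies
banks = create_filterbanks(10, xf, fc)             # 10 triangular filters over the bins
spectrum = numpy.abs(numpy.random.randn(5, 257))   # 5 frames of fake spectrum
fbanks = apply_filterbanks(spectrum, banks)        # log filterbank energies, shape (5, 10)
```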
14 changes: 8 additions & 6 deletions platalea/audio/melfreq.py
@@ -6,14 +6,16 @@
@author: danny
"""
import numpy
#provides simple functions to convert a frequency to mel and vice versa
# provides simple functions to convert a frequency to mel and vice versa


def freq2mel(f):
#converts a frequency to mel
mel=1125*numpy.log(1+f/700)
# converts a frequency to mel
mel = 1125*numpy.log(1+f/700)
return (mel)


def mel2freq(m):
#converts mel to frequency
f=700*(numpy.exp(m/1125)-1)
return (f)
# converts mel to frequency
f = 700*(numpy.exp(m/1125)-1)
return f
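
As a quick illustration (not part of the commit), the two helpers invert each other:

```python
# freq2mel / mel2freq round trip, using the formulas shown above.
from platalea.audio.melfreq import freq2mel, mel2freq

m = freq2mel(1000.0)   # 1125 * ln(1 + 1000/700) ≈ 998.2 mel
f = mel2freq(m)        # back to ≈ 1000.0 Hz
```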
74 changes: 39 additions & 35 deletions platalea/audio/preproc.py
@@ -10,63 +10,67 @@
import numpy
# provides some basic preprocessing functions for audio files, such as
# padding the frames, hammingwindow for the frames, data preemphasis and fourrier
# transform
# transform


def four(frames, fs, windowsize):
# fft works on frames of size 2^x, first find the appropriate padsize for
# our framesize.
exp = 1
while True:
if numpy.power(2,exp) - windowsize >= 0:
padsize= numpy.power(2,exp) - windowsize
break
else:
exp += 1
# pad frames to be of size 2^x
frames = numpy.pad(frames, [(0,0), (0,padsize)], 'constant', constant_values = 0)
# set cutoff at the half the frame size (+1 to keep the bin around
# which the spectrum is mirrored)
cutoff = int((windowsize+padsize)/2)+1
# perform fast fourier transform
Y = fft(frames)
# take absolute power and collapse spectrum. Normalise the power for the
# amount of bins but multiply by 2 to make up for the collapse of the spectrum
Yamp = 2/(windowsize+padsize)* numpy.abs(Y[:, 0:cutoff])
# first amp (dc component) and nyquist freq bin are not to be doubled (as they
# are not mirrored in the fft)
Yamp[:,0] = Yamp[:,0]/2
Yamp[:,-1] = Yamp[:,-1]/2
return (Yamp)
# fft works on frames of size 2^x, first find the appropriate padsize for
# our framesize.
exp = 1
while True:
if numpy.power(2, exp) - windowsize >= 0:
padsize = numpy.power(2, exp) - windowsize
break
else:
exp += 1
# pad frames to be of size 2^x
frames = numpy.pad(frames, [(0, 0), (0, padsize)], 'constant', constant_values=0)
# set cutoff at the half the frame size (+1 to keep the bin around
# which the spectrum is mirrored)
cutoff = int((windowsize+padsize)/2)+1
# perform fast fourier transform
Y = fft(frames)
# take absolute power and collapse spectrum. Normalise the power for the
# amount of bins but multiply by 2 to make up for the collapse of the spectrum
Yamp = 2/(windowsize+padsize) * numpy.abs(Y[:, 0:cutoff])
# first amp (dc component) and nyquist freq bin are not to be doubled (as they
# are not mirrored in the fft)
Yamp[:, 0] = Yamp[:, 0]/2
Yamp[:, -1] = Yamp[:, -1]/2
return (Yamp)


def notch(data):
# apply a notch filter to remove the DC offset
# apply a notch filter to remove the DC offset
b, a = iirnotch(0.001, 3.5)
notched = lfilter(b, a, data)
return notched

def pad (data,window_size, frame_shift):


def pad(data, window_size, frame_shift):
# function to pad the audio file to fit the frameshift
context_size = (window_size-frame_shift)/2
pad_size = context_size - numpy.mod(data.size, frame_shift)
pad_size = context_size - numpy.mod(data.size, frame_shift)
# if needed add padding to the end of the data
if pad_size > 0:
data = numpy.append(data, numpy.zeros(int(numpy.ceil(pad_size))))
#always add padding to the front of the data
# always add padding to the front of the data
data = numpy.append(numpy.zeros(int(context_size)), data)
return(data)



def preemph(data, alpha):
# preemphasises the data: x(preemph) = X(t) - X(t-1)*alpha
xt = data
xtminus1 = data*alpha
xtminus1 = numpy.insert(xtminus1,0,0,1)[:,:-1]
data_preemph = xt-xtminus1
xtminus1 = numpy.insert(xtminus1, 0, 0, 1)[:, :-1]
data_preemph = xt-xtminus1
return data_preemph



def hamming(data):
# apply hamming windowing to a frame of data
L = numpy.shape(data)[1]
hammingwindow = 0.54-(0.46*numpy.cos(2*numpy.pi*numpy.arange(L)/(L-1)))
data = numpy.multiply(data,hammingwindow)
data = numpy.multiply(data, hammingwindow)
return data
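
For illustration only, with assumed frame sizes: the helpers above pre-emphasise, window and Fourier-transform a batch of frames, padding each frame up to the next power of two and keeping the non-mirrored half of the spectrum.

```python
# Hypothetical usage of platalea.audio.preproc; frame count and sizes are assumptions.
import numpy
from platalea.audio.preproc import preemph, hamming, four

window_size = 400                              # assumed 25 ms frames at 16 kHz
frames = numpy.random.randn(3, window_size)    # 3 fake frames
frames = preemph(frames, 0.97)                 # x(t) - 0.97 * x(t-1)
frames = hamming(frames)                       # per-frame Hamming window
spectrum = four(frames, 16000, window_size)    # padded to 512 samples, 257 bins kept
print(spectrum.shape)                          # (3, 257)
```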