In [None]:
# Common imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from JSAnimation import IPython_display
from skspeech.synthesis import kroger as kr

import nengo
import nengo.utils.numpy as npext
import nengo_gui.ipython

#  Recognition system

## Auditory periphery

This section makes heavy use of [Brian hears](http://www.briansimulator.org/docs/hears.html);
other auditory periphery models should also be investigated.

In [None]:
import brian_no_units  # For speed
import brian as br
import brian.hears as bh

In [None]:
def whitenoise_sound(duration=None, level=None):
    """Create a ramped white-noise stimulus with a fixed level.

    Parameters
    ----------
    duration : optional
        Length of the noise, forwarded to ``bh.whitenoise``.
        Defaults to ``100*br.ms``.
    level : optional
        Value assigned to the sound's ``level`` attribute.
        Defaults to ``50*bh.dB``.

    Returns
    -------
    The object returned by ``bh.whitenoise(duration).ramp()`` after its
    ``level`` has been set.
    """
    # Defaults are resolved at call time, so calling with no arguments gives
    # exactly the stimulus this notebook has always used.
    if duration is None:
        duration = 100*br.ms
    if level is None:
        level = 50*bh.dB
    sound = bh.whitenoise(duration).ramp()
    sound.level = level
    return sound

def cochleogram(gt_mon, duration=None, frequencies=None):
    """Plot a filterbank output as a cochleogram image.

    Parameters
    ----------
    gt_mon :
        2-D filterbank output. It is transposed before plotting
        (presumably samples x channels -- TODO confirm).
    duration : optional
        Duration of the stimulus, used for the time axis. When omitted the
        global ``sound.duration`` is used.
    frequencies : optional
        Channel center frequencies; only the first and last entries are
        used, as the limits of the frequency axis. When omitted the global
        ``center_frequencies`` is used.
    """
    # Fall back to the notebook globals so existing one-argument calls keep
    # working; passing the values explicitly avoids stale-global mistakes.
    if duration is None:
        duration = sound.duration
    if frequencies is None:
        frequencies = center_frequencies
    # 'lower' is the documented spelling; matplotlib versions that validate
    # `origin` reject 'lower left'.
    plt.imshow(gt_mon.T, aspect='auto', origin='lower',
               extent=(0, duration/br.ms,
                       frequencies[0], frequencies[-1]))
    plt.yscale('log')
    plt.title('Cochleogram')
    plt.ylabel('Frequency (Hz)')
    plt.xlabel('Time (ms)')


# Shared stimulus: the filterbank cells below take `sound` as their input, and
# cochleogram() reads its duration through this global name.
sound = whitenoise_sound()

In [None]:
# Gammatone
# Filters the shared `sound` with a gammatone filterbank and plots the output.
nbr_center_frequencies = 50  # number of frequency channels in the filterbank
b1 = 1.019  # factor determining the time constant of the filters
# center frequencies with a spacing following an ERB scale
# (cochleogram() also reads this global for its frequency axis)
center_frequencies = bh.erbspace(100*br.Hz, 1000*br.Hz, nbr_center_frequencies)
gammatone = bh.Gammatone(sound, center_frequencies, b=b1)

# output of the filterbank, plotted below
gt_mon = gammatone.process()
cochleogram(gt_mon)

In [None]:
# Approximate Gammatone
# Same stimulus and frequency range as the Gammatone cell, but using
# bh.ApproximateGammatone with an explicit per-channel bandwidth.
nbr_center_frequencies = 50  # number of frequency channels in the filterbank
# center frequencies with a spacing following an ERB scale
# (cochleogram() also reads this global for its frequency axis)
center_frequencies = bh.erbspace(100*br.Hz, 1000*br.Hz, nbr_center_frequencies)
# bandwidth of the filters (different in each channel)
bw = 10**(0.037+0.785*np.log10(center_frequencies))

gammatone = bh.ApproximateGammatone(sound, center_frequencies, bw, order=3)

# output of the filterbank, plotted below
gt_mon = gammatone.process()
cochleogram(gt_mon)

In [None]:
# Log Gammachirp
# Filters the shared `sound` with a logarithmic gammachirp filterbank and
# plots the output.
nbr_center_frequencies = 50  # number of frequency channels in the filterbank
c1 = -2.96  # glide slope
b1 = 1.81  # factor determining the time constant of the filters
# center frequencies with a spacing following an ERB scale
cf = bh.erbspace(100*br.Hz, 1000*br.Hz, nbr_center_frequencies)
# cochleogram() takes its frequency axis from the global `center_frequencies`;
# bind it here so the plot does not silently depend on an earlier cell.
center_frequencies = cf

gamma_chirp = bh.LogGammachirp(sound, cf, c=c1, b=b1)
gamma_chirp_mon = gamma_chirp.process()
cochleogram(gamma_chirp_mon)

In [None]:
# Linear Gammachirp
# Filters the shared `sound` with a linear gammachirp filterbank and plots
# the output. Note that this cell uses 10 channels, not 50.
nbr_center_frequencies = 10
# center frequencies with a spacing following an ERB scale
# (cochleogram() also reads this global for its frequency axis)
center_frequencies = bh.erbspace(100*br.Hz, 1000*br.Hz, nbr_center_frequencies)

c = 0.0 # glide slope
# one time constant per channel, linearly spaced from 3 ms down to 0.3 ms
time_constant = np.linspace(3, 0.3, nbr_center_frequencies) * br.ms

gamma_chirp = bh.LinearGammachirp(sound, center_frequencies, time_constant, c)
gamma_chirp_mon = gamma_chirp.process()
cochleogram(gamma_chirp_mon)

In [None]:
# Tan & Carney
# Click-response test: drives a TanCarney + ZhangSynapse chain with clicks at
# several peak levels and plots the synapse variables 's' and 'R' per level.
# NOTE(review): set_default_samplerate presumably also affects sounds created
# in later cells without an explicit samplerate -- confirm.
bh.set_default_samplerate(50*br.kHz)
sample_length = 1 / bh.get_samplerate(None)
cf = 1000 * br.Hz

# print(...) with a single string prints the same text on Python 2 and 3
print('Testing click response')
duration = 25 * br.ms
levels = [40, 60, 80, 100, 120]
# one channel per level: a click of two samples followed by silence
tones = bh.Sound([bh.Sound.sequence([bh.click(sample_length*2, peak=level*bh.dB),
                                     bh.silence(duration=duration - sample_length)])
                  for level in levels])
ihc = bh.TanCarney(bh.MiddleEar(tones), [cf] * len(levels), update_interval=1)
syn = bh.ZhangSynapse(ihc, cf)
# record both state variables for every channel on the synapse's own clock
s_mon = br.StateMonitor(syn, 's', record=True, clock=syn.clock)
R_mon = br.StateMonitor(syn, 'R', record=True, clock=syn.clock)
spike_mon = br.SpikeMonitor(syn)
net = br.Network(syn, s_mon, R_mon, spike_mon)
net.run(duration * 1.5)

for idx, level in enumerate(levels):
    # Figure 1: variable 's', one subplot per level
    plt.figure(1)
    plt.subplot(len(levels), 1, idx + 1)
    plt.plot(s_mon.times / br.ms, s_mon[idx])
    plt.xlim(0, 25)
    plt.xlabel('Time (msec)')
    plt.ylabel('Sp/sec')
    plt.text(15, np.nanmax(s_mon[idx])/2., 'Peak SPL=%s SPL' % str(level*bh.dB))
    # remember the y-range so figure 2 can be drawn on the same scale
    ymin, ymax = plt.ylim()
    if idx == 0:
        plt.title('Click responses')

    # Figure 2: variable 'R' with the recorded spike times overlaid
    plt.figure(2)
    plt.subplot(len(levels), 1, idx + 1)
    plt.plot(R_mon.times / br.ms, R_mon[idx])
    plt.xlabel('Time (msec)')
    # was a second xlabel call, which left the y-axis unlabelled
    plt.ylabel('Sp/sec')
    plt.text(15, np.nanmax(s_mon[idx])/2., 'Peak SPL=%s SPL' % str(level*bh.dB))
    plt.ylim(ymin, ymax)
    if idx == 0:
        plt.title('Click responses (with spikes and refractoriness)')
    # mark each spike with a red cross at the height of the trace maximum
    plt.plot(spike_mon.spiketimes[idx] / br.ms,
         np.ones(len(spike_mon.spiketimes[idx])) * np.nanmax(R_mon[idx]), 'rx')

# Tone-response test: same TanCarney + ZhangSynapse chain, driven by ramped
# tones at `cf` for several levels. Relies on `cf` and the default samplerate
# set earlier in this cell.
# print(...) with a single string prints the same text on Python 2 and 3
print('Testing tone response')
br.reinit_default_clock()
duration = 60*br.ms
levels = [0, 20, 40, 60, 80]
# one channel per level: a tone ramped at both ends, followed by silence of
# half the tone's duration
tones = bh.Sound([bh.Sound.sequence([bh.tone(cf, duration).atlevel(level*bh.dB).ramp(when='both',
                                                                                     duration=10*br.ms,
                                                                                     inplace=False),
                                     bh.silence(duration=duration/2)])
                  for level in levels])
ihc = bh.TanCarney(bh.MiddleEar(tones), [cf] * len(levels), update_interval=1)
syn = bh.ZhangSynapse(ihc, cf)
# record both state variables for every channel on the synapse's own clock
s_mon = br.StateMonitor(syn, 's', record=True, clock=syn.clock)
R_mon = br.StateMonitor(syn, 'R', record=True, clock=syn.clock)
spike_mon = br.SpikeMonitor(syn)
net = br.Network(syn, s_mon, R_mon, spike_mon)
net.run(duration * 1.5)
for idx, level in enumerate(levels):
    # Figure 3: variable 's', one subplot per level
    plt.figure(3)
    plt.subplot(len(levels), 1, idx + 1)
    plt.plot(s_mon.times / br.ms, s_mon[idx])
    plt.xlim(0, 120)
    plt.xlabel('Time (msec)')
    plt.ylabel('Sp/sec')
    plt.text(1.25 * duration/br.ms, np.nanmax(s_mon[idx])/2., '%s SPL' % str(level*bh.dB))
    # remember the y-range so figure 4 can be drawn on the same scale
    ymin, ymax = plt.ylim()
    if idx == 0:
        plt.title('CF=%.0f Hz - Response to Tone at CF' % cf)

    # Figure 4: variable 'R' with the recorded spike times overlaid
    plt.figure(4)
    plt.subplot(len(levels), 1, idx + 1)
    plt.plot(R_mon.times / br.ms, R_mon[idx])
    plt.xlabel('Time (msec)')
    # was a second xlabel call, which left the y-axis unlabelled
    plt.ylabel('Sp/sec')
    plt.text(1.25 * duration/br.ms, np.nanmax(R_mon[idx])/2., '%s SPL' % str(level*bh.dB))
    plt.ylim(ymin, ymax)
    if idx == 0:
        plt.title('CF=%.0f Hz - Response to Tone at CF (with spikes and refractoriness)' % cf)
    # mark each spike with a red cross at the height of the trace maximum
    plt.plot(spike_mon.spiketimes[idx] / br.ms,
         np.ones(len(spike_mon.spiketimes[idx])) * np.nanmax(R_mon[idx]), 'rx')

In [None]:
# Dual resonance nonlinear filter
# Builds a linear and a nonlinear filter pathway over the same white-noise
# input, adds their outputs, and plots the sum as a cochleogram.
# NOTE: this cell rebinds the notebook globals `sound` and
# `center_frequencies`, which cochleogram() reads.
simulation_duration = 50*br.ms
samplerate = 50*br.kHz
level = 50*bh.dB  # level of the input sound in rms dB SPL
sound = bh.whitenoise(simulation_duration, samplerate).ramp()
sound.level = level

nbr_cf = 50  # number of centre frequencies
# center frequencies with a spacing following an ERB scale
center_frequencies = bh.erbspace(100*br.Hz,1000*br.Hz, nbr_cf)

# conversion to stapes velocity (the units needed by the following stages)
sound = sound*0.00014

#### Linear Pathway ####

# bandpass filter (gammatone filterbank; its order is order_linear, i.e. 3 here)
center_frequencies_linear = 10**(-0.067+1.016*np.log10(center_frequencies))
bandwidth_linear = 10**(0.037+0.785*np.log10(center_frequencies))
order_linear = 3
gammatone = bh.ApproximateGammatone(sound, center_frequencies_linear,
                                    bandwidth_linear, order=order_linear)

# linear gain (one gain value per channel, applied sample-wise)
g = 10**(4.2-0.48*np.log10(center_frequencies))
func_gain = lambda x: g * x
gain = bh.FunctionFilterbank(gammatone, func_gain)

# low pass filter: bh.LowPass cascaded 4 times
# NOTE(review): order_lowpass_linear is defined but never passed to any call
# in this cell -- confirm whether the low-pass order was meant to be set.
cutoff_frequencies_linear = center_frequencies_linear
order_lowpass_linear = 2
lp_l = bh.LowPass(gain, cutoff_frequencies_linear)
lowpass_linear = bh.Cascade(gain, lp_l, 4)

#### Nonlinear Pathway ####

# bandpass filter (third order gammatone filters)
center_frequencies_nonlinear = center_frequencies
bandwidth_nonlinear = 10**(-0.031+0.774*np.log10(center_frequencies))
order_nonlinear = 3
bandpass_nonlinear1 = bh.ApproximateGammatone(sound, center_frequencies_nonlinear,
                                              bandwidth_nonlinear,
                                              order=order_nonlinear)

# compression (linear at low level, compress at high level):
# the smaller of a*|x| and b*|x|**v is kept, with the sign of x restored
a = 10**(1.402+0.819*np.log10(center_frequencies))  # linear gain
b = 10**(1.619-0.818*np.log10(center_frequencies))
v = .2  # compression exponent
func_compression = lambda x: np.sign(x) * np.minimum(a*np.abs(x), b*np.abs(x)**v)
compression = bh.FunctionFilterbank(bandpass_nonlinear1, func_compression)

# bandpass filter (third order gammatone filters)
bandpass_nonlinear2 = bh.ApproximateGammatone(compression,
                                              center_frequencies_nonlinear,
                                              bandwidth_nonlinear,
                                              order=order_nonlinear)

# low pass filter: bh.LowPass cascaded 3 times
# NOTE(review): order_lowpass_nonlinear is likewise defined but unused here.
cutoff_frequencies_nonlinear = center_frequencies_nonlinear
order_lowpass_nonlinear = 2
lp_nl = bh.LowPass(bandpass_nonlinear2, cutoff_frequencies_nonlinear)
lowpass_nonlinear = bh.Cascade(bandpass_nonlinear2, lp_nl, 3)

# adding the two pathways
dnrl_filter = lowpass_linear + lowpass_nonlinear
dnrl = dnrl_filter.process()

cochleogram(dnrl)

In [None]:
# DCGC; Compressive Gammachirp
# Sets up the stimulus, the shared passive gammachirp filterbank, the control
# path, and the constants used by the level-dependent filter updater.
# NOTE: this cell rebinds the notebook global `sound`.
simulation_duration = 50*br.ms
samplerate = 50*br.kHz
level = 50*bh.dB # level of the input sound in rms dB SPL
sound = bh.whitenoise(simulation_duration, samplerate).ramp()
sound = sound.atlevel(level)

nbr_cf = 50 # number of centre frequencies
# center frequencies with a spacing following an ERB scale
cf = bh.erbspace(100*br.Hz, 1000*br.Hz, nbr_cf)

c1 = -2.96 # glide slope of the first filterbank
b1 = 1.81  # factor determining the time constant of the first filterbank
c2 = 2.2   # glide slope of the second filterbank
b2 = 2.17  # factor determining the time constant of the second filterbank

order_ERB = 4
ERBrate = 21.4*np.log10(4.37*cf/1000+1)
ERBwidth = 24.7*(4.37*cf/1000 + 1)
# mean spacing between adjacent channels, in ERBrate units
ERBspace = np.mean(np.diff(ERBrate))

# the filter coefficients are updated every update_interval (here in samples)
update_interval = 1

# Bank of passive gammachirp filters. The control path uses the same passive
# filterbank as the signal path (but shifted in frequency), so this
# filterbank is used by both pathways.
pGc = bh.LogGammachirp(sound, cf, b=b1, c=c1)

fp1 = cf + c1*ERBwidth*b1/order_ERB # centre frequency of the signal path

#### Control Path ####

# the first filterbank in the control path consists of gammachirp filters
# value of the shift in ERB frequencies of the control path with respect to the signal path
lct_ERB = 1.5
n_ch_shift = np.round(lct_ERB/ERBspace) # value of the shift in channels
# index of the channel of the control path taken from pGc: the 1-based channel
# number is shifted, clamped to [1, nbr_cf], then converted to a 0-based index
indch1_control = np.minimum(np.maximum(1, np.arange(1, nbr_cf+1)+n_ch_shift), nbr_cf).astype(int)-1
fp1_control = fp1[indch1_control]
# the control path band-pass filter uses the channels of pGc indexed by indch1_control
pGc_control = bh.RestructureFilterbank(pGc, indexmapping=indch1_control)

# the second filterbank in the control path consists of fixed asymmetric compensation filters
frat_control = 1.08
fr2_control = frat_control*fp1_control
asym_comp_control = bh.AsymmetricCompensation(pGc_control, fr2_control, b=b2, c=c2)

# definition of the poles of the asymmetric compensation filters
p0 = 2
p1 = 1.7818*(1-0.0791*b2)*(1-0.1655*abs(c2))
p2 = 0.5689*(1-0.1620*b2)*(1-0.0857*abs(c2))
p3 = 0.2523*(1-0.0244*b2)*(1+0.0574*abs(c2))
p4 = 1.0724

# definition of the parameters used in the control path output levels computation
# (see IEEE paper for details)
decay_tcst = .5*br.ms
order = 1.  # NOTE(review): not referenced anywhere else in this cell
lev_weight = .5
level_ref = 50.
level_pwr1 = 1.5
level_pwr2 = .5
RMStoSPL = 30.
frat0 = .2330
frat1 = .005
# per-sample decay factor applied to the previous level by the updater
exp_deca_val = np.exp(-1/(decay_tcst*samplerate)*np.log(2))
# floor applied to the total level before it is converted to dB
level_min = 10**(-RMStoSPL/20)

# Definition of the controller class. It takes the outputs of the first and
# second filterbanks of the control path as input and computes an overall
# intensity level for each frequency channel. It then uses those levels to
# update the filter coefficients of its target, the asymmetric compensation
# filterbank of the signal path.
class CompensensationFilterUpdater(object):
    """Callable that retunes a target filterbank from two control outputs.

    Each call estimates a per-channel level from the latest sample of the two
    inputs, converts it to dB, derives a frequency ratio from it and writes
    freshly computed asymmetric-compensation coefficients into the target's
    ``filt_b`` / ``filt_a`` attributes. All tuning constants (``exp_deca_val``,
    ``lev_weight``, ``level_ref``, ``frat0``, ...) are read from module-level
    names.
    """

    def __init__(self, target):
        """Remember the filterbank to update and reset the level memory."""
        self.target = target
        # Levels carried over from the previous call; the first call starts
        # from -100 for both inputs.
        self.level1_prev = -100
        self.level2_prev = -100

    def __call__(self, *input):
        """Update the target's coefficients from two control-path buffers.

        ``input[0]`` and ``input[1]`` are 2-D buffers; only their last row is
        used.
        """
        latest1 = input[0][-1, :]
        latest2 = input[1][-1, :]

        # The current level is the larger of the rectified output and the
        # previous level after decay.
        decayed1 = self.level1_prev*exp_deca_val
        decayed2 = self.level2_prev*exp_deca_val
        level1 = np.maximum(np.maximum(latest1, 0), decayed1)
        level2 = np.maximum(np.maximum(latest2, 0), decayed2)

        # Keep both levels for the next call.
        self.level1_prev, self.level2_prev = level1, level2

        # Weighted combination of the two filterbank levels ...
        contribution1 = lev_weight*level_ref*(level1/level_ref)**level_pwr1
        contribution2 = (1-lev_weight)*level_ref*(level2/level_ref)**level_pwr2
        level_total = contribution1 + contribution2
        # ... floored at level_min and expressed in dB.
        level_dB = 20*np.log10(np.maximum(level_total, level_min))+RMStoSPL

        # The frequency ratio is linear in the dB level; it scales fp1 to give
        # the new centre frequencies of the compensation filters.
        fr2 = fp1*(frat0 + frat1*level_dB)

        new_b, new_a = bh.asymmetric_compensation_coeffs(
            samplerate, fr2, self.target.filt_b, self.target.filt_a,
            b2, c2, p0, p1, p2, p3, p4)
        self.target.filt_b, self.target.filt_a = new_b, new_a

#### Signal Path ####
# the signal path consists of the passive gammachirp filterbank pGc defined
# earlier in this cell, followed by an asymmetric compensation filterbank
fr1 = fp1*frat0
varyingfilter_signal_path = bh.AsymmetricCompensation(pGc, fr1, b=b2, c=c2)
updater = CompensensationFilterUpdater(varyingfilter_signal_path)
# the controller is instantiated with the two filterbanks of the control path
# as inputs and the varying filter of the signal path as target
control = bh.ControlFilterbank(varyingfilter_signal_path,
                               [pGc_control, asym_comp_control],
                               varyingfilter_signal_path, updater, update_interval)

# run the simulation
# Remember that the controller is at the end of the chain, so the output of
# the whole path comes from it
signal = control.process()
# cochleogram() takes its frequency axis from the global `center_frequencies`,
# which this cell never defines; bind it to this cell's `cf` so the plot does
# not depend on whichever cell last set it.
center_frequencies = cf
cochleogram(signal)