In [None]:
# Common imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import nengo
import nengo.utils.numpy as npext
# import nengo_ocl
import nengo_gui.ipython

import phd

# Plot defaults: 8x5 figures, then seaborn's 'white' and 'ticks' styles in turn.
# NOTE(review): 'ticks' is applied after 'white'; presumably the later call
# determines the final style -- confirm whether both calls are needed.
plt.rcParams['figure.figsize'] = (8, 5)
sns.set_style('white')
sns.set_style('ticks')

def find_nearest_idx(array, val):
    """Return the index of the element of ``array`` closest to ``val``.

    ``array`` may be a NumPy array or any sequence ``np.asarray`` accepts
    (list, tuple, ...). Closeness is absolute difference; when several
    elements tie, the first one wins (``argmin`` semantics). For
    multi-dimensional input the index refers to the flattened array.

    Raises ``ValueError`` (from ``argmin``) if ``array`` is empty.
    """
    # Coerce first so plain Python sequences support the subtraction below.
    distances = np.abs(np.asarray(array) - val)
    return distances.argmin()

def ph_labels(phonemes, data, time, every=0.05, thresh=0.5, y=0.9):
    """Write the strongest phoneme's label on the plot at regular intervals.

    For each ``t`` in ``np.arange(every, time[-1], every)`` the row of
    ``data`` whose timestamp in ``time`` is nearest to ``t`` is inspected.
    If that row's maximum exceeds ``thresh``, the entry of ``phonemes`` at
    the position of the maximum is drawn with ``plt.text`` centred on
    ``(t, y)``.

    Parameters
    ----------
    phonemes : sequence
        Labels, indexed by the position of the maximum within a row of
        ``data``.
    data : array
        Indexed first by sample; each row must support ``max``/``argmax``.
    time : array
        Timestamps matching the rows of ``data``; ``time[-1]`` bounds the
        label positions.
    every : float
        Spacing between label positions, in the units of ``time``.
    thresh : float
        A label is only drawn where the row maximum is above this value.
    y : float
        Vertical position passed to ``plt.text``; defaults to the
        previously hard-coded 0.9.
    """
    for t in np.arange(every, time[-1], every):
        # Look the sample up once and reuse it for both max and argmax.
        row = data[find_nearest_idx(time, t)]
        if row.max() > thresh:
            plt.text(t, y, phonemes[row.argmax()],
                     horizontalalignment='center',
                     verticalalignment='center')

In [None]:
%%javascript
// Add a 'kill and run-first' button group to the notebook toolbar, unless an
// element with id 'kill-run-first' already exists directly under the toolbar
// (so re-running this cell does not add a second group).
if($(IPython.toolbar.selector.concat(' > #kill-run-first')).length == 0){
  IPython.toolbar.add_buttons_group([
    {
      'label'   : 'kill and run-first',
      'icon'    : 'fa fa-angle-double-down',
      'callback': function(){
        // Restart the kernel, then react once to the 'kernel_ready.Kernel' event.
        IPython.notebook.kernel.restart();
        $(IPython.events).one('kernel_ready.Kernel', function(){
          // Remember the current selection, select cell 0 and call
          // execute_cell(), then restore the previous selection.
          var idx = IPython.notebook.get_selected_index();
          IPython.notebook.select(0);
          IPython.notebook.execute_cell();
          IPython.notebook.select(idx);
        });
      }
    }
  ], 'kill-run-first');
}

In [None]:
# Rate `fs` and its reciprocal `dt`; `dt` is the time step handed to
# plot_sound further down in this cell.
fs = 25000.0
dt = 1.0 / fs

def plot_sound(process, t, dt):
    """Plot ``process.run(t, dt=dt)`` against ``process.trange(t, dt=dt)``.

    A new figure is opened, the x-axis is capped at ``t`` on the right, and
    seaborn's ``despine`` is applied.
    """
    plt.figure()
    times = process.trange(t, dt=dt)
    samples = process.run(t, dt=dt)
    plt.plot(times, samples)
    plt.xlim(right=t)
    sns.despine()

# Plot 0.667 time units of the 'speech.wav' process; the commented-out
# lines are alternative processes to plot instead.
speech_process = phd.processes.WavFile('speech.wav')
plot_sound(speech_process, 0.667, dt)
# plot_sound(phd.processes.WhiteNoise(), 0.1, dt)
# plot_sound(phd.processes.Tone(250), 0.1, dt)

#  Recognition system

## Auditory periphery

This section makes heavy use of [Brian hears](http://www.briansimulator.org/docs/hears.html),
but other periphery models should also be investigated.

In [None]:
# Sermo model (execution=False) whose periphery is driven by WhiteNoise.
model = phd.Sermo(execution=False)
periphery = model.recognition.periphery
periphery.fs = 20000
periphery.freqs = phd.filters.erbspace(20, 10000, 64)
periphery.sound_process = phd.processes.WhiteNoise()
periphery.auditory_filter = phd.filters.gammatone(periphery.freqs)
net = model.build()

# Unfiltered (synapse=None) probes on `ihc`, on the input of `an`, and on
# the neuron output of `an`.
with net:
    unfiltered = dict(synapse=None)
    ihc_p = nengo.Probe(net.periphery.ihc, **unfiltered)
    an_in_p = nengo.Probe(net.periphery.an.input, **unfiltered)
    an_p = nengo.Probe(net.periphery.an.add_neuron_output(), **unfiltered)

In [None]:
from nengo.utils.matplotlib import rasterplot

# NOTE(review): the step is derived from the largest value in
# periphery.freqs rather than from periphery.fs -- confirm this is intended.
dt = 1. / periphery.freqs.max()
print("dt=%.5f" % dt)
sim = nengo.Simulator(net, dt=dt)
sim.run(0.1)

# One cochleogram figure per probe, in the order ihc_p then an_in_p.
for probe in (ihc_p, an_in_p):
    plt.figure()
    phd.plots.cochleogram(sim.data[probe], sim.trange(), periphery.freqs)

# Raster plot of the `an` neuron output, y-axis spanning every neuron.
plt.figure()
rasterplot(sim.trange(), sim.data[an_p])
an_array = net.periphery.an
plt.ylim(0, an_array.n_neurons * an_array.n_ensembles)

In [None]:
# Total n_neurons summed over every ensemble in the network.
neuron_counts = [ens.n_neurons for ens in net.all_ensembles]
print(sum(neuron_counts))

## Preprocessing layer

In [None]:
# Sermo model (execution=False) driven by 'speech.wav', with two 'TrippFF'
# derivatives added at delays 0.01 and 0.1.
model = phd.Sermo(execution=False)
periphery = model.recognition.periphery
periphery.fs = 20000
periphery.freqs = phd.filters.erbspace(20, 10000, 64)
periphery.sound_process = phd.processes.WavFile('speech.wav')
periphery.auditory_filter = phd.filters.gammatone(periphery.freqs)
fast_deriv = model.recognition.add_derivative('TrippFF', delay=0.01)
slow_deriv = model.recognition.add_derivative('TrippFF', delay=0.1)
net = model.build()

# All probes sample every 0.001; everything except `ihc` is filtered with
# a 0.01 synapse.
with net:
    filtered = dict(synapse=0.01, sample_every=0.001)
    ihc_p = nengo.Probe(net.periphery.ihc, synapse=None, sample_every=0.001)
    an_p = nengo.Probe(net.periphery.an.output, **filtered)
    fd_p = nengo.Probe(net.derivatives[0.01].output, **filtered)
    sd_p = nengo.Probe(net.derivatives[0.1].output, **filtered)

In [None]:
# Simulate for 0.667 with a step of 1 / (largest periphery frequency).
dt = 1. / net.periphery.freqs.max()
sim = nengo.Simulator(net, dt=dt)
sim.run(0.667)

# 2x2 grid of cochleograms, filled in the order ihc, an, fast derivative,
# slow derivative.
plt.figure(figsize=(10, 10))
for panel, probe in enumerate((ihc_p, an_p, fd_p, sd_p), 1):
    plt.subplot(2, 2, panel)
    phd.plots.cochleogram(sim.data[probe], sim.trange(0.001), net.periphery.freqs)
plt.tight_layout()

In [None]:
# Total n_neurons summed over every ensemble in the network.
neuron_counts = [ens.n_neurons for ens in net.all_ensembles]
print(sum(neuron_counts))

## Feature layer

### No hierarchy

In [None]:
# Sermo model (execution=False) driven by 'speech.wav', with two 'TrippFF'
# derivatives and non-hierarchical vowel/consonant phoneme detectors.
model = phd.Sermo(execution=False)
periphery = model.recognition.periphery
periphery.fs = 20000
periphery.freqs = phd.filters.erbspace(20, 10000, 64)
periphery.sound_process = phd.processes.WavFile('speech.wav')
periphery.auditory_filter = phd.filters.gammatone(periphery.freqs)
fast_deriv = model.recognition.add_derivative('TrippFF', delay=0.01)
slow_deriv = model.recognition.add_derivative('TrippFF', delay=0.1)
vow_detector = model.recognition.add_phoneme_detector(
    name='vowel',
    derivatives=[0.01, 0.1],
    phonemes=phd.timit.vowels)
# NOTE(review): 0.01 is listed twice here while the vowel detector uses
# [0.01, 0.1] -- confirm the repetition is intentional.
cons_detector = model.recognition.add_phoneme_detector(
    name='consonant',
    derivatives=[0.01, 0.01],
    phonemes=phd.timit.consonants)
# Call generate() on each detector's TrainingData, vowels first.
for detector in (vow_detector, cons_detector):
    phd.timit.TrainingData(model, detector).generate()

net = model.build()
with net:
    # Both detector outputs: 0.01 synapse, sampled every 0.001.
    filtered = dict(synapse=0.01, sample_every=0.001)
    vowel_p = nengo.Probe(net.detectors['vowel'].output, **filtered)
    cons_p = nengo.Probe(net.detectors['consonant'].output, **filtered)

In [None]:
# Print what cache_file() returns for each detector's TrainingData.
# Function-call form: valid on Python 3 and prints the same on Python 2.
print(phd.timit.TrainingData(model, vow_detector).cache_file())
print(phd.timit.TrainingData(model, cons_detector).cache_file())

In [None]:
# Simulate for 0.667 with a step of 1 / (largest periphery frequency).
dt = 1. / net.periphery.freqs.max()
sim = nengo.Simulator(net, dt=dt)
sim.run(0.667)
t = sim.trange(0.001)

# Two stacked panels: vowel detector on top, consonant detector below,
# each annotated by ph_labels with its sorted phoneme list.
plt.figure(figsize=(10, 10))
panels = ((vowel_p, phd.timit.vowels), (cons_p, phd.timit.consonants))
for row, (probe, phoneme_set) in enumerate(panels, 1):
    plt.subplot(2, 1, row)
    plt.plot(t, sim.data[probe])
    plt.xlim(right=t[-1])
    ph_labels(sorted(phoneme_set), sim.data[probe], sim.trange(0.001))
    sns.despine()

In [None]:
# Total n_neurons summed over every ensemble in the network.
neuron_counts = [ens.n_neurons for ens in net.all_ensembles]
print(sum(neuron_counts))

### With SumPool hierarchy

In [None]:
# Sermo model (execution=False) driven by 'speech.wav', with two 'TrippFF'
# derivatives and 'SumPool' hierarchical detectors (pooling=4).
model = phd.Sermo(execution=False)
periphery = model.recognition.periphery
periphery.fs = 20000
periphery.freqs = phd.filters.erbspace(20, 10000, 64)
periphery.sound_process = phd.processes.WavFile('speech.wav')
periphery.auditory_filter = phd.filters.gammatone(periphery.freqs)
fast_deriv = model.recognition.add_derivative('TrippFF', delay=0.01)
slow_deriv = model.recognition.add_derivative('TrippFF', delay=0.1)
# The vowel detector uses the 0.1 derivative, the consonant one uses 0.01.
vow_detector = model.recognition.add_phoneme_detector(
    name='vowel',
    hierarchical='SumPool',
    pooling=4,
    derivatives=[0.1],
    phonemes=phd.timit.vowels)
cons_detector = model.recognition.add_phoneme_detector(
    name='consonant',
    hierarchical='SumPool',
    pooling=4,
    derivatives=[0.01],
    phonemes=phd.timit.consonants)
# Call generate() on each detector's TrainingData, vowels first.
for detector in (vow_detector, cons_detector):
    phd.timit.TrainingData(model, detector).generate()

net = model.build()
with net:
    # Both detector outputs: 0.01 synapse, sampled every 0.001.
    filtered = dict(synapse=0.01, sample_every=0.001)
    vowel_p = nengo.Probe(net.detectors['vowel'].output, **filtered)
    cons_p = nengo.Probe(net.detectors['consonant'].output, **filtered)

In [None]:
# Print what cache_file() returns for each detector's TrainingData.
# Function-call form: valid on Python 3 and prints the same on Python 2.
print(phd.timit.TrainingData(model, vow_detector).cache_file())
print(phd.timit.TrainingData(model, cons_detector).cache_file())

In [None]:
# Simulate for 0.667 with a step of 1 / (largest periphery frequency).
dt = 1. / net.periphery.freqs.max()
sim = nengo.Simulator(net, dt=dt)
sim.run(0.667)
t = sim.trange(0.001)

# Two stacked panels: vowel detector on top, consonant detector below,
# each annotated by ph_labels with its sorted phoneme list.
plt.figure(figsize=(10, 10))
panels = ((vowel_p, phd.timit.vowels), (cons_p, phd.timit.consonants))
for row, (probe, phoneme_set) in enumerate(panels, 1):
    plt.subplot(2, 1, row)
    plt.plot(t, sim.data[probe])
    plt.xlim(right=t[-1])
    ph_labels(sorted(phoneme_set), sim.data[probe], sim.trange(0.001))
    sns.despine()

In [None]:
# Total n_neurons summed over every ensemble in the network.
neuron_counts = [ens.n_neurons for ens in net.all_ensembles]
print(sum(neuron_counts))

### With ProdTile hierarchy

In [None]:
# Sermo model (execution=False) driven by 'speech.wav', using 48 erbspace
# frequencies, two 'TrippFF' derivatives and 'ProdTile' hierarchical
# detectors (spread=1, center=0).
model = phd.Sermo(execution=False)
periphery = model.recognition.periphery
periphery.fs = 20000
periphery.freqs = phd.filters.erbspace(20, 10000, 48)
periphery.sound_process = phd.processes.WavFile('speech.wav')
periphery.auditory_filter = phd.filters.gammatone(periphery.freqs)
fast_deriv = model.recognition.add_derivative('TrippFF', delay=0.01)
slow_deriv = model.recognition.add_derivative('TrippFF', delay=0.1)
# The vowel detector uses the 0.1 derivative, the consonant one uses 0.01.
vow_detector = model.recognition.add_phoneme_detector(
    name='vowel',
    hierarchical='ProdTile',
    spread=1,
    center=0,
    derivatives=[0.1],
    phonemes=phd.timit.vowels)
cons_detector = model.recognition.add_phoneme_detector(
    name='consonant',
    hierarchical='ProdTile',
    spread=1,
    center=0,
    derivatives=[0.01],
    phonemes=phd.timit.consonants)
# Call generate() on each detector's TrainingData, vowels first.
for detector in (vow_detector, cons_detector):
    phd.timit.TrainingData(model, detector).generate()

net = model.build()
with net:
    # Both detector outputs: 0.01 synapse, sampled every 0.001.
    filtered = dict(synapse=0.01, sample_every=0.001)
    vowel_p = nengo.Probe(net.detectors['vowel'].output, **filtered)
    cons_p = nengo.Probe(net.detectors['consonant'].output, **filtered)

In [None]:
# Print what cache_file() returns for each detector's TrainingData.
# Function-call form: valid on Python 3 and prints the same on Python 2.
print(phd.timit.TrainingData(model, vow_detector).cache_file())
print(phd.timit.TrainingData(model, cons_detector).cache_file())

In [None]:
# Simulate for 0.667 with a step of 1 / (largest periphery frequency).
dt = 1. / net.periphery.freqs.max()
sim = nengo.Simulator(net, dt=dt)
sim.run(0.667)
t = sim.trange(0.001)

# Two stacked panels: vowel detector on top, consonant detector below,
# each annotated by ph_labels with its sorted phoneme list.
plt.figure(figsize=(10, 10))
panels = ((vowel_p, phd.timit.vowels), (cons_p, phd.timit.consonants))
for row, (probe, phoneme_set) in enumerate(panels, 1):
    plt.subplot(2, 1, row)
    plt.plot(t, sim.data[probe])
    plt.xlim(right=t[-1])
    ph_labels(sorted(phoneme_set), sim.data[probe], sim.trange(0.001))
    sns.despine()