In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from IPython.display import Audio

# Some plotting niceties: default every figure to 10x4 inches.
plt.rc('figure', figsize=(10, 4))
# NOTE(review): two seaborn styles are applied back to back; the later 'ticks'
# call presumably supersedes what 'white' set -- confirm whether both are needed.
sns.set_style('white')
sns.set_style('ticks')

import nengo
import phd

def img(array):
    """Draw a 2-D array as a colour mesh with its first axis along x.

    The array is transposed before plotting, so ``array.shape[0]`` spans the
    horizontal axis and ``array.shape[1]`` the vertical one.  Both axes are
    clipped to the array extent, the y ticks are hidden, the left spine is
    removed and the layout is tightened.
    """
    plt.pcolormesh(array.T)
    n_x, n_y = array.shape[0], array.shape[1]
    plt.xlim(right=n_x)
    plt.ylim(top=n_y)
    plt.yticks(())
    sns.despine(left=True)
    plt.tight_layout()

In [None]:
%%javascript
// Add a 'kill and run-first' button group to the notebook toolbar, unless an
// element with id 'kill-run-first' already exists under the toolbar.
if($(IPython.toolbar.selector.concat(' > #kill-run-first')).length == 0){
  IPython.toolbar.add_buttons_group([
    {
      'label'   : 'kill and run-first',
      'icon'    : 'fa fa-angle-double-down',
      // Restart the kernel, then execute the first cell once it is ready again.
      'callback': function(){
        IPython.notebook.kernel.restart();
        // .one(): the handler runs only for the next 'kernel_ready.Kernel' event.
        $(IPython.events).one('kernel_ready.Kernel', function(){
          // Remember the current selection, execute cell 0, then restore it.
          var idx = IPython.notebook.get_selected_index();
          IPython.notebook.select(0);
          IPython.notebook.execute_cell();
          IPython.notebook.select(idx);
        });
      }
    }
  ], 'kill-run-first');
}

In [None]:
# Set up the production model with a three-syllable sequence.
model = phd.sermo.Production()
model.trial.sequence = 'PAT*POS1 + DAS*POS2 + KAP*POS3'
# model.trial.sequence = 'PAT*POS1'
# (gesture-score file stem, frequency) per syllable.
# NOTE(review): frequency is presumably in Hz, since 1 / freq is added to a time below -- confirm.
freqs = [('pat', 1.5), ('das', 1.7), ('kap', 1.2)]
# Total run time: starts at 0.2 and grows by one period (1 / freq) per syllable.
t = 0.2
for ges, freq in freqs:
    # Locate the syllable's .ges file, parse it and get its trajectory at the trial dt.
    path = phd.ges_path('ges-de-cvc', '%s.ges' % ges.lower())
    traj = phd.vtl.parse_ges(path).trajectory(dt=model.trial.dt)
    # Syllables are registered under the upper-cased name used in the sequence string.
    model.add_syllable(label=ges.upper(), freq=freq, trajectory=traj)
    t += 1. / freq

In [None]:
def _probe(target):
    """Create a probe on ``target`` with the ``synapse=0.01`` filter shared by every probe in this cell."""
    return nengo.Probe(target, synapse=0.01)


# Build the network, then attach the probes inside its context so that they
# are added to it.  Probes are created in a fixed order: sequence output,
# sequencer signals, per-syllable DMP signals, production output.
net = model.build()
with net:
    p_syll = _probe(net.sequence.syllable.output)
    p_gate = _probe(net.sequencer.gate)
    p_reset = _probe(net.sequencer.reset)
    p_timer = _probe(net.sequencer.timer)
    p_disinhib = [_probe(dmp.disinhibit) for dmp in net.syllables]
    p_dmps = [_probe(dmp.osc) for dmp in net.syllables]
    p_prod = _probe(net.production_info.output)

In [None]:
# Build a simulator for the network and run it for the total time `t`
# accumulated while the syllables were added.
sim = nengo.Simulator(net)
sim.run(t)

In [None]:
# Similarity of the probed syllable output to each syllable's vocabulary
# vector, one line per syllable.
syllables = [entry[0].upper() for entry in freqs]
syll_vocab = net.sequence.vocab.create_subset(syllables)
syll_similarity = nengo.spa.similarity(sim.data[p_syll], syll_vocab)
plt.plot(sim.trange(), syll_similarity)
plt.legend(syllables, loc='best')

In [None]:
# Plot the sequencer signals.  Every line is labelled after the probe it comes
# from, so the legend cannot drift from what is plotted; a probe with several
# dimensions gets one indexed label per dimension (e.g. 'timer[0]', 'timer[1]').
for seq_probe, seq_name in [(p_gate, 'gate'), (p_reset, 'reset'), (p_timer, 'timer')]:
    seq_lines = plt.plot(sim.trange(), sim.data[seq_probe])
    for dim, seq_line in enumerate(seq_lines):
        if len(seq_lines) == 1:
            seq_line.set_label(seq_name)
        else:
            seq_line.set_label('%s[%d]' % (seq_name, dim))
plt.legend(loc="best")
# plt.ylim([-0.1, 1.1])

In [None]:
# One disinhibition trace per syllable DMP.  Labels are derived from `freqs`
# instead of being hard-coded, so they follow any change to the syllable list.
# NOTE(review): assumes net.syllables keeps the order in which the syllables
# were added to the model -- the hard-coded legend relied on the same ordering.
for dis_label, p_dis in zip([entry[0].upper() for entry in freqs], p_disinhib):
    dis_lines = plt.plot(sim.trange(), sim.data[p_dis])
    # Label only the first line so each syllable gets exactly one legend entry.
    dis_lines[0].set_label(dis_label)
plt.legend(loc="best")
# plt.ylim([-0.1, 1.1])

In [None]:
# Phase portraits (dimension 0 against dimension 1): the sequencer timer
# first, then one panel per syllable DMP.  Panels are laid out two per row with
# as many rows as needed, which is the original 2x2 / 10x10 inch layout for
# three syllables but no longer breaks when more syllables are added.
n_panels = 1 + len(p_dmps)
n_rows = (n_panels + 1) // 2
plt.figure(figsize=(10, 5 * n_rows))
plt.subplot(n_rows, 2, 1)
plt.plot(sim.data[p_timer].T[0], sim.data[p_timer].T[1])
for i, p_dmp in enumerate(p_dmps):
    plt.subplot(n_rows, 2, i + 2)
    plt.plot(sim.data[p_dmp].T[0], sim.data[p_dmp].T[1])

In [None]:
# Production output over time; the trailing ';' suppresses the text repr of the returned lines.
plt.plot(sim.trange(), sim.data[p_prod]);

## Construct a gesture score from the production data

In [None]:
# Target: actual trajectories
from phd.experiments import shorten

def gest2seqlabel(gesture, vtl=None):
    """Return the gesture-sequence label that the named gesture belongs to.

    ``vtl`` provides ``gesture_labels()``; a new ``phd.vtl.VTL()`` is created
    when it is omitted.  The ``"f0"`` entry is removed from the label list
    before the gesture's position is looked up and handed to ``ix2seqlabel``.
    """
    vtl = phd.vtl.VTL() if vtl is None else vtl
    labels = vtl.gesture_labels()
    labels.remove("f0")
    return ix2seqlabel(labels.index(gesture), labels)

def ix2seqlabel(ix, labels):
    """Map a gesture index to the label of the gesture sequence it falls in.

    ``labels`` is the ordered list of gesture labels.  Each sequence is
    delimited by the position of a marker label: an index below the marker's
    position belongs to the sequence paired with it.  Markers are checked in
    order and looked up lazily, so a marker is only searched for when no
    earlier one matched.  Indices at or past the 'lung-pressure' marker fall
    into 'lung-pressure-gestures'.
    """
    upper_bounds = (
        ('ll-labial-nas', 'vowel-gestures'),
        ('tt-alveolar-nas', 'lip-gestures'),
        ('tb-palatal-fric', 'tongue-tip-gestures'),
        ('breathy', 'tongue-body-gestures'),
        ('velic', 'glottal-shape-gestures'),
        ('lung-pressure', 'velic-gestures'),
    )
    for marker, seq_label in upper_bounds:
        if ix < labels.index(marker):
            return seq_label
    return 'lung-pressure-gestures'

# Build the target trajectory: for each syllable, pass its trajectory through
# shorten() with the number of frames in one period (1 / freq divided by the
# trial dt), then stack the pieces in production order.
# NOTE(review): shorten() presumably reduces the trajectory to `t_frames`
# frames -- confirm against phd.experiments.
target_segments = []
order = [0, 1, 2]
for i in order:
    syll = model.syllables[i]
    speed = syll.freq
    t_frames = int((1. / speed) / model.trial.dt)
    target_segments.append(shorten(syll.trajectory, t_frames))
full_traj = np.vstack(target_segments)
img(full_traj)
# Function-call form of print: valid on Python 2 and 3 alike.
print(full_traj.shape[0])

In [None]:
# Drop the first `delay_frames` frames of the production output.
# NOTE(review): 200 frames presumably corresponds to the 0.2 offset that `t`
# starts from (i.e. a dt of 0.001) -- confirm against model.trial.dt.
delay_frames = 200
y = sim.data[p_prod][delay_frames:]
print(y.shape)
img(y)

In [None]:
import nengo.utils.numpy as npext
# RMSE between the target trajectory and the production output, with the
# output truncated to the target's number of frames.
print(npext.rmse(full_traj, y[:full_traj.shape[0]]))

In [None]:
# Make a gesture score out of the traj
from phd.mfcc import derivative

# Boolean mask of the entries whose absolute derivative exceeds 0.012.
# NOTE(review): the meaning of derivative()'s second argument (18) and the
# choice of the 0.012 threshold are not visible here; presumably hand-tuned -- confirm.
tderiv = np.abs(derivative(y, 18))
slices = tderiv > 0.012
# Show the trajectory and, in a second figure, the thresholded mask.
img(y)
plt.figure()
img(slices)

In [None]:
# Locate every on/off transition in `slices`.  An all-zero row is added before
# the first and after the last frame so that a column that is already active at
# the start, or still active at the end, also produces a transition.  The pad
# width follows the data rather than a hard-coded column count.
n_columns = slices.shape[1]
pad_row = np.zeros((1, n_columns))
transitions = np.abs(np.diff(np.vstack([pad_row, slices, pad_row]), axis=0))
x_ind, y_ind = np.where(transitions)
# Add a blank area at the start, so that the starting neutral gesture is recorded
# x_ind = np.hstack([np.zeros(48 * 2, dtype=int), x_ind])
# y_ind = np.hstack([np.arange(48), np.arange(48), y_ind])
img(slices)
plt.plot(x_ind, y_ind, ls='none', marker='o')

In [None]:
# Sort by sequence label then x index, as each gesture depends on the two
# subsequent time slices for that gesture.
vtl = phd.vtl.VTL()
labels = vtl.gesture_labels()
labels.remove("f0")
seqs = np.array([ix2seqlabel(yi, labels) for yi in y_ind])

# A single stable sort replaces the argsort-then-resort-each-group approach.
# np.lexsort treats the LAST key as the primary one, so this orders by
# sequence label (alphabetically, via the integer codes from np.unique) and,
# within a sequence, by x index.  Entries that tie on both keys keep the order
# np.where produced instead of depending on an unstable sort.
seq_codes = np.unique(seqs, return_inverse=True)[1]
sort_ix = np.lexsort((x_ind, seq_codes))
seqs = seqs[sort_ix]
x_ind = x_ind[sort_ix]
y_ind = y_ind[sort_ix]

# Function-call form of print: valid on Python 2 and 3 alike.
print(seqs)
print(y_ind)
print(x_ind)

In [None]:
from phd.utils import rescale

# Rebuild a gesture score from the sorted transitions.  Entries are consumed in
# pairs: (i, i + 1) are taken as the two transitions of one gesture, and
# (i + 2, i + 3) as those of the following gesture in the same sequence.
gs = phd.vtl.GestureScore(vtl.gesture_labels())
gs.labels.remove('f0')
seq = phd.vtl.GestureSequence(seqs[0])
gs.sequences.append(seq)
for i in range(0, x_ind.size, 2):
    this_start = x_ind[i]
    this_end = x_ind[i+1]
    this_y = y_ind[i]
    # Read the sequence type for every pair, including the very last one;
    # otherwise the final pair would be filed under the previous pair's
    # sequence (or hit an undefined name when there is only a single pair).
    this_seq = seqs[i]

    if i+3 > x_ind.size or seqs[i+2] != this_seq:
        # No later pair in this sequence: extend to the last frame.
        next_start = next_end = tderiv.shape[0]
    else:
        next_start = x_ind[i+2]
        next_end = x_ind[i+3]

    if seq.type != this_seq:
        # First pair of a new sequence: start collecting into a fresh one.
        seq = phd.vtl.GestureSequence(this_seq)
        gs.sequences.append(seq)

    # The gesture runs from the midpoint of its own transition pair to the
    # midpoint of the next pair (or to the last frame).
    st = int((this_end + this_start) // 2)
    ed = int((next_end + next_start) // 2)
    y_slice = y[st:ed, this_y]

    if np.median(y_slice) < 0.1:
        # We will ignore neutral gestures that occur,
        # but then add in our own when necessary
        continue

    if seq.numerical:
        # Numerical sequences store a value: the slice maximum mapped from
        # [0, 1] onto the range registered for this gesture label.
        value = y_slice.max()
        value = rescale(value, 0, 1, *vtl.numerical_range[gs.labels[this_y]])
    else:
        value = gs.labels[this_y]

    tau = (this_end - this_start) * model.trial.dt * 0.5  # to match normal values
    duration = (ed - st) * model.trial.dt

    # Current time according to seq is seq.t_end
    # Time that this gesture starts is st * model.trial.dt
    this_start_t = round(st * model.trial.dt, 3)
    seq_t = round(seq.t_end, 3)
    if this_start_t < seq_t:
        raise RuntimeError(
            "Gesture in %r starts at %.3f s, before the sequence's current end at %.3f s"
            % (this_seq, this_start_t, seq_t))
    elif this_start_t > seq_t:
        # Add a neutral gesture to make up the difference
        # NOTE(review): the trailing True/False argument of Gesture presumably
        # flags a neutral gesture -- confirm against phd.vtl.Gesture.
        t_diff = this_start_t - seq_t
        seq.gestures.append(phd.vtl.Gesture("", 0., t_diff, tau, True))
    seq.gestures.append(phd.vtl.Gesture(value, 0., duration, tau, False))

# Compare the production output with the trajectory of the rebuilt score.
img(y)
plt.figure()
img(gs.trajectory(dt=model.trial.dt))
plt.xlim(right=y.shape[0])

In [None]:
# Synthesize audio from the rebuilt gesture score and embed a player for it.
audio, fs = gs.synthesize()
Audio(data=audio, rate=fs)