# Speech actions

- `lab_clos_full` for b, p, m
- `lab_clos_fric` for f, v
- `api_clos_full` for t, d, n
- `api_clos_fric` for s, S, ...
- `api_clos_lat` for l
- `dor_clos_full` for k, g, N
- `glott_adduc_phon` voiced sounds
- `glott_abduc_nophon` for voiceless sounds
- `vow_ii` extreme ii vowel
- `vow_aa` extreme aa vowel
- `vow_uu` extreme uu vowel

# Synthesizer articulators

- `tongue_dors_hl` tongue dorsum high-low
- `tongue_dors_fb` tongue dorsum front-back
- `lips_protrusion` degree of lip protrusion
- `lips_constriction` degree of consonantal constriction at lips 
- `ttip_constriction` degree of consonantal constriction at tongue tip 
- `tdor_constriction` degree of consonantal constriction at tongue dorsum 

# Syllables

Syllable length is normalized so that
0 is the start of the syllable,
1 is the end,
but voicing each syllable should take around 300 ms.

## BAS

- `lab_clos_full` at 0.3-0.5  initial /b/
- `api_clos_fric` at 0.6-0.8  final /s/
- `glott_adduc_phon` at 0.3-0.7  voicing
- `glott_abduc_nophon` at 0.6-0.8  final consonant voiceless
- `vow_aa` at 0.3-0.7  vowel /a/

## KUL

- `api_clos_lat` at 0.6-0.8  final /l/
- `dor_clos_full` at 0.3-0.5  initial /k/
- `glott_adduc_phon` at 0.5-0.8  voicing
- `glott_abduc_nophon` at 0.3-0.6  initial consonant voiceless
- `vow_uu` at 0.3-0.7  vowel /u/

## TIP

- `lab_clos_full` at 0.6-0.8  final /p/
- `api_clos_full` at 0.3-0.5  initial /d/
- `glott_adduc_phon` at 0.3-0.7  voicing
- `glott_abduc_nophon` at 0.6-0.8  final consonant voiceless
- `vow_ii` at 0.3-0.7  vowel /ii/

# Common code

In [None]:
# Common imports
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from JSAnimation import IPython_display
from skspeech.synthesis import kroger as kr

import nengo
import nengo.utils.numpy as npext
import nengo_gui.ipython

In [None]:
from collections import namedtuple

# Column index of each speech action in the 11-dimensional speech-action
# vector; the order matches the "Speech actions" list at the top.
(
    lab_clos_full,
    lab_clos_fric,
    api_clos_full,
    api_clos_fric,
    api_clos_lat,
    dor_clos_full,
    glott_adduc_phon,
    glott_abduc_nophon,
    vow_ii,
    vow_aa,
    vow_uu,
) = range(11)

# Row index of each synthesizer articulator that is currently driven.
(
    tongue_dors_hl,
    tongue_dors_fb,
    lips_protrusion,
    lips_constriction,
    ttip_constriction,
    tdor_constriction,
) = range(6)

# A speech action that is active while the normalized syllable position
# lies in [onset, offset]; `idx` is one of the speech action indices above.
SA = namedtuple('SA', ['onset', 'offset', 'idx'])

def sa_func(actions):
    """Build a function mapping syllable position to speech-action activity.

    Parameters
    ----------
    actions : iterable
        Objects with ``onset``, ``offset`` and ``idx`` attributes.

    Returns
    -------
    callable
        ``f(x)`` returning a length-11 array holding 1.0 at ``idx`` for every
        action with ``onset <= x <= offset`` and 0.0 everywhere else.
    """
    def _fn(x):
        activity = np.zeros(11)
        for act in actions:
            if not (act.onset <= x <= act.offset):
                continue
            activity[act.idx] = 1.0
        return activity
    return _fn

# Gestural scores for the three syllables; the timings mirror the
# "Syllables" section above (positions are normalized to [0, 1]).
bas_sa = [
    SA(onset=0.3, offset=0.5, idx=lab_clos_full),
    SA(onset=0.6, offset=0.8, idx=api_clos_fric),
    SA(onset=0.3, offset=0.7, idx=glott_adduc_phon),
    SA(onset=0.6, offset=0.8, idx=glott_abduc_nophon),
    SA(onset=0.3, offset=0.7, idx=vow_aa),
]
kul_sa = [
    SA(onset=0.6, offset=0.8, idx=api_clos_lat),
    SA(onset=0.3, offset=0.5, idx=dor_clos_full),
    SA(onset=0.5, offset=0.8, idx=glott_adduc_phon),
    SA(onset=0.3, offset=0.6, idx=glott_abduc_nophon),
    SA(onset=0.3, offset=0.7, idx=vow_uu),
]
tip_sa = [
    SA(onset=0.6, offset=0.8, idx=lab_clos_full),
    SA(onset=0.3, offset=0.5, idx=api_clos_full),
    SA(onset=0.3, offset=0.7, idx=glott_adduc_phon),
    SA(onset=0.6, offset=0.8, idx=glott_abduc_nophon),
    SA(onset=0.3, offset=0.7, idx=vow_ii),
]

# Position -> speech-action vector decoders, one per syllable.
bas = sa_func(bas_sa)
kul = sa_func(kul_sa)
tip = sa_func(tip_sa)

In [None]:
def plot_sa(sa):
    """Plot the activity of all 11 speech actions across one syllable.

    ``sa`` is a list of speech actions accepted by ``sa_func``. The decoder is
    sampled at 100 evenly spaced positions in [0, 1] and drawn on the current
    matplotlib axes, one line per speech action.
    """
    decode = sa_func(sa)
    positions = np.linspace(0, 1, 100)
    # One row per sampled position, one column per speech action.
    activity = np.array([decode(pos) for pos in positions])
    plt.plot(positions, activity)
    plt.ylim(-0.1, 1.1)

# Show the three gestural scores stacked vertically: BAS, KUL, then TIP.
plt.figure(figsize=(10, 5))
plt.subplot(3, 1, 1)
plot_sa(bas_sa)
plt.subplot(3, 1, 2)
plot_sa(kul_sa)
plt.subplot(3, 1, 3)
plot_sa(tip_sa)

In [None]:
def speechactions(n_neurons, default_vel=0.001, vow_tau=0.02, cons_tau=0.001):
    """Create the ensemble array that represents the 11 speech actions.

    Parameters
    ----------
    n_neurons : int
        Number of neurons in each of the 11 ensembles.
    default_vel, vow_tau, cons_tau : float
        Used only to derive the vowel and consonant velocities below, which
        nothing consumes at the moment (see the disabled idea).

    Returns
    -------
    nengo.networks.EnsembleArray
        One ensemble per speech action, in the order of the speech action
        indices.
    """
    vow_vel = default_vel / vow_tau
    cons_vel = default_vel / cons_tau

    # One ensemble for each of the 11 speech actions listed above, in order.
    ensembles = nengo.networks.EnsembleArray(n_neurons, 11)

    # Disabled idea: recurrently connect each speech action ensemble to
    # itself to allow adjustable filtering, with a different gain for vowels
    # and consonants.
    # for i in range(11):
    #     velocity = (1 - vow_vel if i in (vow_ii, vow_aa, vow_uu)
    #                 else 1 - cons_vel)
    #     nengo.Connection(sa.ea_ensembles[i], sa.ea_ensembles[i],
    #                      transform=[velocity])

    return ensembles

def articulators(n_neurons, scale=2.0, n_articulators=11, n_speechactions=11):
    """Create the articulator ensemble array driven by speech actions.

    Parameters
    ----------
    n_neurons : int
        Number of neurons per articulator ensemble.
    scale : float
        Magnitude of every non-zero entry in the speech action -> articulator
        transform.
    n_articulators : int
        Number of articulator ensembles. Only the six articulator indices
        defined in this file receive input; all other rows stay zero.
    n_speechactions : int
        Dimensionality of the speech-action input.

    Returns
    -------
    nengo.networks.EnsembleArray
        The articulator array, with an extra ``sa_input`` passthrough node
        that accepts the speech-action vector.
    """
    art = nengo.networks.EnsembleArray(n_neurons, n_articulators, radius=2)

    # Node that accepts the speech-action vector directly.
    art.sa_input = nengo.Node(size_in=n_speechactions)

    # (articulator, speech action, sign) of each non-zero transform entry.
    influences = (
        (tongue_dors_hl, vow_ii, 1),
        (tongue_dors_hl, vow_aa, -1),
        (tongue_dors_hl, vow_uu, 1),
        (tongue_dors_fb, vow_ii, 1),
        (tongue_dors_fb, vow_uu, -1),
        (lips_protrusion, vow_uu, 1),
        (lips_constriction, lab_clos_full, 1),
        (lips_constriction, lab_clos_fric, 1),
        (ttip_constriction, api_clos_full, 1),
        (ttip_constriction, api_clos_fric, 1),
        (ttip_constriction, api_clos_lat, 1),
        (tdor_constriction, dor_clos_full, 1),
    )

    # Mapping from premotor speech actions to articulator trajectories.
    tr = np.zeros((n_articulators, n_speechactions))
    for articulator, action, sign in influences:
        tr[articulator, action] = sign * scale
    nengo.Connection(art.sa_input, art.input, transform=tr, synapse=None)

    return art

# Oscillator approach

In [None]:
from nengo.dists import Uniform

def zone(x):
    """Pass a 2-D oscillator state through unless it lies in the dead zone.

    The dead zone is the angular sector where ``arctan2(x[1], x[0])`` exceeds
    7/8 * pi. Inside it the output is a zero vector with the same shape as
    ``x``, so the return value has a consistent shape on both branches.
    Outside it ``x`` is returned unchanged.
    """
    theta = np.arctan2(x[1], x[0])
    if theta > (7.0 / 8) * np.pi:
        return np.zeros_like(x)
    return x

def radial_f(fn):
    """Wrap ``fn`` so it is evaluated on the phase of a 2-D point.

    The returned function takes ``x`` (indexable with ``x[0]`` and ``x[1]``),
    converts its angle ``arctan2(x[1], x[0])`` from [-pi, pi] to a phase in
    [0, 1], and returns ``fn(phase)``.
    """
    def _fn(x):
        angle = np.arctan2(x[1], x[0])
        phase = angle / (2 * np.pi) + 0.5
        return fn(phase)
    return _fn

In [None]:
# Adapted from the decode oscillator notebook
def oscillator(n_neurons, tau=0.025, freq=1.0):
    """Build a 2-D neural oscillator with a dead zone.

    Parameters
    ----------
    n_neurons : int
        Number of neurons in the oscillator ensemble.
    tau : float
        Time constant of the recurrent synapse.
    freq : float
        Oscillation frequency used to derive the rotation term ``omega``.

    Returns
    -------
    (nengo.Node, nengo.Ensemble)
        The 1-D input node (connected with transform ``[[-1], [0]]``) and the
        oscillator ensemble. Must be called inside a Nengo network context.
    """
    omega = tau * 2 * np.pi * freq

    # Encoders are unit vectors whose angles avoid the dead zone above
    # 7/8 * pi (the same sector that `zone` zeroes out).
    angles = np.random.uniform(-np.pi, (7.0 / 8) * np.pi, n_neurons)
    encoders = [[np.cos(angle), np.sin(angle)] for angle in angles]

    osc_input = nengo.Node(output=None, size_in=1, label="osc in")
    ens = nengo.Ensemble(
        n_neurons,
        dimensions=2,
        intercepts=Uniform(0.3, 1),
        encoders=encoders,
        label="oscillator",
    )
    nengo.Connection(osc_input, ens, transform=[[-1], [0]])

    # Recurrent rotation applied to the output of `zone`.
    rotation = [[1, -omega], [omega, 1]]
    nengo.Connection(ens, ens, transform=rotation, function=zone, synapse=tau)
    return osc_input, ens

In [None]:
# Recurrent synapse time constant and oscillation frequency handed to
# `oscillator` below.
tau = 0.025
freq = 3.3  # ~300 ms, so ~3.3 Hz oscillation

with nengo.Network() as net:
    # Speech action ensembles feeding the articulator ensembles
    # (150 neurons per ensemble in both arrays).
    sa = speechactions(150)
    art = articulators(150)
    nengo.Connection(sa.output, art.sa_input)

    # Make an oscillator with a dead zone
    osc_input, osc = oscillator(200, tau=tau, freq=freq)
    # Give it a bit of a kick: 0.8 while t < 0.1, then 0.
    osc_input.output = lambda t, x: 0.8 if t < 0.1 else 0.0

    # Decode the BAS speech actions from the oscillator's phase
    # (`radial_f` converts the 2-D state to a position in [0, 1]).
    nengo.Connection(osc, sa.input, function=radial_f(bas))

    # Probes
    osc_p = nengo.Probe(osc, synapse=0.01)
    sa_p = nengo.Probe(sa.output, synapse=0.01)
    art_p = nengo.Probe(art.output, synapse=0.03)

In [None]:
# Build the oscillator model and run it for 0.4 (Nengo simulation time,
# slightly longer than the ~300 ms syllable).
sim = nengo.Simulator(net)
sim.run(0.4)

In [None]:
t = sim.trange()
# Oscillator state: second dimension plotted against the first.
plt.figure()
plt.plot(sim.data[osc_p].T[0], sim.data[osc_p].T[1])
# Speech action activity over time.
plt.figure()
plt.plot(t, sim.data[sa_p])
plt.xlim(right=t[-1])
# Articulator trajectories over time.
plt.figure()
plt.plot(t, sim.data[art_p])
plt.xlim(right=t[-1])

In [None]:
# Total number of neurons across every ensemble in the model.
print(sum(ens.n_neurons for ens in net.all_ensembles))

In [None]:
# Synthesize articulatory trajectory and animate the sagittal contours.
# NOTE(review): the factor of 1000 is presumably a unit conversion expected
# by skspeech's Articulators -- confirm against that library.
arts = kr.Articulators(sim.data[art_p].T * 1000)
contours = kr.SagittalContours.from_articulators(arts)
kr.animate_sagittal(kr.structure, contours)

# Delayed delta function approach

In [None]:
from scipy.linalg import solve_lyapunov
from nengo.utils.filter_design import zpk2ss, tf2ss, cont2discrete

class LTI(object):
    """Linear time-invariant system in state-space form (A, B, C, D).

    The matrices are stored as NumPy arrays in the attributes ``a``, ``b``,
    ``c`` and ``d``. ``scale_to`` and ``to_sim`` (with ``copy=False``) modify
    the instance in place.
    """

    def __init__(self, a, b, c, d):
        self.a = np.array(a)
        self.b = np.array(b)
        self.c = np.array(c)
        self.d = np.array(d)

    @property
    def abcd(self):
        """The state-space matrices as the tuple ``(a, b, c, d)``."""
        return (self.a, self.b, self.c, self.d)

    @classmethod
    def from_synapse(cls, synapse):
        """Instantiate class from a Nengo synapse.

        Raises ``ValueError`` if ``synapse`` lacks ``num`` or ``den``.
        """
        if not hasattr(synapse, 'num') or not hasattr(synapse, 'den'):
            raise ValueError("Must be a linear filter with 'num' and 'den'")
        # tf2ss returns the (A, B, C, D) tuple, which must be unpacked.
        return cls(*tf2ss(synapse.num, synapse.den))

    @classmethod
    def from_tf(cls, num, den):
        """Instantiate class from a transfer function."""
        return cls(*tf2ss(num, den))

    @classmethod
    def from_zpk(cls, z, p, k):
        """Instantiate class from a zero-pole-gain representation."""
        return cls(*zpk2ss(z, p, k))

    def scale_to(self, radii=1.0):
        """Scales the system to give an effective radius of r to x.

        Applies the change of state variables ``x' = x / r`` in place, which
        leaves the input-output behavior of the system unchanged.

        ``radii`` is a scalar or a 1-dim array with one entry per state
        dimension; anything with more dimensions raises ``ValueError``.
        """
        r = np.asarray(radii, dtype=np.float64)
        if r.ndim > 1:
            raise ValueError("radii (%s) must be a 1-dim array or scalar" % radii)
        elif r.ndim == 0:
            r = np.ones(len(self.a)) * r
        # With T = diag(1 / r): A' = T A T^-1, B' = T B, C' = C T^-1.
        # Plain (not in-place) arithmetic also works for integer matrices.
        self.a = self.a / r[:, None] * r
        self.b = self.b / r[:, None]
        self.c = self.c * r

    def ab_norm(self):
        """Returns H2-norm of each component of x in the state-space.

        Equivalently, this is the H2-norm of each component of (A, B, I, 0).
        This gives the power of each component of x in response to white-noise
        input with uniform power.

        Useful for setting the radius of an ensemble array with continuous
        dynamics (A, B)
        """
        bbt = np.dot(self.b, self.b.T)
        p = solve_lyapunov(self.a, -bbt)  # AP + PA^H = Q
        assert np.allclose(np.dot(self.a, p) + np.dot(p, self.a.T) + bbt, 0)
        # The H2 norm of (A, B, C) is sqrt(tr(CXC^T)), so if we want the norm of
        # each component in the state-space representation, we evaluate this for
        # each elementary vector C separately, which is equivalent to just picking
        # out the diagonals
        return np.sqrt(np.diag(p))

    def to_sim(self, synapse, dt=0, copy=True):
        """Maps a state-space LTI to the synaptic dynamics on A and B.

        Parameters
        ----------
        synapse : nengo.Lowpass
            Synapse whose ``tau`` is used for the mapping; any other type
            raises ``TypeError``.
        dt : float
            ``0`` (or ``None``) maps the continuous-time system directly;
            otherwise the system is first discretized with step ``dt``.
        copy : bool
            If true, return a new ``LTI`` and leave this one untouched;
            otherwise overwrite this instance and return ``None``.
        """
        if not isinstance(synapse, nengo.Lowpass):
            raise TypeError("synapse (%s) must be Lowpass" % (synapse,))
        if dt is None or dt == 0:
            a = synapse.tau * self.a + np.eye(len(self.a))
            b = synapse.tau * self.b
            # Only A and B are remapped in the continuous case.
            c, d = self.c, self.d
        else:
            a, b, c, d, _ = cont2discrete(self.abcd, dt=dt)
            aa = np.exp(-dt / synapse.tau)
            a = 1. / (1 - aa) * (a - aa * np.eye(len(a)))
            b = 1. / (1 - aa) * b
        if copy:
            return LTI(a, b, c, d)
        else:
            self.a, self.b, self.c, self.d = a, b, c, d


def lti_net(lti, n_neurons, synapse=nengo.Lowpass(0.05),
            controlled=False, dt=0.001, radii=None, radius=1.0):
    """Build a Nengo network implementing the given LTI system.

    Parameters
    ----------
    lti : LTI
        System to implement. NOTE: it is modified in place (scaled to
        ``radii`` and mapped onto ``synapse``).
    n_neurons : int
        Number of neurons per state dimension.
    synapse : nengo.Lowpass
        Synapse used on the recurrent (A) and input (B) connections.
    controlled : bool
        If true, the state is held in a ``nengo.networks.Product`` network
        (recurrence feeds its ``A`` input) instead of an ``EnsembleArray``.
    dt : float
        Discretization step forwarded to ``lti.to_sim``.
    radii : scalar or 1-dim array, optional
        Per-dimension state radii; defaults to ``lti.ab_norm()``. The
        caller's object is not modified.
    radius : float
        Extra factor applied to ``radii``.

    Returns
    -------
    nengo.Network
        Network with ``input``, ``output`` and ``x`` attributes, plus the
        ``lti``, ``lti_synapse`` and size attributes set below.
    """
    net = nengo.Network()
    if radii is None:
        radii = lti.ab_norm()
    # Build a fresh array rather than `radii *= radius`, which would mutate
    # the caller's array and fail outright for plain lists.
    radii = np.asarray(radii, dtype=np.float64) * radius
    lti.scale_to(radii)  # Probably should require this outside of this function
    lti.to_sim(synapse, dt, copy=False)

    net.lti = lti
    net.lti_synapse = synapse
    net.size_in = lti.b.shape[1]
    net.size_state = lti.a.shape[0]
    net.size_out = lti.c.shape[0]

    with net:
        net.input = nengo.Node(size_in=net.size_in, label="input")
        net.output = nengo.Node(size_in=net.size_out, label="output")
        if controlled:
            # Referenced through the nengo namespace: a bare `Product` is not
            # defined anywhere in this file.
            net.x = nengo.networks.Product(n_neurons, net.size_state)
            connect_abcd(net, net.x, net.x.A, net.x.output)
        else:
            net.x = nengo.networks.EnsembleArray(n_neurons, net.size_state)
            connect_abcd(net, net.x, net.x.input, net.x.output)
    return net


def connect_abcd(net, x, x_in, x_out):
    """Wire the A, B, C and D matrices of ``net.lti`` into ``net``.

    ``x_in`` and ``x_out`` are the input and output of the state population.
    The A (recurrent) and B (input) connections use ``net.lti_synapse``; the
    C (readout) and D (passthrough) connections use no synapse. The four
    connections are stored on ``net`` as ``conn_A`` .. ``conn_D``.
    """
    assert hasattr(net, 'lti')
    system = net.lti
    a_mat, b_mat, c_mat, d_mat = system.abcd
    with net:
        # State dynamics: recurrence and input.
        net.conn_A = nengo.Connection(
            x_out, x_in, transform=a_mat, synapse=net.lti_synapse)
        net.conn_B = nengo.Connection(
            net.input, x_in, transform=b_mat, synapse=net.lti_synapse)
        # Output: state readout and direct feedthrough.
        net.conn_C = nengo.Connection(
            x_out, net.output, transform=c_mat, synapse=None)
        net.conn_D = nengo.Connection(
            net.input, net.output, transform=d_mat, synapse=None)


# No control yet; will do that later
def delay_net(delay, n_neurons, dimensions=1, degree=4):
    """Build a network whose output is its input delayed by ``delay``.

    Parameters
    ----------
    delay : float
        Desired delay. Values of at most 1e-3 produce a single passthrough
        node used as both input and output.
    n_neurons : int
        Number of neurons per state dimension of each delay filter.
    dimensions : int
        Number of independent signals; each gets its own LTI sub-network.
    degree : int
        Order (state dimensionality) of the LTI system built per signal.

    Returns
    -------
    nengo.Network
        Network with ``input`` and ``output`` nodes of size ``dimensions``
        and a ``degree`` attribute.
    """
    net = nengo.Network(label='delay=%.3f' % delay)
    net.degree = degree

    if delay <= 1e-3:
        with net:
            net.input = net.output = nengo.Node(
                size_in=dimensions, label="in/out")
        return net

    # State-space matrices of the order-`degree` delay filter.
    j = np.arange(degree) + 1
    u = (degree + j) * (degree - j + 1) / (delay * j)

    a = np.zeros((degree, degree))
    b = np.zeros((degree, 1))
    c = np.zeros((1, degree))
    d = np.zeros((1,))

    a[0, :] = b[0, 0] = -u[0]
    a[1:, :-1][np.diag_indices(degree-1)] = u[1:]
    d[0] = (-1) ** degree
    c[0, np.arange(degree) % 2 == 0] = 2*d[0]

    # Create everything inside this network's own context so the objects
    # belong to `net` rather than to whichever network the caller has open
    # (and so the function also works with no enclosing network).
    with net:
        net.input = nengo.Node(size_in=dimensions, label="in")
        net.output = nengo.Node(size_in=dimensions, label="out")
        for dimension in range(dimensions):
            # A fresh LTI per dimension: lti_net modifies it in place.
            lnet = lti_net(LTI(a, b, c, d), n_neurons)
            nengo.Connection(net.input[dimension], lnet.input, synapse=None)
            nengo.Connection(lnet.output, net.output[dimension], synapse=None)
    return net

In [None]:
def delayed_sa(n_neurons, degree=3, n_speechactions=11):
    """Build a bank of delay lines that schedule speech actions.

    The 300 ms window is covered by 9 evenly spaced delays (0 to 0.3
    inclusive, i.e. 37.5 ms apart). ``net.input`` takes the flattened
    (delay x speech action) matrix; slice ``i`` of it feeds the delay line
    for ``delays[i]``, and all delay-line outputs are summed into
    ``net.output``. The delay networks are stored in ``net.dnets``, keyed by
    delay value.
    """
    delays = np.linspace(0, 0.3, 9)

    # not sure if this helps or is more annoying...
    # but let's make a node that takes the flattened delay matrix as input.
    with nengo.Network(label='delayed_sa') as net:
        net.input = nengo.Node(size_in=delays.size * n_speechactions)
        net.output = nengo.Node(size_in=n_speechactions)
        net.dnets = {}
        for i, delay in enumerate(delays):
            start = i * n_speechactions
            stop = start + n_speechactions
            dnet = delay_net(delay, n_neurons,
                             dimensions=n_speechactions,
                             degree=degree)
            net.dnets[delay] = dnet
            nengo.Connection(net.input[start:stop], dnet.input, synapse=None)
            nengo.Connection(dnet.output, net.output, synapse=None)
    return net

# There are 11 speech actions. So, our delay matrix is 9 x 11
# Row i corresponds to the i-th delay line built by `delayed_sa`; a 1 at
# [i, action] routes the kick through that delay line to that speech action.
bas_dm = np.zeros((9, 11))
bas_dm[2:5, lab_clos_full] = 1
bas_dm[5:8, api_clos_fric] = 1
bas_dm[2:7, glott_adduc_phon] = 1
bas_dm[5:8, glott_abduc_nophon] = 1
bas_dm[2:7, vow_aa] = 1

kul_dm = np.zeros((9, 11))
kul_dm[5:8, api_clos_lat] = 1
kul_dm[2:5, dor_clos_full] = 1
kul_dm[4:8, glott_adduc_phon] = 1
kul_dm[2:6, glott_abduc_nophon] = 1
kul_dm[2:7, vow_uu] = 1

tip_dm = np.zeros((9, 11))
tip_dm[5:8, lab_clos_full] = 1
tip_dm[2:5, api_clos_full] = 1
tip_dm[2:7, glott_adduc_phon] = 1
tip_dm[5:8, glott_abduc_nophon] = 1
tip_dm[2:7, vow_ii] = 1

with nengo.Network() as net:
    # Speech action ensembles feeding the articulator ensembles
    sa = speechactions(150)
    art = articulators(150)
    nengo.Connection(sa.output, art.sa_input)

    # Make a delay network for delaying the kick
    delay = delayed_sa(150)
    nengo.Connection(delay.output, sa.input)

    # Our kick input: 1 while t < 0.01, then 0
    kick = nengo.Node(lambda t: 1 if t < 0.01 else 0.0)

    # Connect with our delay matrix: the row-major flattened BAS matrix,
    # as a column vector, fans the scalar kick out to every
    # (delay, speech action) input.
    nengo.Connection(kick, delay.input, transform=bas_dm.ravel()[:, np.newaxis])

    # Probes
    sa_p = nengo.Probe(sa.output, synapse=0.01)
    art_p = nengo.Probe(art.output, synapse=0.03)

In [None]:
# Build the delay-line model and run it for 0.4 (Nengo simulation time).
sim = nengo.Simulator(net)
sim.run(0.4)

In [None]:
t = sim.trange()
# Speech action activity over time.
plt.figure()
plt.plot(t, sim.data[sa_p])
plt.xlim(right=t[-1])
# Articulator trajectories over time.
plt.figure()
plt.plot(t, sim.data[art_p])
plt.xlim(right=t[-1])

In [None]:
# Total number of neurons across every ensemble in the delay-line model.
print(sum(ens.n_neurons for ens in net.all_ensembles))

In [None]:
# Synthesize articulatory trajectory and animate the sagittal contours.
# NOTE(review): the factor of 1000 is presumably a unit conversion expected
# by skspeech's Articulators -- confirm against that library.
arts = kr.Articulators(sim.data[art_p].T * 1000)
contours = kr.SagittalContours.from_articulators(arts)
kr.animate_sagittal(kr.structure, contours)