# experiment with random variation on encoded audio signals

process:

1. encode an audio file with an nsynth model
2. apply slight random variations to the encoding
3. decode the audio (preferably in one batch)

##### import

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
from numpy.random import default_rng
%matplotlib inline
%config InlineBackend.figure_format = 'jpg'

##### load the audio file

In [None]:
# Input clip, target sample rate and the clip length (in seconds) to load.
filename = os.path.join('in', 'iphone-ding-sound.wav')
sample_rate = 16000
length = 3

# Request exactly `length` seconds' worth of samples at `sample_rate`.
requested_samples = int(sample_rate * length)
audio = utils.load_audio(
    filename,
    sample_length=requested_samples,
    sr=sample_rate,
)

# Report how many samples were actually returned and the duration they span.
sample_length = audio.shape[0]
summary = f'{sample_length} samples, {sample_length / float(sample_rate)} seconds'
print(summary)


##### encode the audio with nsynth

In [None]:
%%time
encoding = fastgen.encode(audio,
                          os.path.join('models', 'wavenet-ckpt', 'model.ckpt-200000'),
                          sample_length)

In [None]:
# Show the dimensions of the array returned by fastgen.encode; later cells
# index it as encoding[0] and read encoding.shape[1].
print(encoding.shape)

optionally save the encoded tensor

In [None]:
# Persist the encoding as npy/<input file stem>.npy so it can be reused
# without running the encoder again.
npy_dir = 'npy'

# np.save opens the target path directly and does not create missing parent
# directories, so make sure the folder exists before writing.
os.makedirs(npy_dir, exist_ok=True)

input_stem = os.path.splitext(os.path.basename(filename))[0]
np.save(os.path.join(npy_dir, f'{input_stem}.npy'), encoding)


##### examine the audio & encoding

In [None]:
# Two stacked panels: the raw waveform on top, the first (only) item of the
# encoding batch underneath.
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
audio_ax, encoding_ax = axs

audio_ax.plot(audio)
audio_ax.set_title('audio signal')

encoding_ax.plot(encoding[0])
encoding_ax.set_title('nsynth encoding')

## introduce variations

a set number of times, vary the encoded matrix slightly, then collect all variations into a single array for parallel decoding

### random deviation on all dimensions

In [None]:
# Build a batch of copies of the encoding and add zero-mean Gaussian noise to
# every dimension. The noise scale ramps linearly across the batch from 0
# (first variant, left unmodified) up to 0.1 (last variant).
rng = default_rng()

copy = np.copy(encoding[0])
new_encoding = np.tile(copy, (5, 1, 1))
n_variants = new_encoding.shape[0]

# Remember the scale applied to each variant so every plot can be labelled
# with its own value rather than with whatever the loop variable held last.
scales = [i * 0.1 / (n_variants - 1) for i in range(n_variants)]

for variant, scale in zip(new_encoding, scales):
    # `variant` is a view into new_encoding, so the in-place add updates the
    # batch. A single draw of the full shape perturbs all dimensions at once.
    variant += rng.normal(scale=scale, size=variant.shape)

fig, axs = plt.subplots(n_variants, 1, figsize=(10, n_variants * 2.5))

for ax, variant, scale in zip(axs, new_encoding, scales):
    ax.plot(variant)
    # Same 3-decimal formatting as the file names used when decoding.
    ax.set_title(f'nsynth encoding w/ deviations of scale {scale:.3f}')


##### decode all variations and save as .wavs

In [None]:
# Decode the whole batch in one call, writing one .wav per variation. Each file
# is named after the noise scale applied to that variation (same formula as in
# the cell that generated the batch).
out_dir = 'out'

# NOTE(review): assumes fastgen.synthesize does not create the output folder
# itself; creating it up front is harmless if it does.
os.makedirs(out_dir, exist_ok=True)

n_variants = new_encoding.shape[0]
source_name = os.path.basename(filename)
save_paths = [
    os.path.join(out_dir,
                 f'gen_mod{i * 0.1 / (n_variants - 1):.3f}_{source_name}')
    for i in range(n_variants)
]

fastgen.synthesize(
    new_encoding,
    save_paths=save_paths,
    checkpoint_path=os.path.join('models', 'wavenet-ckpt',
                                 'model.ckpt-200000'),
    samples_per_save=sample_length)


### random deviations on some dimensions

In [None]:
# Build one variant per encoding dimension: variant i gets Gaussian noise
# (scale 0.1) on its first i dimensions, so variant 0 is the unmodified
# encoding and each following variant perturbs one more dimension.
rng = default_rng()

copy = np.copy(encoding[0])

# Size the batch from the encoding itself instead of from a `new_encoding`
# left over from an earlier cell, so this cell also works when run on its own.
n_dims = copy.shape[-1]
new_encoding = np.tile(copy, (n_dims, 1, 1))

for i, variant in enumerate(new_encoding):
    # `variant` is a view into new_encoding; for i == 0 the slice is empty and
    # nothing is changed.
    variant[:, :i] += rng.normal(scale=0.1, size=(variant.shape[0], i))

fig, axs = plt.subplots(n_dims, 1, figsize=(10, n_dims * 2.5))

for i, (ax, variant) in enumerate(zip(axs, new_encoding)):
    ax.plot(variant)
    ax.set_title(f'nsynth encoding w/ deviations on {i} axes')


##### decode all variations and save as .wavs

In [None]:
# Decode the whole batch in one call, writing one .wav per variation. Each file
# is named after the number of encoding dimensions that were perturbed.
out_dir = 'out'

# NOTE(review): assumes fastgen.synthesize does not create the output folder
# itself; creating it up front is harmless if it does.
os.makedirs(out_dir, exist_ok=True)

source_name = os.path.basename(filename)
save_paths = [
    os.path.join(out_dir, f'gen_mod{i}axes_{source_name}')
    for i in range(new_encoding.shape[0])
]

fastgen.synthesize(
    new_encoding,
    save_paths=save_paths,
    checkpoint_path=os.path.join('models', 'wavenet-ckpt',
                                 'model.ckpt-200000'),
    samples_per_save=sample_length)


### rampup in some dimensions

### losing certain dimensions