# Qualitative Evaluation of midi2params
Notebook to qualitatively evaluate our trained midi2params model. This has a lot of extra details and is not *that* user-friendly, so be warned.

In [None]:
# Running interact.py with %run leaves that script's final global state
# (everything it defined or imported) available in this notebook.
%run ../midi2params/interact.py
import copy

### Let's first get our batch and see what's in it. Here you can choose which example to look at (with `i`).

In [None]:
# Take only the FIRST batch. The learned-model section further down re-fetches
# the first batch with `break`; looping to exhaustion here would instead leave
# `batch` bound to the LAST batch, so the same index `i` would point at a
# different example (assuming test_loader is not shuffled -- TODO confirm).
for batch in test_loader:
    break

In [None]:
# Index, within the batch, of the example inspected in the cells below.
i = 7
# Show which fields the batch provides.
print(batch.keys())

# First, play the original audio.

In [None]:
from utils.util import preview_audio

# Take example `i` from the batch, convert it to numpy and append a trailing
# axis before handing it to the audio preview.
# (`to_numpy` and `np` are presumably provided by the %run of interact.py.)
audio = to_numpy(batch['audio'][i])[..., np.newaxis]
preview_audio(audio)

In [None]:
# Waveform of the selected example. The x axis is spread over 0..5
# (presumably seconds, i.e. 5 s clips -- TODO confirm against the data config).
# NOTE(review): `plt` is used here before the notebook's own matplotlib import
# in a later cell; presumably interact.py already imported it.
plt.plot(np.linspace(0, 5, audio.flatten().shape[0]), audio.flatten())
plt.yticks([])  # hide the amplitude ticks
plt.title('Audio')

# Synthesized from features extracted with DDSP
Now, synthesize with DDSP from the features extracted with DDSP.

In [None]:
# Extract synthesis parameters

import matplotlib.pyplot as plt

from utils.util import extract_ddsp_synthesis_parameters

# Run the DDSP feature extraction on the original audio; the result is indexed
# below by 'f0_hz' and 'loudness_db'.
audio_parameters = extract_ddsp_synthesis_parameters(audio)

# Extracted f0 curve, first 1250 frames.
plt.title('f0(t)')
plt.plot(audio_parameters['f0_hz'], color='orange')
plt.xlim(0, 1250)
plt.show()
# Extracted loudness curve, first 1250 frames, y axis fixed to -120..0.
plt.title('l(t)')
plt.plot(audio_parameters['loudness_db'])
plt.xlim(0, 1250)
plt.ylim(-120, 0)
plt.show()

In [None]:
def normalize(x):
    """Min-max rescale `x` so its smallest value maps to 0 and its largest to 1."""
    lowest = x.min()
    highest = x.max()
    return (x - lowest) / (highest - lowest)


# Scatter the MIDI pitches of example `i` (skipping its first 100 entries)
# against an x axis spread over 0..5.
plt.figure(figsize=(16, 4))
N = len(batch['pitches'][i][100:])
plt.scatter(
    np.linspace(0, 5, N),
    normalize(batch['pitches'][i][100:]),
    s=2,
)
#plt.plot(normalize(audio_parameters['f0_hz']))
plt.xlim(0, 5)
plt.yticks([])
plt.title('MIDI Piano Roll')

In [None]:
# Load model

from utils.util import load_ddsp_model

# The DDSP model is loaded from a checkpoint path; the commented-out line is
# the alternative of loading it by name ('Violin').
#model = load_ddsp_model('Violin')
ckpt_path = '../checkpoints/CustomViolinCheckpoint'
model = load_ddsp_model(ckpt_path)

In [None]:
# Resynthesize parameters

from utils.util import synthesize_ddsp_audio

# Feed the parameters extracted from the original audio back through the
# DDSP model and listen to the result.
resynth = synthesize_ddsp_audio(model, audio_parameters)

preview_audio(resynth)

# Synthesize with *heuristically generated* features from MIDI
Now, synthesize with DDSP from the features *heuristically generated* from associated MIDI.

In [None]:
def generate_loud(beats, length=2500, decay=True):
    """
    Build a heuristic loudness curve (in dB) from note-onset frame indices.

    The curve is silent (-120 dB) up to the first onset. Each note then starts
    at -30 dB and, when `decay` is true, falls linearly by 0.01 dB per frame
    until the next onset (or the end of the curve for the last note).

    Args:
        beats: increasing sequence of integer onset frame indices.
        length: number of frames in the returned curve. Defaults to 2500,
            the size the earlier implementation always produced (it ignored
            this argument).
        decay: if False, every note is held flat at -30 dB.

    Returns:
        1-D float numpy array with `length` loudness values.
    """
    silence_db = -120.0
    base = -30.0
    decay_rate = -0.01 if decay else 0.0  # dB change per frame
    # Float fill value on purpose: an integer buffer would truncate the
    # fractional decay values written into it below.
    ld_arr = np.full(length, silence_db)
    n_beats = len(beats)
    for idx, beat in enumerate(beats):
        next_beat = beats[idx + 1] if idx + 1 < n_beats else length
        stop = min(next_beat, length)
        if beat >= stop:
            # Note starts at/after the end of the curve or has zero length.
            continue
        span = next_beat - beat
        ramp = np.linspace(base, base + decay_rate * span, span)
        # Trim the ramp when the note runs past the end of the curve.
        ld_arr[beat:stop] = ramp[:stop - beat]

    return ld_arr


def gen_heuristic(batch, i=0):
    """
    Heuristically derive an f0 curve and a loudness curve for example `i`
    of `batch`, using its 'pitches' and 'onset_arr' entries.

    Returns:
        (f0, ld): `p2f` applied to a copy of the example's pitches, and
        `generate_loud` applied to the example's onset indices.
    """
    onset_flags = batch['onset_arr'][i]
    onsets = np.where(onset_flags == 1)[0]
    # If no onset falls within the first 30 frames, prepend one at frame 10.
    if not np.any(onsets < 30):
        onsets = np.concatenate(([10], onsets))

    ld = generate_loud(onsets)
    # Work on a copy so the batch's pitches are not touched by the conversion.
    f0 = p2f(copy.deepcopy(batch['pitches'][i]))
    return f0, ld

In [None]:
# Heuristic f0 and loudness curves for the selected example.
f0_h, ld_h = gen_heuristic(batch, i=i)

In [None]:
plt.title('f0(t)')
# Add a sinusoidal wobble of amplitude 3 on top of the heuristic f0; abs()
# keeps the result non-negative. The time axis is sized from f0_h itself
# instead of a hard-coded frame count, so other sequence lengths also work.
f0_h_sin = np.abs(np.array(f0_h) + 3 * np.sin(np.arange(np.shape(f0_h)[-1]) * .15))
plt.plot(f0_h, color='orange')
plt.plot(f0_h_sin, color='red')
plt.xlim(0, 1250)
plt.show()
# Heuristic loudness against the loudness extracted from the original audio.
plt.title('l(t)')
plt.plot(audio_parameters['loudness_db'], label='loudness (ground truth)')
plt.plot(ld_h, label='loudness (generated)')
plt.ylim(-120, 0)
plt.xlim(0, 1250)
plt.legend()
plt.show()

In [None]:
# Wrap the wobbled f0 curve in a torch FloatTensor.
torch_f0_h_sin = torch.FloatTensor(f0_h_sin)
#torch_f0_h_sin.dtype = torch.float32

In [None]:
# Synthesis parameters built from the heuristic curves.
# NOTE(review): 'f0_hz' is a torch tensor while 'loudness_db' is a numpy
# array -- confirm that synthesize_ddsp_audio accepts this mix.
heuristic_parameters = {
    'f0_hz': torch_f0_h_sin.type(torch.float32),
    'loudness_db': ld_h.astype(np.float32)
}
params = heuristic_parameters

In [None]:
# Resynthesize parameters

from utils.util import synthesize_ddsp_audio

# Synthesize audio with the DDSP model from the heuristic parameters and
# listen to it.
heuristic_resynth = synthesize_ddsp_audio(model, params)

preview_audio(heuristic_resynth)

# Synthesize from feature outputs from *learned model*
Now, synthesize with DDSP from the features generated from the associated MIDI *with our trained model*.

In [None]:
# Path of the trained checkpoint handed to load_best_model below.
model_path = '../model/best_model.pt'

### Load the model and generate!

In [None]:
# Re-fetch a fresh copy of the FIRST batch from the test loader.
# NOTE(review): the index `i` chosen earlier is reused below, so make sure
# this is the same batch the earlier sections were computed from.
for batch in test_loader:
    break

In [None]:
# Load the trained model from `model_path` using the current config.
best_model = load_best_model(config, model_path)

In [None]:
# When running on GPU, cast every batch entry to float and move it to CUDA.
# Nothing is converted when config.device is anything else.
if config.device == 'cuda':
    for k, arr in batch.items():
        batch[k] = torch.Tensor(arr.float()).cuda()

In [None]:
# Run the trained model on the batch to get predicted f0 and loudness.
f0_pred, ld_pred = midi2params(best_model, batch)

In [None]:
# Replace every batch entry, in place, with its numpy conversion.
for key in list(batch):
    batch[key] = to_numpy(batch[key])

In [None]:
# Compare the model's predictions for example `i` with the batch's own
# 'f0' and 'loudness_db' entries.
print(i)
f0 = batch['f0'][i]
ld = batch['loudness_db'][i]
# f0: prediction vs. ground truth, first 1250 frames.
plt.figure(figsize=(10,5))
plt.title('f0(t) comparison')
plt.plot(f0_pred[i], label='f0 (generated)')
plt.plot(f0, alpha=0.5, label='f0 (ground truth)')
plt.xlim(0, 1250)
plt.legend()
plt.show()
# Loudness: prediction vs. ground truth, first 1250 frames, y axis -120..0.
plt.figure(figsize=(10,5))
plt.title('l(t)')
plt.plot(ld_pred[i], label='loudness (generated)')
plt.plot(ld, alpha=0.5, label='loudness (ground truth)')
plt.xlim(0, 1250)
plt.ylim(-120, 0)
plt.legend()
plt.show()

In [None]:
# Synthesis parameters taken from the trained model's predictions for example `i`.
train_params = {
    'f0_hz': f0_pred[i],
    'loudness_db': ld_pred[i]
}

In [None]:
# Resynthesize parameters

from utils.util import synthesize_ddsp_audio, preview_audio

# `model` here is the DDSP synthesis model loaded from the checkpoint earlier
# (not `best_model`); it renders audio from the predicted parameters.
new_model_resynth = synthesize_ddsp_audio(model, train_params)

preview_audio(new_model_resynth)

# Now, all of them side-by-side

## Original Audio

In [None]:
preview_audio(audio)

## Direct DDSP Features

In [None]:
preview_audio(resynth)

## Heuristically Generated Features

In [None]:
preview_audio(heuristic_resynth)

## Features from Trained Model

In [None]:
preview_audio(new_model_resynth)