In [None]:
%matplotlib inline
import torch

import numpy as np
import librosa

from performer.models.ddsp_module import DDSP
from performer.datamodules.components.ddsp_dataset import DDSPDataset

from IPython.display import Audio
from matplotlib import pyplot as plt

In [None]:
# Dataset files, one per instrument (opened with DDSPDataset further down).
vln_data = "../data/violin_samples.pth"
vlc_data = "../data/cello_samples.pth"
flt_data = "../data/flute_samples.pth"

# Model checkpoints, one per instrument (violin, cello, flute).
vln_ckpt = "../checkpoints/violin_longrun.ckpt"
vlc_ckpt = "../checkpoints/cello_longrun.ckpt"
flt_ckpt = "../checkpoints/flute_longrun.ckpt"

In [None]:
# Print the mean and standard deviation of the `loudness` attribute of each
# instrument's dataset, prefixed by the file it was loaded from.
for path in (vln_data, vlc_data, flt_data):
    dataset = DDSPDataset(path)
    loudness = dataset.loudness
    print(path, loudness.mean(), loudness.std())

In [None]:
def midi_to_hz(midi: float) -> float:
    """Convert a MIDI note number to a frequency in hertz.

    MIDI note 69 maps to 440 Hz and every 12 notes doubles the frequency.
    Only arithmetic operators are used, so the notebook also calls this with
    a torch tensor of note numbers and gets a tensor of frequencies back.
    """
    octaves_from_reference = (midi - 69) / 12
    return 440. * 2**octaves_from_reference

def hz_to_midi(hz: "float | torch.Tensor") -> torch.Tensor:
    """Convert a frequency in hertz to a (fractional) MIDI note number.

    Inverse of the equal-temperament mapping in which 440 Hz is MIDI note 69
    and each doubling of frequency adds 12 notes.

    Args:
        hz: Frequency or frequencies in hertz, as a Python scalar or a tensor.

    Returns:
        A tensor of MIDI note numbers with the same shape as ``hz``
        (0-dimensional for scalar input).
    """
    # torch.log2 only accepts tensors; coerce Python scalars so the function
    # works for the plain floats its signature advertises. Tensors pass
    # through as_tensor unchanged (no copy).
    hz = torch.as_tensor(hz)
    return 12 * torch.log2(hz / 440) + 69

In [None]:
def adsr(
    ts: float, a: float, d: float, r: float, zero: float = 0., peak: float = 1., s: float = 0.5,
    rate: float = 250, device=None,
) -> torch.Tensor:
    """Build a piecewise-linear attack/decay/sustain/release envelope.

    Args:
        ts: Duration of the sustain segment, in seconds.
        a: Duration of the attack segment (``zero`` -> ``peak``), in seconds.
        d: Duration of the decay segment (``peak`` -> ``s``), in seconds.
        r: Duration of the release segment (``s`` -> ``zero``), in seconds.
        zero: Level at the start of the attack and the end of the release.
        peak: Level reached at the end of the attack.
        s: Constant level held during the sustain segment.
        rate: Number of envelope frames per second. Each segment length is
            truncated to a whole number of frames.
        device: Device for the returned tensor. ``None`` selects CUDA when it
            is available and falls back to the CPU otherwise.

    Returns:
        A tensor of shape ``(1, 1, n_frames)`` holding the four segments
        concatenated in order.
    """
    if device is None:
        # The original always called .cuda(), which raises on machines
        # without a GPU; fall back to the CPU there instead.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    attack = torch.linspace(zero, peak, int(a * rate))
    decay = torch.linspace(peak, s, int(d * rate))
    sustain = torch.ones(int(ts * rate)) * s
    release = torch.linspace(s, zero, int(r * rate))

    # Two leading singleton dimensions are added before moving to `device`.
    env = torch.cat([attack, decay, sustain, release])[None, None, :]

    return env.to(device)

In [None]:
def sin(ts: float, f: float, rate: float = 250, device=None) -> torch.Tensor:
    """Generate a sine wave of a given duration.

    Args:
        ts: Duration in seconds; the number of frames is ``int(ts * rate)``.
        f: Frequency of the sine in hertz.
        rate: Number of frames per second.
        device: Device for the returned tensor. ``None`` selects CUDA when it
            is available and falls back to the CPU otherwise.

    Returns:
        A 1-D float32 tensor with ``int(ts * rate)`` frames, starting at
        phase zero.
    """
    if device is None:
        # Hard-coding 'cuda' made this unusable on CPU-only machines.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Time stamp of every frame, in seconds.
    t = torch.arange(int(ts * rate), dtype=torch.float32, device=device) / rate
    result = torch.sin(2 * np.pi * f * t)

    return result

def sin_like(ts: torch.Tensor, f: float, rate: float = 250, device=None) -> torch.Tensor:
    """Generate a sine wave with as many frames as the last axis of ``ts``.

    Args:
        ts: Reference tensor; only its last-dimension length (and, by
            default, its device) are used, never its values.
        f: Frequency of the sine in hertz.
        rate: Number of frames per second.
        device: Device for the returned tensor. ``None`` uses the device of
            ``ts`` so the result can be combined with it directly.

    Returns:
        A 1-D float32 tensor of length ``ts.shape[-1]``, starting at phase
        zero.
    """
    if device is None:
        # Previously hard-coded to 'cuda', which failed without a GPU and
        # mismatched the device of a CPU reference tensor.
        device = ts.device

    # Time stamp of every frame, in seconds.
    t = torch.arange(ts.shape[-1], dtype=torch.float32, device=device) / rate
    result = torch.sin(2 * np.pi * f * t)

    return result

In [None]:
# Load the DDSP model from the flute checkpoint onto the GPU and switch it to
# evaluation mode.
with torch.inference_mode():
    model = DDSP.load_from_checkpoint(flt_ckpt, map_location='cuda').to('cuda')
    model.eval()

In [None]:
# Build the two control curves fed to the model: a loudness envelope (`amp`)
# and a pitch curve (`f0`), both with `frame_rate` frames per second.
frame_rate = 250
device = 'cuda'  # same device the model is loaded on

# Envelope levels. NOTE(review): sus/std look like the dataset loudness
# mean/std printed earlier, with the peak two std above the mean — confirm.
zero, peak, sus, std = -100, -48+15*2, -48, 15
# Segment durations in seconds: attack, decay, sustain, release.
a, d, s, r = 0.1, 0.4, 1, 2

attack = torch.linspace(zero, peak, int(a*frame_rate), device=device)
decay = torch.linspace(peak, sus, int(d*frame_rate), device=device)
# The sustain segment is not flat: it swells from `sus` up to `sus + std`.
sustain = torch.linspace(sus, sus+std, int(s*frame_rate), device=device)
release = torch.linspace(sus+std, zero, int(r*frame_rate), device=device)
# Four seconds held at the floor level after the release.
silence = torch.ones(int(4*frame_rate), device=device) * zero

amp = torch.cat([attack, decay, sustain, release, silence])

# Add a small amount of Gaussian noise to the envelope. A dedicated seeded
# generator makes the cell reproducible without touching the global RNG.
noise_gen = torch.Generator(device=device).manual_seed(0)
amp += torch.randn(amp.shape, generator=noise_gen, device=device) * 0.01

# Pitch curve in MIDI note numbers: constant note 59 with a 4 Hz vibrato of
# +/- 0.125 semitones.
f0 = torch.ones_like(amp) * 59.
f0 += sin_like(f0, 4) * 0.125
# Raise the last six seconds' worth of frames by three semitones.
f0[-6*frame_rate:] += 3
f0 = midi_to_hz(f0)

In [None]:
# Run the model on the control curves; each curve gets two leading singleton
# dimensions before being passed in.
with torch.inference_mode():
    pitch_in = f0.unsqueeze(0).unsqueeze(0)
    loudness_in = amp.unsqueeze(0).unsqueeze(0)
    y = model(pitch_in, loudness_in)

# Bring the output back to host memory as a NumPy array and drop singleton
# dimensions so it can be handed to the audio widget.
waveform = y.cpu().numpy()
_y = waveform.squeeze()

# Last expression of the cell: renders an audio player at 48 kHz.
Audio(data=_y, rate=48000)