In [None]:
import librosa
import numpy as np
import plotly.express as px
import soundfile as sf
import torch
from IPython.display import Audio

from performer.models.ddsp_module import DDSP

In [None]:
# Relative path of the checkpoint the DDSP model is restored from.
VLC_CKPT = "../checkpoints/cello_longrun.ckpt"

# Control-curve resolution: frames generated per second of signal.
FPS = 250

# Rate handed to the audio player.
# NOTE(review): presumably this must equal the model's output sample rate — confirm.
SAMPLE_RATE = 48_000

In [None]:
# Prefer the GPU, but fall back to the CPU so the checkpoint can still be
# loaded on a machine without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The notebook only runs inference, so the model is restored with autograd
# tracking disabled.
with torch.inference_mode():
    # map_location remaps the stored tensors onto DEVICE while loading.
    model = DDSP.load_from_checkpoint(VLC_CKPT, map_location=DEVICE)
    model = model.to(DEVICE)
    # Switch layers such as dropout / batch-norm to evaluation behaviour.
    model.eval()

In [None]:
def t(end, start=0.0, fps=None):
    """Return evenly spaced time stamps between ``start`` and ``end``.

    Parameters
    ----------
    end : float
        Last time stamp (included in the output).
    start : float, optional
        First time stamp (included in the output). Defaults to ``0.0``.
    fps : float, optional
        Frames per second used to derive the number of samples. When left
        as ``None`` the module-level ``FPS`` constant is used, which is the
        original behaviour.

    Returns
    -------
    numpy.ndarray
        1-D ``float32`` array with ``round((end - start) * fps)`` entries.

    Notes
    -----
    ``np.linspace`` includes both end points, so the spacing between
    consecutive stamps is ``duration / (n - 1)`` rather than ``1 / fps``.
    ``np.round`` rounds halves to the nearest even integer, e.g. a product
    of 2.5 frames yields 2 samples.
    """
    if fps is None:
        # Resolved at call time so a later change to FPS is picked up.
        fps = FPS
    duration = end - start
    n_frames = int(np.round(duration * fps))
    return np.linspace(start, end, n_frames, dtype="float32")

In [None]:
def line(x1, x2, duration):
    """Linear ramp from ``x1`` towards ``x2`` lasting ``duration`` seconds.

    The ramp is evaluated on the time stamps produced by ``t(duration)``,
    so its length and dtype follow that grid.
    """
    time_axis = t(duration)
    rise = x2 - x1
    # Multiply first, then divide: keeps the original evaluation order so the
    # float32 rounding is unchanged.
    scaled = time_axis * rise
    return scaled / duration + x1

In [None]:
# MIDI note number held for the whole signal, and how many times the short
# note is repeated after the crescendo.
MIDI_PITCH = 29.0
N_REPEATS = 5

dbs = []
freqs = []

# make segment: a 3 s loudness ramp from -70 dB to -14 dB at constant pitch
dur = 3.0
db = line(-70, -14, dur)
pitch = MIDI_PITCH * np.ones_like(db)
f0 = librosa.midi_to_hz(pitch)

# append to list of segments
dbs.append(db)
freqs.append(f0)

# make another segment: a short note with a fast attack and a slower decay
attack = line(-90, -20, 0.01)
decay = line(-20, -40, 0.24)
db = np.concatenate((attack, decay))
pitch = MIDI_PITCH * np.ones_like(db)
f0 = librosa.midi_to_hz(pitch)

# append the same note N_REPEATS times (the arrays are only read afterwards,
# so sharing the same objects is safe)
dbs.extend([db] * N_REPEATS)
freqs.extend([f0] * N_REPEATS)

# concat all segments into one signal
db = np.concatenate(dbs)
f0 = np.concatenate(freqs)

# render audio
# Send the inputs to whatever device the model lives on instead of assuming CUDA.
model_device = next(model.parameters()).device
with torch.inference_mode():
    # [None, None, :] adds two leading singleton axes -> shape (1, 1, n_frames).
    y = model(
        torch.from_numpy(f0[None, None, :]).to(model_device),
        torch.from_numpy(db[None, None, :]).to(model_device),
    )

# play audio
Audio(data=y.cpu().squeeze(), rate=SAMPLE_RATE)