In [None]:
%matplotlib inline
import torch

import numpy as np
from scipy import interpolate
import librosa
from soundfile import write
import music21

import random
from copy import deepcopy

from performer.models.ddsp_module import DDSP
from performer.datamodules.components.ddsp_dataset import DDSPDataset

from IPython.display import Audio, Image
from matplotlib import pyplot as plt

# import warnings
# warnings.filterwarnings("ignore")
# warnings.simplefilter("ignore")

In [None]:
# Tell music21 which executable to use for its 'musescoreDirectPNGPath' setting
# (presumably the MuseScore binary used to render scores as PNG — confirm).
# NOTE(review): absolute, machine-specific path; adjust for the local installation.
us = music21.environment.UserSettings()
us['musescoreDirectPNGPath'] = '/usr/bin/musescore'

In [None]:
# Model checkpoints, one per instrument (paths are relative to the notebook's working directory).
# Only `flt_ckpt` is loaded in the cells visible here.
vln_ckpt = '../checkpoints/violin_longrun.ckpt'
vlc_ckpt = '../checkpoints/cello_longrun.ckpt'
flt_ckpt = '../checkpoints/flute_longrun.ckpt'

# Sample files that are opened with DDSPDataset below.
vln_data = "../data/violin_samples.pth"
vlc_data = "../data/cello_samples.pth"
flt_data = "../data/flute_samples.pth"

In [None]:
# Print the mean and standard deviation of each dataset's `loudness` attribute,
# e.g. to pick sensible loudness values for the hand-built envelope further down.
for path in [vln_data, vlc_data, flt_data]:
    dataset = DDSPDataset(path)
    print(path, dataset.loudness.mean(), dataset.loudness.std())

In [None]:
def midi_to_hz(midi: float) -> float:
    """Convert a MIDI note number to a frequency in Hz (equal temperament, A4 = MIDI 69 = 440 Hz).

    Works element-wise on tensors as well, because only arithmetic operators are used.
    """
    octaves_from_a4 = (midi - 69) / 12
    return 440. * 2 ** octaves_from_a4

def hz_to_midi(hz: "float | torch.Tensor") -> torch.Tensor:
    """Convert a frequency in Hz to a (fractional) MIDI note number, with 440 Hz mapping to 69.

    Args:
        hz: Frequency as a Python/NumPy scalar, array, or a tensor. Non-tensor inputs are
            converted to a tensor of the default float dtype, because ``torch.log2`` does not
            accept plain Python numbers. Tensors are used as-is (dtype and device preserved).

    Returns:
        Tensor of MIDI note numbers. Non-positive frequencies yield ``-inf`` or ``nan``.
    """
    if not torch.is_tensor(hz):
        hz = torch.as_tensor(hz, dtype=torch.get_default_dtype())
    return 12 * torch.log2(hz / 440) + 69

def ratio_to_interval(ratio):
    """Convert a frequency ratio to an interval in equal-tempered semitones (12 * log2(ratio)).

    Args:
        ratio: Frequency ratio as a Python/NumPy scalar, array, or a tensor. Non-tensor inputs
            are converted to a tensor of the default float dtype, because ``torch.log2`` does not
            accept plain Python numbers. Tensors are used as-is.

    Returns:
        Tensor of intervals in semitones (2 -> 12, 1 -> 0, ratios below 1 are negative).
    """
    if not torch.is_tensor(ratio):
        ratio = torch.as_tensor(ratio, dtype=torch.get_default_dtype())
    return 12 * torch.log2(ratio)

In [None]:
def adsr(
    ts: float, a: float, d: float, r:float, zero: float = 0., peak: float = 1., s: float = 0.5,
    frame_rate: int = 250, device=None
) -> torch.Tensor:
    """Build a piecewise-linear attack/decay/sustain/release envelope.

    Args:
        ts: Sustain duration in seconds.
        a: Attack duration in seconds (ramp ``zero`` -> ``peak``).
        d: Decay duration in seconds (ramp ``peak`` -> ``s``).
        r: Release duration in seconds (ramp ``s`` -> ``zero``).
        zero: Value at the start of the attack and the end of the release.
        peak: Value reached at the end of the attack.
        s: Sustain level.
        frame_rate: Envelope frames per second (250 matches the rest of this notebook).
        device: Target device. Defaults to CUDA when available, otherwise CPU, so the helper
            no longer crashes on machines without a GPU.

    Returns:
        Tensor of shape ``(1, 1, n_frames)`` where ``n_frames`` is the sum of the four segment
        lengths, each computed as ``int(duration * frame_rate)``.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    attack = torch.linspace(zero, peak, int(a * frame_rate))
    decay = torch.linspace(peak, s, int(d * frame_rate))
    sustain = torch.ones(int(ts * frame_rate)) * s
    release = torch.linspace(s, zero, int(r * frame_rate))

    # Segments are built on the CPU and moved once; two leading singleton dims are added.
    env = torch.cat([attack, decay, sustain, release])[None, None, :].to(device)

    return env

In [None]:
def sin(ts: float, f: float, frame_rate: int = 250, device=None):
    """Return a sine wave of frequency ``f`` Hz lasting ``ts`` seconds.

    Args:
        ts: Duration in seconds; the result has ``int(ts * frame_rate)`` frames.
        f: Frequency in Hz.
        frame_rate: Frames per second (250 matches the rest of this notebook).
        device: Target device. Defaults to CUDA when available, otherwise CPU.

    Returns:
        1-D float32 tensor with values in [-1, 1], starting at phase 0.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    t = torch.arange(int(ts * frame_rate), dtype=torch.float32, device=device) / frame_rate
    result = torch.sin(2 * np.pi * f * t)

    return result

def sin_like(ts: torch.Tensor, f: float, frame_rate: int = 250, device=None):
    """Return a sine wave of frequency ``f`` Hz with as many frames as ``ts`` has in its last dim.

    Args:
        ts: Reference tensor; only ``ts.shape[-1]`` and (by default) ``ts.device`` are used.
        f: Frequency in Hz.
        frame_rate: Frames per second (250 matches the rest of this notebook).
        device: Target device. Defaults to the device of ``ts`` so the result can be combined
            with ``ts`` directly, instead of always being allocated on CUDA.

    Returns:
        1-D float32 tensor of length ``ts.shape[-1]``, starting at phase 0.
    """
    if device is None:
        device = ts.device

    t = torch.arange(ts.shape[-1], dtype=torch.float32, device=device) / frame_rate
    result = torch.sin(2 * np.pi * f * t)

    return result

In [None]:
# Load the flute checkpoint onto the GPU and switch the module to evaluation mode.
# `.to()` and `.eval()` both return the module itself, so the calls can be chained.
with torch.inference_mode():
    model = DDSP.load_from_checkpoint(flt_ckpt, map_location='cuda').to('cuda').eval()

In [None]:
# Loudness levels of the hand-built envelope: floor, peak (three `std` above sustain), sustain
# level, and the step used to derive the other levels.
zero, peak, sus, std = -100, -48+15*3, -48, 15
# Segment durations in seconds: attack, decay, sustain, release.
a, d, s, r = 0.1, 0.9, 0.1, 0.01

# (start level, end level, duration) for each linear ramp, at 250 frames per second.
ramp_specs = [
    (zero, peak, a),
    (peak, sus, d),
    (sus, sus-std, s),
    (sus-std, zero, r),
]
attack, decay, sustain, release = (
    torch.linspace(start, stop, int(seconds*250), device='cuda')
    for start, stop, seconds in ramp_specs
)
# 2.7 s held at the floor level after the note has been released.
silence = zero * torch.ones(int(2.7*250), device='cuda')

amp = torch.cat((attack, decay, sustain, release, silence))

# Small Gaussian jitter plus a 4 Hz sinusoidal modulation of amplitude 1, both added in place.
amp += 0.01 * torch.randn_like(amp)
amp += sin_like(amp, 4) * 1

In [None]:
# Frequency ratios that get_melody() turns into pitches relative to MIDI note 51.
constant = [2, 3, 5, 7, 11/2, 13/2, 17/4, 19/4]
# Alternative ratio set, kept for experimentation:
# constant = [1, 2, 3/2, 4/3, 5/3, 5/4, 7/5, 8/5, 7/4, 6/5, 9/7, 11/8, 10/7, 11/7, 13/8, 12/7, 9/5]

# Dedicated RNG with a fixed seed (123); it is used to shuffle `constant` when rendering.
rand = random.Random(123)
# Another alternative ratio set:
# constant = [1, 11/8, 11/7, 11/3]
def get_melody(c):
    """Turn a list of frequency ratios into a stepwise f0 contour as long as the global `amp`.

    The ratios are spread evenly over the duration of `amp` and held with nearest-neighbour
    interpolation, so every ratio occupies roughly the same number of frames. Each frame's pitch
    is MIDI note 51 transposed by the ratio's interval.

    Args:
        c: List of frequency ratios (a list, because it is repeated with ``c * repeat``).
            Needs at least two entries for the interpolation to be defined.

    Returns:
        Tuple ``(f0, changes, ratios)``:
        - ``f0``: CUDA tensor of per-frame frequencies in Hz, same length as `amp`.
        - ``changes``: CUDA tensor, twice the absolute difference between the ratios of
          consecutive frames (0 for the first frame), i.e. non-zero only where the note changes.
        - ``ratios``: float32 NumPy array of the ratios that were used.
    """
    # Number of times the ratio list is repeated within one phrase (currently fixed to once).
    repeat = 1
    ratios = np.array(c * repeat, dtype='float32')
    ratio_times = np.linspace(0, 1, len(ratios), dtype='float32')
    frame_times = np.linspace(0, 1, len(amp), dtype='float32')

    hold = interpolate.interp1d(ratio_times, ratios, kind='nearest')

    factors = torch.from_numpy(hold(frame_times)).cuda()

    # Frame-to-frame difference. The previous `factors[:1]` compared every frame with the FIRST
    # frame (broadcast) rather than with its predecessor, so it did not mark note changes.
    steps = torch.abs(factors[:-1] - factors[1:])
    changes = torch.cat([torch.tensor([0.], device='cuda'), steps]) * 2

    f0 = torch.ones_like(amp, device='cuda') * 51.
    f0 += ratio_to_interval(factors)
    f0 = midi_to_hz(f0)

    return f0, changes, ratios

In [None]:
# Render the layers that are mixed at the end of this cell: two melodic layers first,
# then one constant-pitch layer.
ys = []
for _ in range(2):
    oll = []
    for _ in range(8):
        # NOTE: shuffles the module-level `constant` list in place, so its order is different
        # for every phrase and also for any later cell that reads `constant`.
        rand.shuffle(constant)
        with torch.inference_mode():
            f0, changes, ratios = get_melody(constant)
            # Both controls are passed with two leading singleton dims: (1, 1, frames).
            y = model(f0[None, None, :], amp[None, None, :])
            oll.append(y)

    # Concatenate the 8 phrases along the last axis and move the layer to a NumPy array.
    ys.append(torch.cat(oll, dim=-1).cpu().numpy().squeeze())


# Constant-pitch layer: (ones * 51) - 12 = MIDI 39, one octave below the melodies' base note.
f0 = midi_to_hz(torch.ones_like(amp, device='cuda') * 51-12)
oll = []
for _ in range(8):
    with torch.inference_mode():
        # The whole loudness curve is scaled by one random factor drawn as 1 + 0.25 * N(0, 1).
        y = model(f0[None, None, :], amp[None, None, :] * (torch.randn(1, device='cuda') * 0.25 + 1))
        oll.append(y)

ys.append(torch.cat(oll, dim=-1).cpu().numpy().squeeze())

# Mix the three layers by summation and play the result at 48 kHz, normalized.
Audio(data=sum(ys), rate=48000, normalize=True)

In [None]:
# Play only the last layer appended to `ys` (the constant-pitch one), without normalization.
Audio(data=ys[-1], rate=48000, normalize=False)

In [None]:
# Inspect the number of frames in the loudness envelope.
amp.shape

In [None]:
# For every ratio, build a music21 Note from MIDI 51 plus the ratio's interval in semitones and
# list (note, ratio, octave) so the resulting pitches can be inspected.
# NOTE: the order follows the current (possibly shuffled) order of `constant`.
[(note := music21.note.Note(float(ratio_to_interval(torch.tensor(c))+51)), c, note.octave) for c in constant]

In [None]:
# yy = np.zeros_like(_y)

In [None]:
# NOTE(review): neither `yy` nor `_y` is assigned in any cell visible here (the only
# initialisation of `yy` is commented out in the previous cell), so this raises NameError on a
# fresh kernel — confirm where these are meant to come from.
yy += _y

In [None]:
# Play `yy` at 48 kHz.
Audio(data=yy, rate=48000)

In [None]:
# For each ratio (ascending), fold its interval into a single octave and report
#   * the deviation in semitones from the nearest equal-tempered pitch, and
#   * the name of that nearest pitch;
# then list all folded intervals in cents, ascending.
new_sorted = []
for c in sorted(constant):
    # Remainder instead of repeated subtraction: the result is always in [0, 12), so ratios
    # below 1 (negative intervals) are folded up as well instead of staying negative.
    midi = ratio_to_interval(torch.tensor(c)) % 12.
    new_sorted.append(midi*100)
    print(midi - midi.round(),
          music21.note.Note(midi.round()).pitch.name)
for f in sorted(new_sorted):
    print(f'{f:.2f}')

In [None]:
# NOTE(review): `factors` is not assigned at notebook level in any cell visible here; the only
# `factors` is a local variable inside get_melody(), and there it is already a torch tensor,
# whereas torch.from_numpy() expects a NumPy array. This cell fails on a fresh kernel — confirm.
f0 = torch.ones_like(amp, device='cuda') * 440. * torch.from_numpy(factors).cuda()

In [None]:
# NOTE(review): `y2` is not assigned in any cell visible here, so this raises NameError on a
# fresh kernel; `y` is whatever the last model call left behind — confirm both inputs.
sound = y + y2

In [None]:
# Play the summed signal at 48 kHz.
Audio(data=sound.squeeze().cpu().numpy(), rate=48000)

In [None]:
# Save `y` to 1.wav with soundfile.write at a sample rate of 48000.
write('1.wav', y.squeeze().T.cpu().numpy(), 48000)

In [None]:
# Save `y2` to 2.wav with soundfile.write at a sample rate of 48000.
# NOTE(review): `y2` is not assigned in any cell visible here — confirm where it comes from.
write('2.wav', y2.squeeze().T.cpu().numpy(), 48000)

In [None]:
def show(music):
    """Write `music` via its `write("lily.png")` method and display the resulting image inline."""
    written_path = music.write("lily.png")
    rendered = Image(str(written_path))
    display(rendered)

In [None]:
def to_str(microtone):
    """Return `str(microtone)` with its first and last characters stripped."""
    text = str(microtone)
    return text[1:-1]

In [None]:
def add_microtone(note):
    """Annotate `note` with its microtonal deviation in cents as a lyric.

    The deviation is read from `note.pitch.microtone.cents`. Nothing is added when its magnitude
    is below 10 cents; otherwise the value is rounded to an integer and written with an explicit
    sign (e.g. "+49" or "-17") via `note.addLyric(..., applyRaw=True)`.
    """
    cents = note.pitch.microtone.cents
    if abs(cents) >= 10:
        rounded = int(np.round(cents))
        # `+d` prints the sign explicitly; `rounded` cannot be zero once |cents| >= 10.
        note.addLyric(f'{rounded:+d}', applyRaw=True)

In [None]:
# Inspect the microtonal deviation (in cents) of the note `one`.
# NOTE(review): `one` is first assigned in the NEXT cell, so on a fresh kernel this cell raises
# NameError when the notebook is run top to bottom.
one.pitch.microtone.cents

In [None]:
# Build a small test score: a one-measure pattern (two microtonal eighth notes followed by a
# rest) repeated ten times in 5/4.

# First note: 11/7 above 440 Hz, labelled with its deviation in cents.
one = music21.note.Note(quarterLength=0.5)
one.pitch.frequency = 440*11/7
add_microtone(one)

# Second note: 11/13 of 440 Hz, labelled the same way.
two = music21.note.Note(quarterLength=0.5)
two.pitch.frequency = 440*11/13
add_microtone(two)

# presumably a rest of 4 quarter lengths, which fills the 5/4 measure — TODO confirm the meaning
# of the positional argument for the installed music21 version.
rest = music21.note.Rest(4)

cres = music21.dynamics.Crescendo(one)

# NOTE: this rebinds the notebook-level name `s`, which the envelope cell uses for the
# sustain duration.
s = music21.stream.Score(id='mainScore')
p0 = music21.stream.Part(id='part0')

# Template measure that is deep-copied for every repetition.
m01 = music21.stream.Measure(number=1)

m01.append(music21.dynamics.Dynamic('pp'))
m01.append(one)
m01.append(music21.dynamics.Dynamic('f'))
m01.append(two)
m01.append(cres)
m01.append(rest)

for i in range(10):
    mezura = deepcopy(m01)
    # Give every copy its own measure number; previously all ten copies kept number=1.
    mezura.number = i + 1
    if i % 3 == 2:
        # Request a new system after every third measure.
        mezura.append(music21.layout.SystemLayout(isNew=True))
    p0.append(mezura)

tempo = music21.tempo.MetronomeMark(referent=1.0, number=90.0)
p0.measure(1).insert(tempo)
p0.insert(0, music21.meter.TimeSignature('5/4'))
s.insert(0, p0)
s.show()

In [None]:
# Inspect the `seconds` attribute of the note `one`.
one.seconds

In [None]:
# FIXME: unfinished statement. `scale = music21.scale.` ends in a dangling attribute access,
# which is a SyntaxError and stops "Run All" before the following cells. It is disabled until
# the intended class from `music21.scale` is filled in.
# scale = music21.scale.

In [None]:
# Load 'verdi/laDonnaEMobile' from the music21 corpus, give the parsed score an id and render it.
verdi = music21.corpus.parse('verdi/laDonnaEMobile')
verdi.id = 'verdi'
verdi.show()