In [None]:
import sys
from pathlib import Path

# Make the sibling `src` directory (next to this notebook's parent folder)
# importable, putting it first on sys.path unless it is already listed.
if str(Path.cwd().parent / 'src') not in sys.path:
    sys.path.insert(0, str(Path.cwd().parent / 'src'))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# Default figure size (width, height) for every plot drawn later in the notebook.
plt.rcParams['figure.figsize'] = [20, 10]
from IPython.display import Audio

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
import librosa
from librosa.display import specshow
from librosa.filters import get_window
import torchcrepe

In [None]:
from models.modules.harmonic_oscillator import OscillatorBank
from models.modules.loss import MorletTransform
from models.modules.utils import pad_audio, get_frames

In [None]:
# --- Audio / framing settings -------------------------------------------
sample_rate = 16_000  # rate handed to the oscillator, the loader, CREPE and playback
win_length = 1_024    # analysis window size (presumably in samples -- confirm in get_frames)
hop_length = 64       # step between successive frames

# --- Test-signal settings -----------------------------------------------
n_harmonics = 60      # number of harmonics in the oscillator bank / Morlet analysis
f0 = 440.0            # base frequency that the synthetic pitch glide is scaled by
dur = 500             # length of the synthetic control signals, counted in frames

In [None]:
# Harmonic oscillator bank used below as the synthesizer: it is called with
# (frequency, amplitude, harmonic distribution) control tensors and returns audio.
osc = OscillatorBank(n_harmonics, sample_rate, hop_length)

In [None]:
# Analysis transform under test: called below with framed audio plus a
# frequency track, it returns a harmonic distribution and an amplitude.
morlet_transform = MorletTransform(sample_rate, win_length, n_harmonics)

In [None]:
# Synthesize a reference signal with known controls (frequency track, amplitude,
# harmonic distribution) so the Morlet analysis further down can be checked
# against ground truth.

# Fixed seed: the harmonic distribution is random, and without a seed every
# Restart & Run All would produce a different signal, plot and resynthesis.
torch.manual_seed(0)

with torch.no_grad():
    # One random, non-negative weight per harmonic, held constant over all frames.
    dist = torch.abs(torch.randn(1, 1, n_harmonics))
    dist = torch.tile(dist, (1, dur, 1))

    # Alternative distributions kept for experimentation:
    # dist = torch.zeros(1, dur, n_harmonics)
    # dist[..., 0] = 1.0

    # dist = 61. - torch.arange(1, 61)

    # dist = torch.ones(1, dur, 60)

    # Normalize so the harmonic weights of every frame sum to 1.
    dist = dist / dist.sum(-1, keepdim=True)

    amp = 1.0

    # Pitch glide: one value per frame, sin(pi/2 * t) with t running from 0 to
    # hop_length * dur / sample_rate, mapped through (sin + 2) * f0 / 3 so the
    # track stays within [f0 / 3, f0]. Resulting shape is (1, dur, 1).
    # np.pi replaces the hand-typed 3.14159265 (same value at float32 precision).
    time_s = torch.linspace(0, hop_length * dur / sample_rate, dur)
    freq = (torch.sin(time_s * np.pi * 0.5).unsqueeze(0).unsqueeze(-1) + 2) * f0 / 3

    # freq = torch.ones(1, dur, 1) * f0
    audio = osc(
        freq,
        torch.ones(1, dur, 1) * amp,
        dist
    )

In [None]:
# Listen to the first batch item of the synthesized signal.
# normalize=False asks IPython not to rescale the samples before playback.
Audio(data=audio[0].T, rate=sample_rate, normalize=False)

In [None]:
# Magnitude spectrogram of the first batch item of the synthesized signal.
# Keyword arguments are required: in librosa >= 0.10 everything after `y` is
# keyword-only, and the old positional call was really filling `n_fft` (not
# `win_length`) with the window size. Naming the arguments keeps the same
# analysis on older versions and makes the call valid on newer ones.
stft = np.abs(librosa.stft(audio[0, ...].numpy(), n_fft=win_length, hop_length=hop_length))

In [None]:
# Display the magnitude spectrogram on a dB scale.
# NOTE(review): no x_axis / y_axis is requested, so the axes are drawn without
# time / frequency labels even though sr and hop_length are supplied.
specshow(librosa.amplitude_to_db(stft), sr=sample_rate, hop_length=hop_length)
plt.show()

- Frequency is expressed in cycles per sample.
- The frequency conversion factor is the Nyquist bin index (`win_length / 2`) divided by the Nyquist frequency (`sample_rate / 2`), i.e. `win_length / sample_rate`.
- `f0 * factor` is the frequency term in the FBSP (frequency B-spline) wavelet kernel.
- Given the CREPE pitch estimate, learn an inharmonicity factor by maximizing the real sound's total energy under this new transform.

In [None]:
# Pad the synthesized audio before framing. The padding scheme itself lives in
# models.modules.utils.pad_audio (not visible here) -- presumably chosen so the
# frame count lines up with the `dur` control frames; TODO confirm.
padded_audio = pad_audio(audio, win_length, hop_length)

In [None]:
# Cut the padded signal into frames using the window and hop sizes from the
# config cell; the result is what the Morlet transform consumes next.
framed_audio = get_frames(padded_audio, win_length, hop_length)

In [None]:
# Analyse the frames at the known ground-truth frequency track, recovering a
# harmonic distribution and an amplitude estimate.
# NOTE: this rebinds `amp`, which the synthesis cell had set to the scalar 1.0.
new_dist, amp = morlet_transform(framed_audio, freq)

In [None]:
# Image of the recovered distribution for the first batch item. flip(1) reverses
# axis 1 and .T transposes -- presumably so that harmonic index increases upwards
# and frames run left to right; TODO confirm the axis order of new_dist.
plt.imshow(new_dist[0].flip(1).T)

In [None]:
# Resynthesize from the analysed controls so the result can be compared by ear
# with the original signal. Autograd is disabled: nothing is being trained here.
with torch.no_grad():
    # amp gets a trailing axis before it is handed to the oscillator bank;
    # the synthesized output gets one as well.
    new_audio = torch.unsqueeze(osc(freq, amp[..., None], new_dist), dim=-1)

In [None]:
# Listen to the resynthesized signal (first batch item). new_audio carries a
# trailing axis from unsqueeze(-1), which .T moves to the front.
Audio(data=new_audio[0].T, rate=sample_rate, normalize=False)

In [None]:
# Violin recording used as the real-world test signal. The location is built
# from the current user's home directory instead of a hard-coded /home/<user>
# prefix, so the notebook is not tied to one account. Kept as a plain string.
path = str(Path.home() / 'Music' / 'violin' / 'Violin Samples' / 'yee_bach_passion#25.wav')

In [None]:
# Load the recording at the notebook's sample rate; [0] picks the sample array
# out of librosa's (samples, rate) return value, and the slice keeps only the
# first sample_rate * 4 samples, i.e. the first 4 seconds.
violin = librosa.load(path, sr=sample_rate)[0][:sample_rate*4]

In [None]:
# Listen to the 4-second violin excerpt as loaded (no rescaling by IPython).
Audio(data=violin, rate=sample_rate, normalize=False)

In [None]:
# Convert the NumPy samples to a tensor, add a leading batch dimension, and pad
# with the same helper and settings that were used for the synthetic signal.
padded_violin = pad_audio(torch.from_numpy(violin).unsqueeze(0), win_length, hop_length)

In [None]:
# Frame-wise pitch track of the violin excerpt from CREPE, using the
# weighted-argmax decoder. pad=False because the audio was already padded by
# pad_audio above.
freqs = torchcrepe.predict(
    padded_violin,
    sample_rate,
    hop_length,
    decoder=torchcrepe.decode.weighted_argmax,
    pad=False,
    # Use the GPU when one is visible, otherwise fall back to the CPU instead of
    # failing on machines without CUDA.
    device='cuda' if torch.cuda.is_available() else 'cpu',
)

In [None]:
# Frame the padded violin signal exactly like the synthetic one.
violin_frames = get_frames(padded_violin, win_length, hop_length)
# Optional Hann windowing of every frame, currently disabled:
# hann = torch.hann_window(win_length)
# windowed_frames = torch.einsum('btn,n->btn', violin_frames, hann)

In [None]:
# Analyse the violin frames at the CREPE pitch track; unsqueeze(-1) adds a
# trailing axis, matching the (1, dur, 1) layout of the synthetic `freq`.
# NOTE: this rebinds `new_dist` and `amp` from the synthetic-signal analysis.
new_dist, amp = morlet_transform(violin_frames, freqs.unsqueeze(-1))

In [None]:
# dB-scaled image of the first 100 entries along axis 1 (presumably frames) of
# the recovered distribution for the first batch item, flipped and transposed
# the same way as the synthetic-signal plot.
# NOTE(review): amplitude_to_db is documented for NumPy arrays; a tensor is passed
# directly here and relies on implicit conversion -- confirm new_dist is a CPU
# tensor that does not require grad.
plt.imshow(librosa.amplitude_to_db(new_dist[0, :100].flip(1).T))

In [None]:
# Amplitude estimate over time for the first batch item of the violin analysis.
plt.plot(amp[0, :])

In [None]:
# Drive the oscillator bank with the controls extracted from the violin (CREPE
# pitch, Morlet amplitude and harmonic distribution). Inference only, so
# gradient tracking is switched off.
with torch.no_grad():
    # Pitch and amplitude each get a trailing axis before synthesis; the
    # synthesized output gets one too.
    new_audio = torch.unsqueeze(osc(freqs[..., None], amp[..., None], new_dist), dim=-1)

In [None]:
# Listen to the violin resynthesis (first batch item) for comparison with the
# original excerpt; samples are played back without rescaling.
Audio(data=new_audio[0].T, rate=sample_rate, normalize=False)

In [None]:
# Build the Morlet kernel matrix for a single frequency entry of 440.
# (presumably Hz, like f0) and inspect its first kernel.
morlet_wavelet = morlet_transform.generate_morlet_matrix(torch.ones(1, 1, 1) * 440.)

# Real and imaginary parts of the kernel at index [0, 0, 0], drawn on the same axes.
plt.plot(morlet_wavelet[0, 0, 0].real)
plt.plot(morlet_wavelet[0, 0, 0].imag)

In [None]:
# noise bands center bin and bandwidth calculations
for n in range(100):
   #  print(80 * n, 80 * n + 40, 80 * n + 80)
    pass