In [None]:
import sys
from pathlib import Path

# Make the sibling ``src`` directory importable. It is pushed to the front of
# sys.path only when it is not already listed, so re-running the cell is a no-op.
src_dir = str(Path.cwd().parent / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# Default every figure in this notebook to 20 x 10 inches.
plt.rc('figure', figsize=(20, 10))
from IPython.display import Audio

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
import librosa
from librosa.display import specshow

In [None]:
from models.modules.harmonic_oscillator import OscillatorBank

In [None]:
# Shared synthesis / analysis settings used by the cells below.
n_harmonics = 60      # harmonic count handed to OscillatorBank
sample_rate = 16_000  # audio rate; also passed as `rate` / `sr` for playback and plotting
win_length = 2_048    # analysis frame length: STFT size and kernel row count
hop_length = 256      # hop handed to OscillatorBank and to the STFT
f0 = 20.0             # fundamental used for synthesis and for the harmonic kernels

In [None]:
shit = OscillatorBank(n_harmonics, sample_rate, hop_length)

In [None]:
with torch.no_grad():
    dist = torch.randn(1, 1000, n_harmonics)
    dist = (dist - dist.min()) / (dist.max() - dist.min())
    audio = shit.forward(
        torch.ones(1, 1000, 1) * f0,
        torch.ones(1, 1000, 1) * -24.0,
        torch.ones(1, 1000, n_harmonics) / (torch.arange(1, n_harmonics + 1))
        # dist
    )

In [None]:
# Render an inline player for the synthesised signal at the synthesis sample rate.
Audio(audio, rate=sample_rate)

In [None]:
# Magnitude STFT of the first item along dim 0 of `audio`.
# n_fft and hop_length are passed by keyword: librosa >= 0.10 makes every
# argument after `y` keyword-only, so the positional form raises TypeError there.
stft = np.abs(librosa.stft(audio[0].numpy(), n_fft=win_length, hop_length=hop_length))

In [None]:
# Log-magnitude spectrogram with labelled time / frequency axes and a dB colour bar,
# so the figure can be read on its own.
spec_db = librosa.amplitude_to_db(stft)
mesh = specshow(spec_db, sr=sample_rate, hop_length=hop_length, x_axis='time', y_axis='hz')
plt.colorbar(mesh, format='%+2.0f dB')
plt.title('STFT magnitude of the synthesised signal')
plt.show()

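- Frequency in the kernels is expressed in cycles per sample, i.e. $f / f_s$ for a frequency $f$ in Hz and sample rate $f_s$ (`sample_rate`).
- The frequency conversion factor is the Nyquist index of `win_length` divided by the Nyquist frequency of `sample_rate`: $(\text{win\_length}/2) / (\text{sample\_rate}/2) = \text{win\_length} / \text{sample\_rate}$. It converts a frequency in Hz into a (generally fractional) DFT bin index.
- `f0 * this_factor` is therefore the bin of the fundamental; substituting it for the bin index in the DFT phase `k / win_length` gives `f0 / sample_rate`, which (times the harmonic number) is the frequency term in the fbsp kernel.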

In [None]:
def dft_kernel(n, k):
    """Return the DFT basis value(s) exp(-2*pi*i * (k / win_length) * n).

    n: sample index (scalar or NumPy array).
    k: bin index (scalar or NumPy array that broadcasts with ``n``).

    The transform size is the notebook-level ``win_length``. The result is a
    complex64 torch tensor.
    """
    bin_frequency = k / win_length  # bin k expressed in cycles per sample
    phase = -2j * np.pi * bin_frequency * n
    return torch.tensor(phase, dtype=torch.complex64).exp()

In [None]:
def selective_dft_kernel(n, k, f0=f0):
    """Return DFT-style basis value(s) evaluated at multiples of ``f0``.

    Computes exp(-2*pi*i * (f0 * k / sample_rate) * n) as a complex64 torch
    tensor, using the notebook-level ``sample_rate``.

    n: sample index (scalar or NumPy array).
    k: multiplier applied to ``f0`` (scalar or NumPy array that broadcasts with ``n``).
    f0: fundamental; the default is the notebook-level ``f0`` captured when the
        function is defined.
    """
    harmonic_frequency = f0 * k / sample_rate  # cycles per sample
    phase = -2j * np.pi * harmonic_frequency * n
    return torch.tensor(phase, dtype=torch.complex64).exp()

In [None]:
def fbsp_kernel(n, k, f0=f0, fb=1., m=1):
    """Frequency B-spline (fbsp) wavelet sampled at index ``n`` for multiple ``k`` of ``f0``.

    Evaluates

        sqrt(fb) * sinc(fb * t / m) ** m * exp(2*pi*i * fc * t)

    with ``t = n / sample_rate`` and ``fc = f0 * k``, where ``sinc`` is the
    normalised sin(pi x) / (pi x). ``sample_rate`` is the notebook-level value.

    n: sample index (scalar or NumPy array).
    k: multiplier applied to ``f0`` (scalar or NumPy array that broadcasts with ``n``).
    f0: fundamental; the default is the notebook-level ``f0`` captured when the
        function is defined.
    fb: bandwidth parameter, in the same unit as ``f0``.
    m: order of the wavelet (exponent applied to the sinc envelope).

    Returns a complex64 torch tensor.
    """
    # Same time axis as the carrier below, so envelope and carrier share one ``t``.
    t = n / sample_rate
    # The envelope has to be sinc ** m. A bare linear ramp (fb * n / m) is zero at
    # n = 0 and grows without bound, which is not a wavelet window.
    envelope = np.sinc(fb * t / m) ** m
    carrier = torch.exp(torch.tensor(2j * np.pi * (f0 * k / sample_rate) * n, dtype=torch.complex64))
    scale = torch.sqrt(torch.tensor(fb, dtype=torch.float32))
    return scale * torch.tensor(envelope, dtype=torch.float32) * carrier

In [None]:
# Harmonic frequencies f0, 2*f0, ..., n_harmonics*f0, keeping only those strictly
# below half the sample rate.
nyquist = sample_rate / 2
harmonic_frequencies = f0 * np.arange(1, n_harmonics + 1)
overtones = harmonic_frequencies[harmonic_frequencies < nyquist]
n_overtones = overtones.shape[0]

In [None]:
# Full DFT basis: rows are sample indices, columns are the win_length DFT bins.
kernel = np.fromfunction(dft_kernel, (win_length, win_length))

# Harmonic kernels: one column per retained overtone. Index grids start at 0, so
# the column index is shifted by one to make column j analyse harmonic j + 1
# (f0, 2*f0, ...) rather than starting at DC and dropping the top overtone.
sample_idx, column_idx = np.indices((win_length, n_overtones), dtype=float)
harmonic_number = column_idx + 1
fkernel = fbsp_kernel(sample_idx, harmonic_number)
skernel = selective_dft_kernel(sample_idx, harmonic_number)

In [None]:
# Project the first win_length samples onto the fbsp kernel columns. Adding 0j
# promotes the real signal to a complex tensor so it can be multiplied with the
# complex kernel.
first_frame = audio[:, :win_length] + 0j
tr = torch.matmul(first_frame, fkernel)

In [None]:
# Magnitude of each kernel response for the first item along dim 0, one bar per column.
column_positions = np.arange(n_overtones)
response_magnitudes = torch.abs(tr[0])
plt.bar(column_positions, response_magnitudes)

In [None]:
# Magnitude spectrum of the first STFT frame (column 0), for comparison.
first_frame_spectrum = stft[:, 0]
plt.plot(first_frame_spectrum)