In [None]:
import sys
from pathlib import Path

# Make the sibling ``src`` directory importable; prepend it only when it is
# not already on the module search path.
src_dir = str(Path.cwd().parent / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# Default size (width, height in inches) for every figure in this notebook.
plt.rc('figure', figsize=(20, 10))
from IPython.display import Audio

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
import librosa
from librosa.display import specshow

In [None]:
from models.modules.harmonic_oscillator import OscillatorBank

In [None]:
# Notebook-wide configuration.
n_harmonics = 60        # passed to OscillatorBank; width of the per-harmonic weight tensor
sample_rate = 16_000    # samples per second; used for synthesis, playback and plot axes
win_length = 2_048      # analysis frame length in samples (STFT and hand-built kernels)
hop_length = 256        # step between analysis frames in samples
f0 = 1_024              # fundamental of the test tone (presumably Hz - confirm against OscillatorBank)

In [None]:
# Harmonic oscillator bank under test, built from the notebook-level config.
# NOTE(review): the variable name is unprofessional and says nothing about its
# role; consider renaming it (e.g. `oscillator_bank`) together with every cell
# that uses it.
shit = OscillatorBank(n_harmonics, sample_rate, hop_length)

In [None]:
# Drive the oscillator bank with constant control signals and no autograd
# tracking. Argument meanings are inferred from the values passed here -
# confirm against OscillatorBank.forward.
with torch.no_grad():
    n_frames = 1000
    # Same fundamental on every control frame.
    f0_track = torch.ones(1, n_frames, 1) * f0
    # Constant -24.0 on every frame (presumably an overall level in dB).
    level_track = torch.ones(1, n_frames, 1) * -24.0
    # Weight of harmonic h is 1 / h, for h = 1 .. n_harmonics.
    harmonic_weights = torch.ones(1, n_frames, n_harmonics) / torch.arange(1, n_harmonics + 1)
    audio = shit.forward(f0_track, level_track, harmonic_weights)

In [None]:
# Inline player for the synthesised signal at the notebook sample rate.
Audio(audio, rate=sample_rate)

In [None]:
# Magnitude STFT of the first batch item.
# librosa >= 0.10 makes every stft argument after `y` keyword-only, so the old
# positional call raises TypeError there. In older releases the first
# positional slot is `n_fft` (not `win_length`); naming it keeps the behaviour
# and makes the intent explicit. With `win_length` left unset, librosa uses a
# window as long as `n_fft`.
stft = np.abs(
    librosa.stft(audio[0].numpy(), n_fft=win_length, hop_length=hop_length)
)

In [None]:
# Log-magnitude spectrogram. Without x_axis / y_axis, specshow ignores `sr`
# and `hop_length` and leaves the axes unlabelled, so request both explicitly.
img = specshow(
    librosa.amplitude_to_db(stft),
    sr=sample_rate,
    hop_length=hop_length,
    x_axis='time',
    y_axis='linear',
)
plt.colorbar(img, format='%+2.0f dB')
plt.title('Log-magnitude STFT of the synthesised tone')
plt.show()

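- `f0` is a frequency in Hz, i.e. it is defined relative to `sample_rate` (samples per second).
- The factor that converts Hz to a DFT-bin index is the Nyquist bin of the window divided by the Nyquist frequency: `(win_length / 2) / (sample_rate / 2) = 1024 / 8000`.
- `f0 * factor` (about 131 bins for `f0 = 1024`) is the frequency term used in the `fbsp_kernel`.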

In [None]:
def dft_kernel(n, k, n_fft=None):
    """Evaluate the DFT basis ``exp(-2j * pi * k * n / n_fft)``.

    Written so it can be handed to ``np.fromfunction``, which calls it once
    with full index grids for ``n`` and ``k`` and returns whatever this
    function returns (here a torch tensor, not a NumPy array).

    Args:
        n: Sample index or grid of sample indices.
        k: Frequency-bin index or grid, broadcastable against ``n``.
        n_fft: Transform length. ``None`` (the default) uses the
            notebook-level ``win_length``.

    Returns:
        A ``torch.complex64`` tensor with the broadcast shape of ``n`` and ``k``.
    """
    if n_fft is None:
        n_fft = win_length
    phase = -2j * np.pi * (k / n_fft) * n
    # The phase reaches thousands of radians for large n * k. Rounding it to
    # single precision *before* exp() costs ~1e-3 rad of accuracy, so
    # exponentiate in double precision and only then narrow to complex64.
    basis = torch.exp(torch.as_tensor(phase, dtype=torch.complex128))
    return basis.to(torch.complex64)

In [None]:
def fbsp_kernel(n, k, f0=f0 * win_length / sample_rate, n_fft=None):
    """Evaluate complex exponentials at integer multiples of a base frequency.

    Column ``k`` of the resulting grid is ``exp(-2j * pi * (f0 * k / n_fft) * n)``,
    i.e. a DFT-style basis vector at ``k`` times the base frequency ``f0``.
    Written so it can be handed to ``np.fromfunction``.

    Args:
        n: Sample index or grid of sample indices.
        k: Multiple of the base frequency, broadcastable against ``n``
            (``k = 0`` is the DC term).
        f0: Base frequency expressed in DFT bins, not Hz. The default converts
            the notebook-level ``f0`` with ``win_length / sample_rate``, the
            same ratio as the previously hard-coded ``1024 / 8000``. It is
            evaluated once, when this cell runs.
        n_fft: Transform length. ``None`` (the default) uses the
            notebook-level ``win_length``.

    Returns:
        A ``torch.complex64`` tensor with the broadcast shape of ``n`` and ``k``.
    """
    if n_fft is None:
        n_fft = win_length
    phase = -2j * np.pi * (f0 * k / n_fft) * n
    # Exponentiate in double precision: the phase spans thousands of radians,
    # and rounding it to single precision first would cost ~1e-3 rad.
    basis = torch.exp(torch.as_tensor(phase, dtype=torch.complex128))
    return basis.to(torch.complex64)

In [None]:
# Full square DFT matrix. The size is taken from `win_length` because
# dft_kernel divides by that same value; a hard-coded 2048 would silently stop
# being a DFT if the config changed. np.fromfunction returns whatever
# dft_kernel returns, so `kernel` is a torch tensor.
kernel = np.fromfunction(dft_kernel, (win_length, win_length))

In [None]:
# Harmonic analysis kernel: one row per sample of a `win_length` frame, one
# column per multiple 0..7 of the base frequency. The row count follows
# `win_length` so it stays consistent with the divisor inside fbsp_kernel.
fkernel = np.fromfunction(fbsp_kernel, (win_length, 8))

In [None]:
# Project one 2048-sample frame (samples 2048..4095) onto the harmonic kernel.
# The frame is promoted to complex so its dtype matches the kernel.
tr = torch.matmul(audio[:, 2048:4096].to(torch.complex64), fkernel)

In [None]:
# Halved magnitude of each kernel column's response for the first batch item.
# The bar count follows the projection's width instead of a hard-coded 8.
harmonic_mags = torch.abs(tr[0]) / 2
plt.bar(np.arange(tr.shape[-1]), harmonic_mags)
plt.xlabel('multiple of base frequency (k)')
plt.ylabel('|projection| / 2')
plt.title('Response of one frame to the harmonic kernel')
plt.show()

In [None]:
# Magnitude spectrum of a single STFT frame (column 128), all frequency bins.
frame_spectrum = stft[:, 128]
plt.plot(frame_spectrum)

In [None]:
# Insert a singleton middle axis so the kernel has the 3-D
# (out_channels, in_channels, width) layout that conv1d expects for weights.
ck = kernel[:, None, :]

In [None]:
# Strided correlation of the signal with every kernel row.
# conv1d requires input and weight to share a dtype, so passing the real
# signal with the complex weight raises a RuntimeError. Convolve with the real
# and imaginary parts of the weight separately and recombine; for a real input
# this equals the complex convolution.
signal = audio.unsqueeze(0)
res = torch.complex(
    F.conv1d(signal, ck.real, stride=hop_length),
    F.conv1d(signal, ck.imag, stride=hop_length),
)

In [None]:
# Inspect the synthesised signal's shape (presumably (batch, samples) - the
# cells above index it as audio[0] and audio[:, a:b]).
audio.shape

In [None]:
# Inspect the magnitude STFT's shape (rows are frequency bins, columns are
# frames, as indexed by stft[:, 128] above).
stft.shape