In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [17, 6]
from IPython.display import Audio

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import scipy.signal
from librosa.display import waveshow
import librosa

In [None]:
from rt_ddsp.synths import FilteredNoise
from rt_ddsp import core

In [None]:
n_samples = 32000
sample_rate = 16000
n_bands = 100

# I might need to re-implement the `FilteredNoise` class

Everything required to implement one is below.

I might also need to try a different overlapping-window strategy: a flat (boxcar) window across the slice, with half a Hann window on each side. But working on the hiss seems to be a waste of time for now.

In [None]:
# White-noise excitation: n_samples values drawn uniformly from [-1, 1).
# A fixed seed keeps the audio, the plots and the similarity score below
# identical across "Restart Kernel -> Run All" executions.
rng = np.random.default_rng(0)
signal = rng.uniform(low=-1.0, high=1.0, size=n_samples)

In [None]:
Audio(signal, rate=sample_rate, normalize=True)

In [None]:
plt.plot(signal[:1000])
plt.show()

In [None]:
def gaussian(x, mu, sig):
    """Evaluate an unnormalised Gaussian bump (value 1 where ``x == mu``).

    Args:
        x: Scalar or array of evaluation points.
        mu: Centre of the bump.
        sig: Width (standard deviation) of the bump.

    Returns:
        ``exp(-(x - mu)**2 / (2 * sig**2))``, broadcast over the inputs.
    """
    squared_offset = np.power(x - mu, 2.)
    two_variance = 2 * np.power(sig, 2.)
    return np.exp(-squared_offset / two_variance)

In [None]:
def padded_hann(n):
    """Place an 80-point Hann window at band ``n`` of an 8001-bin response.

    Bin 0 is always left at zero; the window occupies bins ``1 + 80 * n``
    through ``80 * (n + 1)`` and every other bin is zero.

    NOTE(review): a later cell defines ``padded_hann`` again, so after a
    top-to-bottom run this non-overlapping variant is shadowed.

    Args:
        n: Zero-based band index. Outside 0..99 one of the pad widths
            becomes negative, which ``np.pad`` rejects.

    Returns:
        1-D array of length 8001.
    """
    window = np.hanning(80)
    zeros_before = 80 * n + 1
    zeros_after = 8000 - 80 * (n + 1)
    return np.pad(window, (zeros_before, zeros_after))

In [None]:
def padded_hann(n, half_width=80, n_bins=8001):
    """Return a Hann window for band ``n`` embedded in an ``n_bins`` response.

    Each band is a ``2 * half_width``-point Hann window whose first sample
    sits at bin ``(n - 1) * half_width + 1``, so neighbouring bands overlap
    by half a window. Bin 0 is always left at zero. The parts of the window
    that would fall below bin 1 (band 0) or beyond the last bin (top band)
    are cut off, leaving a half window at each edge of the response.

    Args:
        n: Zero-based band index, ``0 <= n <= (n_bins - 1) // half_width``.
        half_width: Spacing between band centres, in bins (half the window
            length). Defaults to 80.
        n_bins: Length of the returned response. Defaults to 8001.

    Returns:
        1-D float array of length ``n_bins``.

    Raises:
        ValueError: If ``half_width < 1``, ``n_bins < 2`` or ``n`` is out of
            range. Without this check an out-of-range ``n`` silently
            produced an array longer than ``n_bins``.
    """
    if half_width < 1 or n_bins < 2:
        raise ValueError("half_width must be >= 1 and n_bins must be >= 2")
    last_band = (n_bins - 1) // half_width
    if not 0 <= n <= last_band:
        raise ValueError(
            "band index n=%r is outside the valid range 0..%d" % (n, last_band)
        )

    hann = np.hanning(2 * half_width)
    # Position of the window's first sample; it is <= 0 for band 0.
    start = (n - 1) * half_width + 1
    # Clip the window to bins [1, n_bins): bin 0 is never written.
    lo = max(start, 1)
    hi = min(start + 2 * half_width, n_bins)

    response = np.zeros(n_bins)
    response[lo:hi] = hann[lo - start:hi - start]
    return response

In [None]:
one_bands = [5, 6, 12, 20, 21, 22, 23, 24, 25]

In [None]:
# Frequency-domain gain curve: one Hann bump per enabled band, added together.
band_windows = [padded_hann(band) for band in one_bands]
fdg_filter = sum(band_windows)

In [None]:
fdg_filter.shape

In [None]:
plt.plot(fdg_filter)
plt.show()

In [None]:
# Turn the magnitude response into an impulse response, then rotate it so
# the zero-lag tap sits in the middle of the array instead of at index 0.
tdg_filter = np.fft.fftshift(np.fft.irfft(fdg_filter))

In [None]:
plt.plot(tdg_filter)
plt.show()

In [None]:
signal = signal[:512]
signal.shape, tdg_filter.shape

In [None]:
padded_signal = np.pad(signal, (len(tdg_filter), len(tdg_filter)))
padded_signal.shape

In [None]:
# 'valid' keeps only the lags at which the kernel lies entirely inside the
# zero-padded signal.
result = np.convolve(padded_signal, tdg_filter, mode='valid')
# result = result[8000:-8001]
result.shape

In [None]:
plt.plot(result[7500:-7500])
plt.show()

In [None]:
Audio(result, rate=sample_rate)

In [None]:
plt.plot(np.abs(np.fft.rfft(result)))
plt.show()

In [None]:
# Magnitude spectrum of the filtered noise, then the magnitudes summed over
# n_bands equal-width, non-overlapping groups of bins (any leftover bins at
# the top of the spectrum are ignored).
result_fft = np.abs(np.fft.rfft(result))
band_width = len(result_fft) // n_bands
bands = np.array([
    np.sum(result_fft[band * band_width:(band + 1) * band_width])
    for band in range(n_bands)
])

In [None]:
def cosine_similarity(x, y):
    """Cosine of the angle between two vectors.

    Args:
        x: First 1-D vector.
        y: Second 1-D vector, same length as ``x``.

    Returns:
        ``x @ y`` divided by the product of the two Euclidean norms.
    """
    dot_product = x @ y
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    return dot_product / norm_product

In [None]:
# Average the magnitude spectrum over 101 bands centred on the same grid as
# the control bands. The band width is derived from the actual spectrum
# length: a fixed 160 bins per band only fits a 16001-bin spectrum, and on a
# shorter one every band starting past the end is an empty slice, for which
# np.mean returns NaN (which then turns the cosine similarity into NaN).
n_control_bands = 101
bins_per_band = (len(result_fft) - 1) / (n_control_bands - 1)

bands = []
for i in range(n_control_bands):
    # Band i covers +/- half a band width around its centre, clipped to the
    # valid bin range (so the first and last bands are half as wide).
    begin = max(0, int(round((i - 0.5) * bins_per_band)))
    end = min(len(result_fft), int(round((i + 0.5) * bins_per_band)))
    bands.append(np.mean(result_fft[begin:end]))
bands = np.array(bands)

# Reference control vector: 1.0 for every band listed in one_bands, else 0.
controls = np.zeros(n_control_bands)
controls[one_bands] = 1.0

In [None]:
plt.bar(range(101), bands / 80)
plt.bar(range(101), controls)
plt.show()

In [None]:
cosine_similarity(bands, controls)

## Convert all of the above to pytorch

Bicubic interpolation is 10x slower than linear, but windowing also takes about as long as bicubic interpolation.

In [None]:
# Random per-band gains. The tensor is 4-D (1, 1, 1, 100) because bicubic
# F.interpolate only accepts 4-D input; the last axis holds the 100 bands.
controls = torch.rand(1, 1, 1, 100)
# Upsample the last axis from 100 to 8001 points (the height axis stays 1).
intr_bands = F.interpolate(controls, size=(1, 8001), mode='bicubic')

In [None]:
plt.bar(range(100), controls[0, 0, 0].numpy())

In [None]:
plt.plot(intr_bands[0, 0, 0].numpy())

In [None]:
fdg_filter = intr_bands[0, 0, 0].numpy()
controls = controls[0, 0, 0].numpy()

In [None]:
# Inverse real FFT of the interpolated gain curve gives the impulse response;
# fftshift then moves its zero-lag sample to the centre of the array.
tdg_filter = np.fft.fftshift(np.fft.irfft(fdg_filter))

In [None]:
plt.plot(tdg_filter)
plt.show()

In [None]:
signal.shape, tdg_filter.shape

In [None]:
padded_signal = np.pad(signal, (len(tdg_filter), len(tdg_filter)))

In [None]:
# Convolve the zero-padded noise with the impulse response, keeping only the
# lags where the kernel fully overlaps the padded signal.
filtered_full = np.convolve(padded_signal, tdg_filter, mode='valid')
# Drop 8000 leading and 8001 trailing samples; for a 16000-tap filter this
# brings the output back to len(signal) samples.
result = filtered_full[8000:-8001]
result.shape

In [None]:
plt.plot(result[:1000])
plt.show()

In [None]:
Audio(result, rate=sample_rate)

In [None]:
result_fft = np.abs(np.fft.rfft(result))

In [None]:
# Overlay the measured spectrum on the target filter response.
# Each x axis is sized from the curve it belongs to: hard-coding 16001 points
# makes plt.plot raise "x and y must have same first dimension" whenever
# result_fft has a different length (e.g. a 512-sample result has 257 bins).
# NOTE(review): the 1/230 scale looks like an eyeballed normalisation for one
# particular signal length - confirm or derive it from len(result).
plt.plot(np.linspace(0, 16001, len(result_fft)), result_fft / 230)
plt.plot(np.linspace(0, 16001, len(fdg_filter)), fdg_filter)
plt.show()

In [None]:
np.fft.rfftfreq(16000, 1/16000)

In [None]:
result_fft.shape

In [None]:
signal = torch.rand(1, 1, 1, 16000) * 2.0 - 1.0

In [None]:
plt.plot(signal.numpy()[0, 0, 0])

In [None]:
# Slice the (1, 1, 1, 16000) noise into overlapping frames; F.unfold returns
# them as columns, i.e. a tensor of shape (1, frame_length, n_frames).
# (An earlier attempt used Tensor.unfold: signal.unfold(1, 2048, 512).)
frame_length, hop_length = 2048, 512
windowed = F.unfold(signal, kernel_size=(1, frame_length), stride=(1, hop_length))

In [None]:
plt.plot(windowed.numpy()[0, :, 0])

In [None]:
# Overlap-add reconstruction: taper every 2048-sample frame with a Hann
# window, fold the frames back into a (1, 16000) signal with a hop of 512,
# then halve the sum (presumably to undo the gain of the overlapping Hann
# windows - TODO confirm).
frame_window = torch.hann_window(2048)[None, :, None]
tapered_frames = windowed * frame_window
rebuild = F.fold(
    tapered_frames,
    output_size=(1, 16000),
    kernel_size=(1, 2048),
    stride=(1, 512),
) / 2.0

In [None]:
plt.plot(rebuild[0, 0, 0])

In [None]:
plt.plot((rebuild[0, 0, 0] - signal.numpy()[0, 0, 0])[2048:-2048])