In [None]:
import os, sys
from pathlib import Path
from typing import Optional

import IPython
import matplotlib.pyplot as plt
# plt.rcParams['text.usetex'] = True
import numpy as np
import soundfile as sf
from scipy.signal import chirp, spectrogram
from tqdm import tqdm

# Add the parent directory to the module search path.
path = os.path.abspath('../.')
sys.path.append(path)

In [None]:
from velvet import *
from allpass import *
from utils import ms_to_samps, db, estimate_onsets_log_threshold
from widener import *
from hrtf_widener import *
from plot import *
from interaural_cues import itd_maxiacc
save_flag = False

### Helper functions

In [None]:
def plot_spectrogram(signal: np.ndarray, fs:float, title: str, nfft:int=2**10, axis:Optional[int]=None):
    """Plot the spectrogram of a mono or multichannel signal.

    Args:
        signal: 1-D signal, or 2-D array holding one signal per channel.
        fs: sampling rate in Hz, forwarded to ``scipy.signal.spectrogram``.
        title: title of the figure (placed on the first subplot when the
            signal is multichannel).
        nfft: FFT length forwarded to ``scipy.signal.spectrogram``.
        axis: axis of ``signal`` that holds time. ``None`` uses scipy's
            default, i.e. the last axis.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # scipy computes along the last axis unless told otherwise.
    time_axis = -1 if axis is None else axis
    ff, tt, Sxx = spectrogram(signal, fs=fs, window='hann', nfft=nfft, axis=time_axis)

    if Sxx.ndim == 3:
        # In scipy's output the frequency axis takes the place of the input's
        # time axis and the segment-time axis is appended last. Normalise the
        # layout to (channel, frequency, time) so the indexing below is
        # correct for either orientation of a 2-D input.
        freq_axis = time_axis % np.ndim(signal)
        Sxx = np.moveaxis(Sxx, freq_axis, -2)
        num_channels = Sxx.shape[0]

        # One subplot per channel; squeeze=False keeps `axes` 2-D even when
        # there is a single channel.
        fig, axes = plt.subplots(num_channels, 1, squeeze=False)
        for num_sig in range(num_channels):
            ch_ax = axes[num_sig, 0]
            ch_ax.pcolormesh(tt, ff, Sxx[num_sig], shading='gouraud')
            ch_ax.set_xlabel('t (sec)')
            ch_ax.set_ylabel('Frequency (Hz)')
            ch_ax.grid(True)
        axes[0, 0].set_title(title)

    else:
        fig, ax = plt.subplots()
        # Draw on the axes that was just created rather than on pyplot's
        # implicit "current" axes.
        ax.pcolormesh(tt, ff, Sxx, shading='gouraud')
        ax.set_title(title)
        ax.set_xlabel('t (sec)')
        ax.set_ylabel('Frequency (Hz)')
        ax.grid(True)
    plt.show()

#### Create a chirp signal and duplicate it

In [None]:
fs = 48000
input_len_sec = 2.0
# Build the time axis from an integer sample count. np.arange with a
# fractional step (1.0/fs) can come out one sample longer or shorter than
# intended because of floating-point rounding.
num_input_samples = int(round(input_len_sec * fs))
time_vec = np.arange(num_input_samples) / fs
input_signal = chirp(time_vec, f0=20, t1=input_len_sec, f1=20000, method='logarithmic')
# Stack the mono chirp twice: two identical rows, shape (2, num_input_samples).
input_signal_stereo = np.vstack((input_signal, input_signal))

# plot, and listen to chirp (transposed so that time runs along axis 0)
plot_spectrogram(input_signal_stereo.T, fs, 'Input chirp', axis=0)
sf.write('../../audio/input_chirp.wav', input_signal_stereo.T, fs)
IPython.display.Audio('../../audio/input_chirp.wav')


### Pass an impulse through the HRTF-based stereo widener and plot the output impulse responses for speaker separation angles $\beta \in [0, \pi/2]$

There is no point plotting the ICC between the left and the right output channels here, as they will always be identical.

In [None]:
num_freq_samples = 2**9
num_time_samples = 2**9
# Two-channel unit impulse, 0.5 s long, laid out as (samples, channels).
# NOTE: this rebinds `input_signal`, which previously held the mono chirp;
# the later cells read `input_signal_stereo`, which is left untouched.
input_signal = np.zeros((int(0.5*fs), 2))
input_signal[0, :] = 1.0

# input_signal, fs = sf.read('../../../Examples/original.mp3')
# save_flag = True

hrtf_widener = HRTFStereoWidener(sample_rate=fs, 
                                 azimuth_range=(-90,90), 
                                 num_freq_points=num_freq_samples, 
                                 num_time_samples=num_time_samples,
                                 head_radius=0.085)
# Speaker angles to evaluate, in radians.
num_beta = 5
beta = np.linspace(0, np.pi/2, num_beta)

# Run itd_maxiacc on the HRIR pair of every orientation in the widener's HRTF
# set. Column 0 of the stacked results is used as the ITD -- presumably the
# function returns (itd, max_iacc); TODO confirm against interaural_cues.
hrtf_set = hrtf_widener.hrtf_set
res_tup = np.array([itd_maxiacc(np.squeeze(hrtf_set.hrirs[k, ...]), fs, time_axis=0, ear_axis=1) 
                     for k in range (hrtf_widener.num_orientations)])
itds_set = res_tup[:, 0]
def find_closest_itd(all_itds, fs, all_doas, des_doa):
    """Return the ITD of the direction closest to `des_doa`, scaled by `fs`.

    Args:
        all_itds: ITD values, indexable by the position of the matching entry
            in `all_doas`.
        fs: factor the selected ITD is multiplied by (the sampling rate at the
            call site, which presumably converts seconds to samples).
        all_doas: array of directions of arrival, one per ITD.
        des_doa: desired direction of arrival, in the same unit as `all_doas`.

    Returns:
        `all_itds[i] * fs`, where `i` minimises `|all_doas - des_doa|`
        (the first such index when there is a tie).
    """
    distance_to_target = np.abs(all_doas - des_doa)
    nearest = np.argmin(distance_to_target)
    selected_itd = all_itds[nearest]
    return selected_itd * fs

# One subplot row per speaker angle. The grid is created once up front so no
# stray full-figure axes is left underneath the rows; squeeze=False keeps
# `axes` 2-D even if num_beta == 1.
fig, axes = plt.subplots(num_beta, 1, figsize=(6, 4), squeeze=False)
for k in tqdm(range(num_beta)):
    speaker_angle_deg = np.degrees(beta[k])
    hrtf_widener.update_speaker_angle(speaker_angle_deg)
    # Pass a copy so the impulse itself cannot be modified by process().
    output_signal = hrtf_widener.process(input_signal.copy())
    closest_itd = find_closest_itd(itds_set, fs, hrtf_set.doa, speaker_angle_deg)
    # Shift the response so the onset estimated on channel 0 lands at sample 0.
    onset = estimate_onsets_log_threshold(output_signal[:, 0], axis=0)
    output_signal = np.roll(output_signal, -onset, axis=0) 

    ax = axes[k, 0]
    ax.plot(output_signal[:num_time_samples, :])
    # Dashed markers at the aligned onset (0) and at the ITD of the nearest
    # direction in the HRTF set.
    ax.vlines([0, closest_itd], 0, 1, 'k', linestyles='dashed')

    # if save_flag:
    #     sf.write(f'../../../Examples/hrtf_spk_angle={np.degrees(beta[k])}.wav', output_signal, fs)

if save_flag:
    fig.savefig('../../figures/hrtf_stereo_widener_IRs.png', dpi=300)


### Get decorrelated signals with velvet noise filters

In [None]:
# Filter the stereo chirp with the velvet-noise filters stored on disk.
opt_vn_path = Path('../../../Resources/opt_vn_filters.txt')
vn_output_signal = process_velvet(input_signal_stereo, fs, opt_vn_path)

# Plot the result, write it to disk, then load the file into an audio player.
vn_wav_path = '../../audio/vn_chirp.wav'
plot_spectrogram(vn_output_signal, fs, title='VN stereo chirps', axis=0)
sf.write(vn_wav_path, vn_output_signal, fs)
IPython.display.Audio(vn_wav_path)

### Get decorrelated signals with allpass filters

In [None]:
# Filter the stereo chirp with a cascade of 200 allpass biquads.
allpass_output_signal = process_allpass(input_signal_stereo, fs, num_biquads=200)
plot_spectrogram(allpass_output_signal, fs, title='Allpass stereo chirps', axis=0)

# Write the result to disk, then load the file into an audio player.
allpass_wav_path = '../../audio/allpass_chirp.wav'
sf.write(allpass_wav_path, allpass_output_signal, fs)
IPython.display.Audio(allpass_wav_path)

### Pass through stereo widener for different values of beta


In [None]:
# Broadband widener driven by the stereo chirp, starting at beta = 0.
decorr_type = DecorrelationType.ALLPASS
beta_init = 0
stereo_widener = StereoWidenerBroadband(input_signal_stereo, fs, decorr_type, beta_init)

# Sweep beta over [0, pi/2] and record the output correlation at each value.
num_beta = 100
beta = np.linspace(0, np.pi/2, num_beta)
decorr_correlation_coeffs = np.zeros(num_beta)
for k, beta_k in enumerate(beta):
    stereo_widener.update_beta(beta_k)
    output_signal = stereo_widener.process()
    decorr_correlation_coeffs[k] = stereo_widener.calculate_correlation(output_signal)

# Correlation against beta normalised to [0, 1].
fig, ax = plt.subplots(figsize=(6, 4))
normalised_beta = beta / (np.pi / 2)
ax.plot(normalised_beta, decorr_correlation_coeffs)
ax.set_xlabel('Normalised angle')
ax.set_ylabel('Correlation coefficient')
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.grid()
if save_flag:
    fig.savefig('../../figures/beta_vs_correlation.png', dpi=300)

### Pass the signal through the frequency-based stereo widener and plot the interchannel coherence (IC)

In [None]:
# Two (beta_low, beta_high) settings, applied below and above `cutoff_freq`
# (cf. the legend of the coherence plot).
beta = [(0, np.pi/2), (np.pi/2, 0)]
num_beta = len(beta)
filterbank_type = FilterbankType.AMP_PRESERVE
cutoff_freq = 250
bands_per_octave=3
num_iter = 100
# Centre frequencies of the fractional-octave bands between 20 Hz and Nyquist.
iac_freqs = pf.dsp.filter.fractional_octave_frequencies(num_fractions=bands_per_octave, 
                                            frequency_range=(20, fs/2.0), 
                                            return_cutoff=False)[0]
num_freqs = len(iac_freqs)
# Coherence per (trial, beta setting, frequency band).
icc_vector = np.zeros((num_iter, num_beta, num_freqs))

# A fresh widener is built on every trial; the results are reduced across
# trials afterwards. The loop variable is named `trial` so that the builtin
# `iter` is not shadowed for the rest of the kernel session.
for trial in tqdm(range(num_iter)):
    stereo_widener = StereoWidenerFrequencyBased(input_signal_stereo, fs, filterbank_type, decorr_type, (0,0), cutoff_freq)
    for k in range(num_beta):
        stereo_widener.update_beta(beta[k])
        output_signal = stereo_widener.process()
        icc_vector[trial, k, :], _ = stereo_widener.calculate_interchannel_coherence(output_signal)
    del stereo_widener

In [None]:
plt.rc('text', usetex=False)
# Median coherence across trials, one curve per (beta_low, beta_high) setting.
semiaudplot(iac_freqs, np.median(icc_vector, axis=0), marker='*', interp=False)
plt.ylabel('Coherence')
plt.ylim([0, 1.01])
# Dashed marker at the cutoff frequency passed to the widener.
plt.vlines(cutoff_freq, 0, 1.01, colors= 'k', linestyles='dashed')
plt.legend(["beta_{low} = 0, beta_{high} = pi/2", "beta_{low} = pi/2, beta_{high} = 0"], loc="upper right")
plt.grid()
# Only write the figure when saving is enabled, like the other figures in
# this notebook.
if save_flag:
    plt.savefig('../../figures/stereo_widener_iac.png', dpi=300)