In [None]:
import numpy as np
import torch
import re
import os
import matplotlib.pyplot as plt
import pyfar as pf
import soundfile as sf
import librosa

from pathlib import Path
from numpy.typing import ArrayLike, NDArray
from typing import Optional, List
import IPython
from loguru import logger
from copy import deepcopy

# Move to the repository root (DiffGFDN) so that the project imports below and
# the relative data paths used throughout the notebook resolve.
# A relative chdir is not idempotent: re-running this cell in the same kernel
# would climb two more directories. Skip the step when the `diff_gfdn` package
# directory is already visible from the current working directory.
# NOTE(review): assumes `diff_gfdn/` sits directly in the repository root — confirm.
if not os.path.isdir('diff_gfdn'):
    os.chdir('../..')  # This changes the working directory to DiffGFDN

from diff_gfdn.config.config import DiffGFDNConfig
from diff_gfdn.config.config_loader import load_and_validate_config
from diff_gfdn.inference import infer_all_octave_bands_directional_fdn
from diff_gfdn.utils import ms_to_samps
from spatial_sampling.dataloader import parse_three_room_data

from src.sofa_parser import HRIRSOFAReader
from src.sound_examples import binaural_dynamic_rendering

In [None]:
# --- tunable constants ------------------------------------------------------
# Frequencies in Hz; one trained config file per entry is loaded further down.
freqs_list = [63, 125, 250, 500, 1000, 2000, 4000, 8000]
# Grid resolution in metres; it is baked into the config / output folder names.
grid_res_m = 0.9
# Sub-folder of `out_path` (note the leading '/', it is appended by string
# concatenation later on).
save_dir = f'/treble_data_directional_fdn_grid_res={grid_res_m:.1f}m'

# --- locations, made absolute against the current working directory ---------
room_data_pkl_path = Path('resources/Georg_3room_FDTD/srirs_spatial.pkl').resolve()
config_path = Path('data/config/directional_fdn').resolve()
out_path = Path('output/directional_fdn/').resolve()
audio_path = Path('output/sound_examples').resolve()

# --- original dataset -------------------------------------------------------
room_data = parse_three_room_data(room_data_pkl_path)

### Create a trajectory of a listener moving across the space

In [None]:
def _straight_segment(start_xy, end_xy, az_deg_range, n_steps, height=1.5):
    """Sample one straight leg of the listener path.

    Parameters
    ----------
    start_xy, end_xy : tuple of float
        (x, y) coordinates of the first and last position of the leg.
    az_deg_range : tuple of float
        Head azimuth in degrees at the first and last position; it is
        interpolated linearly in between and converted to radians.
    n_steps : int
        Number of positions on the leg (both end points included).
    height : float
        Constant z coordinate assigned to every position.

    Returns
    -------
    positions : ndarray of shape (n_steps, 3)
        Columns are x, y, z.
    orientations : ndarray of shape (n_steps, 2)
        Columns are azimuth and elevation in radians; elevation is always 0.
    """
    positions = np.column_stack((
        np.linspace(start_xy[0], end_xy[0], n_steps),
        np.linspace(start_xy[1], end_xy[1], n_steps),
        height * np.ones(n_steps),
    ))
    orientations = np.column_stack((
        np.deg2rad(np.linspace(az_deg_range[0], az_deg_range[1], n_steps)),
        np.deg2rad(np.zeros(n_steps)),
    ))
    return positions, orientations


# along x axis between three rooms
pos_leg_x, ori_leg_x = _straight_segment((0.5, 3.5), (9, 3.5), (200, 30), 50)

# along y-axis between rooms 2 and 3
pos_leg_y, ori_leg_y = _straight_segment((9.1, 3.5), (9.0, 12.0), (30, 150), 68)

# full path: the x leg followed by the y leg
rec_pos_list = np.vstack((pos_leg_x, pos_leg_y))
head_orientation_list = np.vstack((ori_leg_x, ori_leg_y))

### Create config dicts

In [None]:
# Load one config per entry of freqs_list, keeping the same order.
# Each file is looked up inside `config_path` by its frequency and grid resolution.
config_dicts = [
    load_and_validate_config(
        str(config_path)
        + f'/treble_data_grid_training_{band_hz}Hz_directional_fdn_grid_res={grid_res_m:.1f}m.yml',
        DiffGFDNConfig,
    )
    for band_hz in freqs_list
]

### Get original and synthesised late reverberation by DiffDirectionalFDN

In [None]:
# get the synthesised late tail
# Directory handed to the inference routine: `save_dir` appended to `out_path`.
dfdn_output_dir = str(out_path) + save_dir
synth_dfdn_room_data = infer_all_octave_bands_directional_fdn(
    freqs_list,
    config_dicts,
    dfdn_output_dir,
    room_data,
    rec_pos_list,
)

In [None]:
# Quick visual check of one synthesised response.
# The index selects along the first axis of `rirs`
# (presumably the receiver-position index along the trajectory — confirm).
inspect_idx = 54
rir = synth_dfdn_room_data.rirs[inspect_idx, ...]
plt.plot(rir.T)

### Get the mono, dry stimulus and resample it

In [None]:
sig_type = 'speech'

# pyfar example signal: 'drums' selects the drum loop, anything else the speech sample
if sig_type == 'drums':
    stimulus = pf.signals.files.drums()
else:
    stimulus = pf.signals.files.speech()

stimulus_fs = stimulus.sampling_rate
render_fs = int(synth_dfdn_room_data.sample_rate)

# drop singleton dimensions, then bring the stimulus to the sample rate of the synthesised RIRs
dry_signal = np.squeeze(stimulus.time)
if stimulus_fs != render_fs:
    dry_signal = librosa.resample(dry_signal, orig_sr=stimulus_fs, target_sr=render_fs)

# add some silence at the end (500 ms)
trailing_silence = np.zeros(ms_to_samps(500, render_fs))
speech_app = np.concatenate((dry_signal, trailing_silence))
IPython.display.Audio(speech_app, rate=render_fs)

### Load the HRTF dataset

In [None]:
# SOFA file holding the HRIR set; the path is relative to the working directory.
hrtf_sofa_file = Path('resources/HRTF/48kHz/KEMAR_Knowl_EarSim_SmallEars_FreeFieldComp_48kHz.sofa')
# Reader object passed to the binaural renderer further down.
hrtf_reader = HRIRSOFAReader(hrtf_sofa_file)

### Create binaural example

In [None]:
# Passed to the renderer as `update_ms`; should divide 1 s (1000 ms) evenly.
update_ms = 250

# Every artefact of this cell (animation, wav, combined video) is written into
# this folder. Create it up front so a fresh checkout does not fail on the
# first write; this is a no-op when the folder already exists.
sound_examples_dir = out_path / 'sound_examples'
sound_examples_dir.mkdir(parents=True, exist_ok=True)

# File-name prefix (no extension) shared by the animation and the combined video.
ani_save_path = (sound_examples_dir / 'treble_data_binaural').resolve()

dynamic_renderer = binaural_dynamic_rendering(synth_dfdn_room_data,
                                             rec_pos_list,
                                             head_orientation_list,
                                             speech_app,
                                             hrtf_reader,
                                             update_ms=update_ms)
dynamic_renderer.animate_moving_listener(ani_save_path)

# binaural rendering of the stimulus with the RIRs synthesised by the directional FDN
pred_output = dynamic_renderer.binaural_filter_overlap_add()

pred_output_norm = dynamic_renderer.normalise_loudness(pred_output, synth_dfdn_room_data.sample_rate, db_lufs=-24)
save_path = (
    sound_examples_dir
    / f'binaural_directional_fdn_grid_res={grid_res_m:.1f}_moving_listener_{sig_type}.wav'
).resolve()
sf.write(save_path, pred_output_norm, int(synth_dfdn_room_data.sample_rate))

# Only the last expression of a cell is rendered automatically, so a bare
# `Audio(...)` here would be silently discarded. Display the player explicitly.
IPython.display.display(IPython.display.Audio(save_path))

# Merge the saved animation and the rendered audio into one file.
dynamic_renderer.combine_animation_and_sound(f'{ani_save_path}_moving_listener.mp4', f'{save_path}',
                                             f'{ani_save_path}_directional_fdn_grid_res={grid_res_m:.1f}_{sig_type}')