### Hybrid image source - FDN reverberation 
Since the FDN only gives a perceptually plausible late tail, its output is often combined
with the direct path and early reflections from a different room acoustics simulator in order to synthesize an entire RIR. In this notebook, we will use the image-source method to get the impulse response up to 50 ms, and then combine it with the output of the FDN using cross-fading. This will give a more realistic synthesis of the RIR in a shoebox room.

In [None]:
import numpy as np
import soundfile as sf
import pyroomacoustics as pra
import matplotlib.pyplot as plt
from numpy.typing import NDArray
from loguru import logger
from scipy.signal.windows import hann
from scipy.signal import fftconvolve
from IPython.display import Audio
from pathlib import Path

from pyFDN.fdn import FDN
from pyFDN.delay_line import generate_coprime_delay_line_lengths
from pyFDN.feedback_matrix import FeedbackMatrixType
from pyFDN.utils import ms_to_samps, get_exponential_envelope

### Generate room impulse response with image-source method

In [None]:
# The desired reverberation time and dimensions of the room
fs = 48000
rt60 = 0.5  # seconds
room_dim = [9, 7.5, 3.5]  # meters

# We invert Sabine's formula to obtain the parameters for the ISM simulator
e_absorption, max_order = pra.inverse_sabine(rt60, room_dim)

# Create the room
# NOTE(review): the reflection order returned by inverse_sabine is not used;
# the order is hard-coded to 20 - presumably to bound the computation time,
# confirm that this is intended.
room = pra.ShoeBox(
    room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=20
)

# place the source in the room
room.add_source(position=[2.5, 3.73, 1.76])
# add the mic in the room
room.add_microphone([6.3, 4.8, 1.2])

room.image_source_model()
room.compute_rir()
# RIR between mic 0 and source 0
im_rir = room.rir[0][0]
ir_len = len(im_rir)

# Build the time axis from integer sample indices: np.arange with a
# fractional step can return one sample too many because of rounding, which
# would make the plots below fail with a length mismatch.
time_vector = np.arange(ir_len) / fs
# exponential decay that drops by 60 dB (a factor of 1000) after rt60 seconds
time_constant = rt60 / np.log(1000)
max_peak_amp = np.max(im_rir)
exp_envelope = max_peak_amp * np.exp(-time_vector / time_constant)

# plot the RIR between mic 0 and source 0, with the ideal decay envelope
plt.plot(time_vector, im_rir)
plt.plot(time_vector, exp_envelope, 'k--')
plt.xlabel('Time')
plt.title('Image method RIR')

### Generate room impulse response with feedback delay network

In [None]:
# number of delay lines
N = 16
frame_size = 2**7
speed_sound = 345  # m/s
max_dist_between_walls = max(room_dim)
min_dist_between_walls = min(room_dim)
# time taken by sound to travel the longest / shortest room dimension,
# converted from seconds to milliseconds
max_delay_line_ms = 1e3 * max_dist_between_walls / speed_sound
min_delay_line_ms = 1e3 * min_dist_between_walls / speed_sound

# single input, single output (mono) FDN
num_input = 1
num_output = 1

# input gains
b = np.random.randn(N, num_input)
# output gains
c = np.random.randn(num_output, N)
# no direct path through the FDN; the direct sound comes from the image-source RIR
direct_gain = np.zeros((num_output, num_input))

# delay lengths should be co-prime
# constrain the delay range to the wall-to-wall propagation times (in ms),
# not to the wall distances in meters
delay_range_ms = np.array([min_delay_line_ms, max_delay_line_ms])
delay_lengths = generate_coprime_delay_line_lengths(delay_range_ms, N, fs)
logger.info(f'The delay line lengths are {delay_lengths} samples')

# create an impulse
input_data = np.zeros((num_input, ir_len))
input_data[:, 0] = 1.0

# desired broadband T60
des_t60_ms = rt60 * 1e3

In [None]:
# Build the FDN from the parameters defined in the previous cell
fdn = FDN(fs, num_input, num_output, N, frame_size)
# b are the input gains and c the output gains
fdn.init_io_gains(b, c)
# direct_gain is all zeros, i.e. no direct input-to-output path
fdn.init_direct_gain(direct_gain)
fdn.init_delay_line_lengths(delay_lengths)
fdn.init_feedback_matrix(FeedbackMatrixType.SCALAR_RANDOM)
# des_t60_ms is the desired broadband T60 in milliseconds (rt60 * 1e3)
fdn.init_absorption_gains(des_t60_ms)
# NOTE(review): the initialisation order is kept exactly as written -
# presumably init_delay_lines depends on the settings above; confirm against pyFDN.
fdn.init_delay_lines()

# Run the unit impulse through the network to obtain its impulse response
fdn_ir = fdn.process(input_data)

In [None]:
# Overlay the image-source RIR and the FDN RIR
plt.figure()
plt.plot(time_vector, im_rir, label='Image method')
plt.plot(time_vector, fdn_ir.T, label='FDN')
# Ideal decay scaled to the FDN peak only. exp_envelope is already scaled by
# the image-source peak, so reusing it here would apply two peak gains.
fdn_envelope = np.max(fdn_ir) * np.exp(-time_vector / time_constant)
plt.plot(time_vector, fdn_envelope, 'k--', label='Ideal decay')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.title('FDN RIR')
plt.legend()

### Get a hybrid RIR by taking the early reflections from IM and late reverb from FDN

In [None]:
def combine_rirs(early_rir: NDArray,
                 late_rir: NDArray,
                 fs: float,
                 mixing_time_ms: float = 50,
                 xfade_length_ms: float = 20,
                 time_axis: int = 0) -> NDArray:
    """
    Combine early and late responses from 2 different RIRs at mixing time with gain matching.

    The early response is kept up to the mixing time and faded out with the
    falling half of a Hann window; the late response is faded in with the
    rising half of the same window over the same region and kept afterwards.
    The cross-fade region is centred on the mixing time. The late response is
    scaled, per channel, by the ratio of the maxima of the two complete input
    responses (computed before any fading is applied).

    The inputs are not modified.

    Args:
        early_rir : RIR from which early response will be taken (2D array)
        late_rir : RIR from which late response will be taken, same shape as early_rir
        fs: sampling frequency of the RIRs
        mixing_time_ms: time when the RIRs are mixed
        xfade_length_ms: length of cross-fade between the two
        time_axis: axis of the inputs that holds time; if it is not 0 the
            inputs are transposed so that time runs along axis 0
    Returns:
        NDArray: the combined RIR, of shape (time, channels)
    Raises:
        ValueError: if the inputs are not 2D, or if the cross-fade region does
            not fit inside the RIRs
    """
    assert early_rir.shape == late_rir.shape, \
        "early_rir and late_rir must have the same shape"
    if early_rir.ndim != 2:
        raise ValueError("early_rir and late_rir must be 2D arrays")

    # bring time to axis 0 first, so that all sizes below refer to the right axis
    if time_axis != 0:
        early_rir = early_rir.T
        late_rir = late_rir.T
    # work on copies so that the caller's arrays are left untouched
    early = np.array(early_rir, copy=True)
    late = np.array(late_rir, copy=True)
    ir_len_samp = early.shape[0]

    mixing_time_samp = ms_to_samps(mixing_time_ms, fs)
    window_length_samp = ms_to_samps(xfade_length_ms, fs)

    # The cross-fade region is centred on the mixing time. Deriving the end
    # from the start keeps it exactly window_length_samp long, even when the
    # window length is odd.
    xfade_start = mixing_time_samp - window_length_samp // 2
    xfade_end = xfade_start + window_length_samp
    if xfade_start < 0 or xfade_end > ir_len_samp:
        raise ValueError(
            f"Cross-fade region [{xfade_start}, {xfade_end}) does not fit in an "
            f"RIR of {ir_len_samp} samples"
        )

    # create fade in and fade out windows from the two halves of a Hann window;
    # the trailing axis lets them broadcast over the channels
    window = hann(2 * window_length_samp)
    fade_in_win = window[:window_length_samp, np.newaxis]
    fade_out_win = window[window_length_samp:, np.newaxis]

    # gain matching: per-channel ratio of the maxima of the complete responses
    gain = np.max(early, axis=0) / np.max(late, axis=0)

    # fade out the early response and silence everything after the cross-fade
    early[xfade_start:xfade_end, :] *= fade_out_win
    early[xfade_end:, :] = 0
    # fade in the late response and silence everything before the cross-fade
    late[xfade_start:xfade_end, :] *= fade_in_win
    late[:xfade_start, :] = 0

    return early + gain * late
    

In [None]:
# Early part from the image-source RIR, late part from the FDN RIR.
# Copies are passed so that the original RIRs stay intact for later cells.
combined_rir = combine_rirs(im_rir[:, np.newaxis].copy(), fdn_ir.T.copy(), fs, xfade_length_ms=20)
plt.figure()
plt.plot(time_vector, combined_rir)
# Ideal decay scaled to the hybrid RIR peak only. exp_envelope is already
# scaled by the image-source peak, so reusing it here would apply two peak gains.
hybrid_envelope = np.max(combined_rir) * np.exp(-time_vector / time_constant)
plt.plot(time_vector, hybrid_envelope, 'k--')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.title('Hybrid RIR')

1. Convolve dry output with image method RIR
2. Convolve dry output with hybrid RIR

In [None]:
audio_path = Path('../../resources/audio/')
dry_file = audio_path / 'lexicon_dry15.wav'
# Read the file's sample rate into its own variable: overwriting `fs` would
# silently desynchronise it from the rate the RIRs were generated at.
input_signal, audio_fs = sf.read(str(dry_file))
if audio_fs != fs:
    from scipy.signal import resample_poly

    logger.warning(
        f'Resampling dry signal from {audio_fs} Hz to {fs} Hz to match the RIRs'
    )
    # time runs along axis 0 of the array returned by sf.read
    input_signal = resample_poly(input_signal, fs, audio_fs, axis=0)
Audio(str(dry_file))

In [None]:
# 1. Reverberate the first channel of the dry signal with the image-method
#    RIR, save the result to disk and play it back
im_wav_path = str(audio_path / 'lexicon_mono_im.wav')
dry_first_channel = input_signal[:, 0]
output_im = fftconvolve(dry_first_channel, im_rir)
sf.write(im_wav_path, output_im, fs)
Audio(im_wav_path)

In [None]:
# 2. Reverberate the first channel of the dry signal with the hybrid RIR
#    (flattened to 1D), save the result to disk and play it back
hyb_wav_path = str(audio_path / 'lexicon_mono_hyb.wav')
hybrid_rir_1d = np.squeeze(combined_rir)
output_hyb = fftconvolve(input_signal[:, 0], hybrid_rir_1d)
sf.write(hyb_wav_path, output_hyb, fs)
Audio(hyb_wav_path)