# HW2 - Speech Enhancement in Reverberant Environments

**Q1(a):** Generate Room Impulse Responses (RIRs) and visualize them in the time domain for different reverberation times.

## Setup and Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys

# Prepend the rir-generator sources (presumably a local checkout next to this
# notebook) to the import path so the import below finds them first.
sys.path.insert(0, './rir-generator/src')
import rir_generator as rir

# Seed NumPy's global RNG so any random draws are repeatable between runs.
np.random.seed(42)

## Simulation Parameters

In [None]:
# Room configuration
room_dim = [4.0, 5.0, 3.0]  # room size [x, y, z] in meters
c = 343.0  # speed of sound (m/s)
fs = 16000  # sample rate (Hz)

# Reverberation times to test
t60_ms_list = [150, 300]  # milliseconds

# Microphone array: uniform linear array (ULA) along the x-axis,
# centered at [2, 1, 1.7]
n_mics = 5
d_mic = 0.05  # 5 cm spacing
center = np.array([2.0, 1.0, 1.7])

# Build mic positions from n_mics so the geometry follows the setting above.
# Offsets are symmetric about the center: for n_mics = 5 the element indices
# are -2, -1, 0, 1, 2 (times d_mic).
mic_offsets = (np.arange(n_mics) - (n_mics - 1) / 2) * d_mic
mic_positions = np.array([center] * n_mics)  # shape (n_mics, 3)
mic_positions[:, 0] += mic_offsets  # only the x coordinate varies

# Source at 30 degrees (measured from the x-axis in the horizontal plane),
# 1.5 m from the array center and at the same height as the array
theta_src = np.deg2rad(30)
r_src = 1.5
source_pos = np.array([
    center[0] + r_src * np.cos(theta_src),
    center[1] + r_src * np.sin(theta_src),
    center[2]
])

# Sanity check: the simulated source and receivers must lie inside the room.
assert ((mic_positions > 0) & (mic_positions < room_dim)).all(), \
    "a microphone lies outside the room"
assert ((source_pos > 0) & (source_pos < room_dim)).all(), \
    "the source lies outside the room"

print(f"Room: {room_dim} m")
print(f"Mic array center: {center}")
print(f"Mic positions (x): {mic_positions[:, 0]}")
print(f"Source position: {source_pos}")

## Generate RIRs for Different T60 Values

In [None]:
# Generate one multichannel RIR per reverberation time.
# rir_dict maps T60 in milliseconds -> array of shape (samples, mics).
rir_dict = {}

for t60_ms in t60_ms_list:
    t60 = t60_ms / 1000.0  # convert to seconds

    # As specified: nsample = T60 * fs, rounded up to a whole sample.
    # Computed as ceil(t60_ms * fs / 1000) via floor division so the result
    # is exact for integer inputs; taking ceil() of the float product
    # t60 * fs can overshoot by one sample when the product carries
    # floating-point rounding error.
    n_samples = int(-((-t60_ms * fs) // 1000))

    # Simulate the response from the source to every microphone at once
    h = rir.generate(
        c=c,
        fs=fs,
        r=mic_positions,
        s=source_pos,
        L=room_dim,
        reverberation_time=t60,
        nsample=n_samples
    )

    # Later cells index h as h[:, mic], so fail early if the layout differs.
    assert h.shape == (n_samples, len(mic_positions)), \
        f"unexpected RIR shape {h.shape}"

    rir_dict[t60_ms] = h
    print(f"T60={t60_ms}ms: RIR shape = {h.shape} (samples x mics)")

## Plot RIRs in Time Domain (First Microphone)

In [None]:
# One panel per reverberation time, laid out side by side. The grid is sized
# from t60_ms_list so adding or removing T60 values needs no change here.
n_panels = len(t60_ms_list)
fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 4), squeeze=False)
axes = axes.ravel()  # squeeze=False always returns a 2-D grid; flatten to 1-D

for ax, t60_ms in zip(axes, t60_ms_list):
    h_mic1 = rir_dict[t60_ms][:, 0]  # first microphone
    t = np.arange(len(h_mic1)) / fs * 1000  # time in ms

    ax.plot(t, h_mic1, linewidth=0.7)
    ax.set_xlabel('Time (ms)')
    ax.set_ylabel('Amplitude')
    ax.set_title(f'RIR - T60 = {t60_ms} ms')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Q1(b): Generate Measured Signals via Convolution

In [None]:
import scipy.signal as sig
import librosa

# Dry source signal: one LibriSpeech utterance, requested from librosa at the
# simulation sample rate fs (the returned sample rate is discarded).
speech_file = './speech_data/3081-166546-0000.flac'
speech, _ = librosa.load(path=speech_file, sr=fs)

# Report the length of the loaded signal in samples and seconds
print(f"Speech signal: {len(speech)} samples ({len(speech)/fs:.2f} sec)")

In [None]:
# Convolve speech with RIRs to create multichannel "clean" reverberant signals.
# clean_signals maps T60 in milliseconds -> array of shape (mics, samples).
clean_signals = {}

for t60_ms, h in rir_dict.items():
    # Local names only: unpacking into n_mics here would overwrite the
    # array-size setting defined in the simulation parameters cell.
    n_taps, n_channels = h.shape
    out_len = len(speech) + n_taps - 1  # length of a full linear convolution

    # Convolve speech with each mic's RIR; one row per microphone
    y = np.array([sig.fftconvolve(speech, h[:, m]) for m in range(n_channels)])
    assert y.shape == (n_channels, out_len), f"unexpected output shape {y.shape}"

    clean_signals[t60_ms] = y
    print(f"T60={t60_ms}ms: output shape = {y.shape} (mics x samples)")

In [None]:
# Plot the measured signal at the first microphone, one row per T60 value.
# The grid is sized from t60_ms_list so it stays valid if that list changes.
n_rows = len(t60_ms_list)
fig, axes = plt.subplots(n_rows, 1, figsize=(12, 2.5 * n_rows),
                         sharex=True, squeeze=False)
axes = axes.ravel()  # squeeze=False always returns a 2-D grid; flatten to 1-D

for ax, t60_ms in zip(axes, t60_ms_list):
    y = clean_signals[t60_ms]
    t = np.arange(y.shape[1]) / fs  # time in seconds

    ax.plot(t, y[0], linewidth=0.5)
    ax.set_ylabel('Amplitude')
    ax.set_title(f'Reverberant Speech - T60 = {t60_ms} ms (Mic 1)')
    ax.grid(True, alpha=0.3)

# The x-axis is shared, so only the bottom panel gets the label
axes[-1].set_xlabel('Time (s)')
plt.tight_layout()
plt.show()