# Binaural Panner Using a SOFA File

In [None]:
# Install the third-party packages for this notebook into the current environment.
# NOTE(review): pysofaconventions is installed here, but the cells visible in
# this notebook open the SOFA file with h5py instead — confirm it is still needed.
!pip install pysofaconventions numpy scipy soundfile

In [None]:
# Install h5py, which the following cells use (h5py.File) to open the SOFA file.
!pip install h5py

In [None]:
import h5py

# Open the SOFA file read-only and print a quick summary of its contents.
sofa_path = "mit_kemar_normal_pinna.sofa"
with h5py.File(sofa_path, mode="r") as sofa_file:
    # Top-level entry names stored in the container
    print("Keys in the file:", list(sofa_file))

    # Impulse-response data (HRIRs), read fully into memory
    ir_dataset = sofa_file["Data.IR"]
    hrir = ir_dataset[:]
    print(f"HRIR shape: {hrir.shape}")

    # Measured source positions (the later cells use columns 0 and 1 as
    # azimuth and elevation)
    position_dataset = sofa_file["SourcePosition"]
    source_positions = position_dataset[:]
    print(f"Source Positions: {source_positions}")

    # Sampling rate entry of the impulse responses
    rate_dataset = sofa_file["Data.SamplingRate"]
    sampling_rate = rate_dataset[:]
    print(f"Sampling Rate: {sampling_rate}")


In [11]:
import numpy as np
import soundfile as sf
from scipy.signal import fftconvolve
import h5py

# Render a mono source binaurally at one fixed direction by convolving it with
# the nearest measured HRIR pair from a SOFA file, then write a stereo WAV.

# --- Configuration -----------------------------------------------------------
sofa_file = "mit_kemar_normal_pinna.sofa"  # Update with your SOFA file path
input_path = "lil-baby-style-dark-trap-piano_135bpm_A#_minor.wav"  # Update with your input file
output_path = "output_binaural_audio50.wav"

# Define panning parameters
azimuth = 280  # Azimuth angle in degrees
elevation = 50  # Elevation angle in degrees

# --- Load HRIR dataset -------------------------------------------------------
with h5py.File(sofa_file, "r") as sofa:
    # Source positions; columns 0 and 1 are used below as azimuth and elevation
    source_positions = sofa["SourcePosition"][:]

    # HRIR data, indexed below as [measurement, receiver (ear), sample]
    hrir_data = sofa["Data.IR"][:]

    # Sampling rate of the impulse responses
    sampling_rate = int(sofa["Data.SamplingRate"][0])

if hrir_data.ndim != 3 or hrir_data.shape[1] < 2:
    raise ValueError(
        f"Expected Data.IR with shape (measurements, >=2 receivers, samples), got {hrir_data.shape}"
    )

# --- Load input audio --------------------------------------------------------
input_audio, fs = sf.read(input_path)
if input_audio.ndim > 1:
    input_audio = np.mean(input_audio, axis=1)  # Convert to mono if multi-channel
if input_audio.size == 0:
    raise ValueError("Input audio is empty")

# The HRIRs are only valid at their own sampling rate; refuse to mix rates.
if fs != sampling_rate:
    raise ValueError(f"Sampling rate mismatch: Audio ({fs} Hz) vs HRIR ({sampling_rate} Hz)")

# --- Find the closest measured direction -------------------------------------
azimuths = source_positions[:, 0]
elevations = source_positions[:, 1]

# Use the great-circle (angular) distance on the sphere instead of a Euclidean
# distance on raw degrees: it handles the 360-degree azimuth wrap-around (e.g.
# 280 degrees vs. a file that stores -80 degrees) and the shrinking of azimuth
# differences near the poles.
az_rad = np.radians(azimuths)
el_rad = np.radians(elevations)
target_az = np.radians(azimuth)
target_el = np.radians(elevation)
cos_angle = (
    np.sin(el_rad) * np.sin(target_el)
    + np.cos(el_rad) * np.cos(target_el) * np.cos(az_rad - target_az)
)
# Clip guards against rounding pushing the cosine slightly outside [-1, 1].
distances = np.degrees(np.arccos(np.clip(cos_angle, -1.0, 1.0)))
closest_idx = np.argmin(distances)

# --- Extract HRIRs for left and right ears -----------------------------------
# Receiver 0 is treated as the left ear and receiver 1 as the right ear
# (the usual SOFA HRIR layout — TODO confirm for your file).
hrir_left = hrir_data[closest_idx, 0, :]
hrir_right = hrir_data[closest_idx, 1, :]

# --- Convolve input audio with HRIRs -----------------------------------------
# mode="full" keeps the complete linear convolution; "same" would drop the
# first and last ~half filter length of the response.
output_left = fftconvolve(input_audio, hrir_left, mode="full")
output_right = fftconvolve(input_audio, hrir_right, mode="full")

# --- Combine into stereo output ----------------------------------------------
# Column 0 is the left channel and column 1 the right channel of the WAV file.
output_audio = np.column_stack((output_left, output_right))

# Scale down only when needed so that samples beyond full scale are not
# clipped when written as integer PCM.
peak = np.max(np.abs(output_audio))
if peak > 1.0:
    output_audio = output_audio / peak

# --- Save the output audio ---------------------------------------------------
sf.write(output_path, output_audio, fs)

# Print confirmation (reports the path that was actually written)
print(f"Binaural audio saved as '{output_path}'")


Binaural audio saved as 'output_binaural_audio.wav'


In [6]:
import numpy as np
import soundfile as sf
from scipy.signal import fftconvolve
import h5py
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import ttk

def _nearest_hrir_index(source_positions, azimuth, elevation):
    """Return the row index of the measured direction closest to the target.

    Closeness is the great-circle angle between directions, so azimuth wraps
    around correctly (350 degrees is 10 degrees away from 0, and 280 degrees
    matches a stored -80 degrees) and azimuth differences count for less near
    the poles.

    Args:
        source_positions: 2-D array-like; column 0 is azimuth and column 1 is
            elevation, both in degrees. Further columns are ignored.
        azimuth: Target azimuth in degrees.
        elevation: Target elevation in degrees.

    Returns:
        int: Index of the closest row in ``source_positions``.

    Raises:
        ValueError: If ``source_positions`` is empty or lacks the two columns.
    """
    positions = np.asarray(source_positions, dtype=float)
    if positions.ndim != 2 or positions.shape[0] == 0 or positions.shape[1] < 2:
        raise ValueError(
            "SourcePosition must be a non-empty 2-D array with azimuth and elevation columns"
        )

    az_rad = np.radians(positions[:, 0])
    el_rad = np.radians(positions[:, 1])
    target_az = np.radians(float(azimuth))
    target_el = np.radians(float(elevation))

    # Cosine of the angle between each measured direction and the target;
    # the largest cosine is the smallest angle.
    cos_angle = (
        np.sin(el_rad) * np.sin(target_el)
        + np.cos(el_rad) * np.cos(target_el) * np.cos(az_rad - target_az)
    )
    return int(np.argmax(cos_angle))


def process_audio():
    """Render the selected audio file binaurally and save it as a stereo WAV.

    Reads the SOFA and audio paths from the ``sofa_file_path`` and
    ``audio_file_path`` Tk variables and the direction from
    ``azimuth_slider`` / ``elevation_slider``. The audio is mixed down to
    mono, convolved with the nearest measured HRIR pair, and written to a
    location chosen through a save dialog. Any failure is reported in an
    error dialog instead of being raised.
    """
    try:
        # Get file paths from UI inputs
        sofa_file = sofa_file_path.get().strip()
        audio_file = audio_file_path.get().strip()
        if not sofa_file or not audio_file:
            raise ValueError("Please select both a SOFA file and an audio file.")

        # Load HRIR dataset
        with h5py.File(sofa_file, "r") as sofa:
            # Columns 0 and 1 are used as azimuth and elevation
            source_positions = sofa["SourcePosition"][:]
            # Indexed below as [measurement, receiver (ear), sample]
            hrir_data = sofa["Data.IR"][:]
            # Sampling rate of the impulse responses
            sampling_rate = int(sofa["Data.SamplingRate"][0])

        if hrir_data.ndim != 3 or hrir_data.shape[1] < 2:
            raise ValueError(
                f"Expected Data.IR with shape (measurements, >=2 receivers, samples), got {hrir_data.shape}"
            )

        # Load input audio
        input_audio, fs = sf.read(audio_file)
        if input_audio.ndim > 1:
            input_audio = np.mean(input_audio, axis=1)  # Convert to mono if multi-channel
        if input_audio.size == 0:
            raise ValueError("Input audio is empty")

        # The HRIRs are only valid at their own sampling rate
        if fs != sampling_rate:
            raise ValueError(f"Sampling rate mismatch: Audio ({fs} Hz) vs HRIR ({sampling_rate} Hz)")

        # Get azimuth and elevation from the sliders
        azimuth = azimuth_slider.get()
        elevation = elevation_slider.get()

        # Find the closest measured direction (wrap-around aware)
        closest_idx = _nearest_hrir_index(source_positions, azimuth, elevation)

        # Receiver 0 is treated as the left ear and receiver 1 as the right
        # ear (the usual SOFA HRIR layout — TODO confirm for your file).
        hrir_left = hrir_data[closest_idx, 0, :]
        hrir_right = hrir_data[closest_idx, 1, :]

        # mode="full" keeps the complete linear convolution; "same" would
        # drop the start and tail of the filtered signal.
        output_left = fftconvolve(input_audio, hrir_left, mode="full")
        output_right = fftconvolve(input_audio, hrir_right, mode="full")

        # Column 0 is the left channel and column 1 the right channel
        output_audio = np.column_stack((output_left, output_right))

        # Scale down only when needed so samples beyond full scale are not
        # clipped when written as integer PCM.
        peak = np.max(np.abs(output_audio))
        if peak > 1.0:
            output_audio = output_audio / peak

        # Ask where to save; an empty result means the dialog was cancelled
        output_file = filedialog.asksaveasfilename(defaultextension=".wav", filetypes=[("WAV Files", "*.wav")])
        if output_file:
            sf.write(output_file, output_audio, fs)
            messagebox.showinfo("Success", f"Binaural audio saved as '{output_file}'")

    except Exception as e:
        # Surface every failure to the user; the GUI keeps running
        messagebox.showerror("Error", str(e))

def browse_sofa():
    """Let the user pick a SOFA file and remember its path.

    Opens a file-open dialog filtered to ``*.sofa``. When a file is chosen its
    path is stored in ``sofa_file_path``; when the dialog returns an empty
    result the variable is left unchanged.
    """
    chosen = filedialog.askopenfilename(filetypes=[("SOFA Files", "*.sofa")])
    if not chosen:
        return
    sofa_file_path.set(chosen)

def browse_audio():
    """Let the user pick a WAV file and remember its path.

    Opens a file-open dialog filtered to ``*.wav``. When a file is chosen its
    path is stored in ``audio_file_path``; when the dialog returns an empty
    result the variable is left unchanged.
    """
    chosen = filedialog.askopenfilename(filetypes=[("WAV Files", "*.wav")])
    if not chosen:
        return
    audio_file_path.set(chosen)

# Build the main application window
root = tk.Tk()
root.title("Binaural Panner")

# Tk variables holding the chosen file paths; written by the browse_*
# callbacks and read by process_audio.
sofa_file_path = tk.StringVar()
audio_file_path = tk.StringVar()

# Rows 0-1: one label / entry / browse-button triple per input file
file_rows = (
    ("Select SOFA file:", sofa_file_path, browse_sofa),
    ("Select Audio file:", audio_file_path, browse_audio),
)
for row_index, (caption, path_var, on_browse) in enumerate(file_rows):
    tk.Label(root, text=caption).grid(row=row_index, column=0, padx=10, pady=5)
    tk.Entry(root, textvariable=path_var, width=40).grid(row=row_index, column=1, padx=10, pady=5)
    tk.Button(root, text="Browse", command=on_browse).grid(row=row_index, column=2, padx=10, pady=5)

# Row 2: azimuth slider, 0 to 360, starting at 0
tk.Label(root, text="Azimuth (degrees):").grid(row=2, column=0, padx=10, pady=5)
azimuth_slider = tk.Scale(root, from_=0, to=360, orient="horizontal")
azimuth_slider.grid(row=2, column=1, padx=10, pady=5)
azimuth_slider.set(0)

# Row 3: elevation slider, -90 to 90, starting at 0
tk.Label(root, text="Elevation (degrees):").grid(row=3, column=0, padx=10, pady=5)
elevation_slider = tk.Scale(root, from_=-90, to=90, orient="horizontal")
elevation_slider.grid(row=3, column=1, padx=10, pady=5)
elevation_slider.set(0)

# Row 4: button that triggers the rendering, spanning all three columns
generate_button = tk.Button(root, text="Generate Binaural Audio", command=process_audio)
generate_button.grid(row=4, column=0, columnspan=3, pady=20)

# Hand control to Tk; this call blocks until the window is closed
root.mainloop()

In [None]:
# Upgrade pip itself, then install numpy and scipy.
# NOTE(review): numpy and scipy are already requested by the first install
# cell of this notebook, and this cell sits after the code that imports them —
# it looks redundant; confirm before removing.
!pip install --upgrade pip
!pip install numpy scipy