# Perceptual optimization

#### Import required libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.signal import spectrogram

#### Visualize Masking Effects

##### **Step 1. Load the Audio File**
Load a provided stereo or mono audio file in `.wav` format. Ensure that the file is in a supported format and contains clear audio for visualization purposes.

##### **Step 2. Compute the Spectrogram of the Audio Signal**
The spectrogram provides a visual way to analyze how the signal’s frequency content changes over time, allowing for a clearer view of both high and low energy regions in the audio.

##### **Step 3. Define and Apply a Masking Threshold**
- A simple masking threshold is defined using a constant multiplier (`masking_constant`) and applied directly to the spectrogram’s power data to simulate masking effects.
- By applying `np.sqrt(spectrogram_data)` scaled by `masking_constant`, a masking threshold is generated that approximates where high-energy frequencies can mask quieter, nearby frequencies.
- This threshold can be adjusted with different constants to demonstrate varying levels of masking effect strength.

##### **Step 4. Plot the Spectrogram with the Masking Threshold Overlay**
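The spectrogram is plotted on a decibel scale, and the masking threshold, averaged over all frequency bins for each time frame, is drawn on top of it as a red curve. As in Step 2, the spectrogram shows how the signal’s frequency content changes over time, allowing for a clearer view of both high and low energy regions in the audio.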


In [None]:
# Step 1: Load the audio file
sample_rate, signal = wavfile.read('audio_samples_06/FILE_NAME.wav')  # Use appropriate file path

if signal.ndim > 1:
    signal = signal.mean(axis=1)  # Convert to mono if the file is stereo

# Step 2: Compute the spectrogram of the audio signal
# spectrogram_data has one row per frequency bin and one column per time frame.
frequencies, times, spectrogram_data = spectrogram(signal, sample_rate)

# Step 3: Define and apply a masking threshold
masking_constant = 1.0  # Basic constant for threshold demonstration
# Square root of the power density gives an amplitude-like quantity.
masking_thresholds = masking_constant * np.sqrt(spectrogram_data)

# Step 4: Plot the spectrogram and the calculated masking threshold
eps = 1e-10  # Small offset to prevent log10(0)

fig, ax_spec = plt.subplots(figsize=(12, 6))
mesh = ax_spec.pcolormesh(
    times,
    frequencies,
    10 * np.log10(spectrogram_data + eps),
    shading='gouraud',
    cmap='viridis',
)
ax_spec.set_ylim(0, sample_rate / 2)
ax_spec.set_xlabel("Time (s)")
ax_spec.set_ylabel("Frequency (Hz)")
ax_spec.set_title("Spectrogram with Masking Threshold Overlay")

# The threshold is an amplitude, not a frequency, so it gets its own y-axis.
# Drawing it against the Hz axis would place the curve at an arbitrary height
# that depends only on how the samples happen to be scaled.
ax_thr = ax_spec.twinx()
ax_thr.plot(times, masking_thresholds.mean(axis=0), color='red', label="Masking Threshold (Average)")
ax_thr.set_ylabel("Masking Threshold (amplitude / sqrt(Hz))")
ax_thr.legend()

# Take the colorbar space from both twinned axes and pad it so it does not
# overlap the secondary axis label.
fig.colorbar(mesh, ax=[ax_spec, ax_thr], pad=0.1, label="Power/Frequency (dB/Hz)")
plt.show()

#### Apply Basic Perceptual Filtering

##### **Step 1. Load the Audio Signal**
The audio file is loaded. If it's stereo, it is converted to mono by averaging the channels.

##### **Step 2. Define the Perceptual Filter and Apply the Filter**
A basic perceptual filter is defined. The threshold parameter determines the level below which a sample is considered negligible. If the absolute value of a sample is below this threshold, that sample is set to zero.

##### **Step 3. Plot Results**
The original and filtered signals are plotted for visual comparison. The original signal is displayed in blue, and the filtered signal in red.

In [None]:
# Step 1: Load the audio file
sample_rate, signal = wavfile.read('audio_samples_06/FILE_NAME.wav')  # Use appropriate file path

if signal.ndim > 1:
    signal = signal.mean(axis=1)  # Convert to mono if the file is stereo

# Step 2: Define a basic perceptual filter
threshold = ???  # Threshold parameter for filtering
filtered_signal = np.where(np.abs(signal) < threshold, 0, signal)  # Zero out values below the threshold

# Ensure the filtered signal has no NaN values
filtered_signal = np.nan_to_num(filtered_signal)

# Step 3: Compare the original and filtered signals
plt.figure(figsize=(12, 6))

# Original signal
plt.subplot(2, 1, 1)
plt.plot(signal, color='blue', alpha=0.6, label='Original Signal')
plt.title("Original Audio Signal")
plt.xlabel("Sample Index")
plt.ylabel("Amplitude")
plt.grid(True)
plt.legend()

# Filtered signal
plt.subplot(2, 1, 2)
plt.plot(filtered_signal, color='red', alpha=0.6, label='Filtered Signal')
plt.title("Filtered Audio Signal")
plt.xlabel("Sample Index")
plt.ylabel("Amplitude")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Step 4: Save the filtered signal as .wav
# Ensure that the filtered signal is within the proper range
filtered_signal = np.clip(filtered_signal, -1, 1)  # Clip values to ensure they are within the range
wavfile.write('filtered_signal.wav', sample_rate, (filtered_signal * 32767).astype(np.int16))

#### Compare Results of Basic Perceptual Filtering

##### **Step 1. Load the Audio File**
The audio file is loaded, and if it's stereo, it is converted to mono by averaging the channels.

##### **Step 2. Define Thresholds**
A list of different threshold values is created to demonstrate the effect of perceptual filtering.

##### **Step 3. Apply Filtering**
For each threshold, the code applies the perceptual filter. Values below the threshold are zeroed out, and the filtered signals are stored in a list.

##### **Step 4. Visualization**
The original audio signal is plotted first, followed by the filtered signals for each threshold. This helps in comparing how different thresholds affect the audio signal.

In [None]:
# Step 1: Load the audio file
sample_rate, signal = wavfile.read('audio_samples_06/FILE_NAME.wav')  # Use appropriate file path

if signal.ndim > 1:
    signal = signal.mean(axis=1)  # Convert to mono if the file is stereo

# Step 2: Define a range of threshold values
thresholds = [?, ?, ?]  # Different threshold values for comparison
filtered_signals = []  # List to store filtered signals for each threshold

# Step 3: Apply perceptual filtering for each threshold and store results
for threshold in thresholds:
    filtered_signal = np.where(np.abs(signal) < threshold, 0, signal)  # Zero out values below the threshold
    filtered_signals.append(filtered_signal)

# Step 4: Visualize the original and filtered signals for each threshold
plt.figure(figsize=(12, 12))

# Plot original signal
plt.subplot(len(thresholds) + 1, 1, 1)
plt.plot(signal, color='blue', alpha=0.6, label='Original Signal')
plt.title("Original Audio Signal")
plt.xlabel("Sample Index")
plt.ylabel("Amplitude")
plt.grid(True)
plt.legend()

# Plot filtered signals for each threshold
for i, threshold in enumerate(thresholds):
    plt.subplot(len(thresholds) + 1, 1, i + 2)
    plt.plot(filtered_signals[i], color='red', alpha=0.6, label=f'Filtered Signal (Threshold: {threshold})')
    plt.title(f"Filtered Audio Signal (Threshold: {threshold})")
    plt.xlabel("Sample Index")
    plt.ylabel("Amplitude")
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()