# Normalization:
Every audio sample is normalized to a uniform length of 18 seconds by cropping longer files and zero-padding shorter ones.

**Consistency**: Having all audio files with the same length makes it easier for algorithms to process and compare them. It avoids issues where models need to handle variable-length inputs.

**Alignment**: When performing operations like feature extraction (e.g., frequency analysis, zero-crossing rate), it's beneficial for the features to be extracted from audio clips of the same duration, ensuring that features like spectral centroid and dominant frequency are based on consistent timeframes.

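**Padding Shorter Files**: Padding shorter files to a fixed length ensures that you don't lose important information from shorter clips. The padding (here, zeros appended to the end of the clip) leaves the original samples untouched while giving every clip a consistent length. Keep in mind that the appended silence is still part of the clip, so features computed over the whole file (e.g., zero-crossing rate) will reflect it.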


In [2]:
import librosa
import os
import numpy as np
import soundfile as sf

# Audio parameters applied to every file
sample_rate = 22050  # Used for both loading and saving; adjust based on your original sample rate
target_duration = 18  # Length of each normalized clip, in seconds

# Source folder with the raw clips and destination folder for the normalized ones
audio_folder = '../dataset'
output_folder = '../dataset_normalized'

# Make sure the destination exists; an already existing folder is reused
os.makedirs(output_folder, exist_ok=True)

# Function to normalize audio length to a fixed duration (18 seconds in this notebook)
def normalize_audio_length(file_path, output_path, target_duration, sample_rate):
    """Load an audio file, force it to ``target_duration`` seconds, and save it.

    Audio longer than the target is cropped at the end; audio that is shorter
    (or already the exact length) is right-padded with zeros.

    Args:
        file_path: Path of the audio file to read.
        output_path: Path the fixed-length audio is written to.
        target_duration: Desired clip length in seconds.
        sample_rate: Value passed to ``librosa.load`` as ``sr``. ``None`` may
            also be passed; the target length and the saved file then use
            whatever rate ``librosa.load`` returns.
    """
    y, sr = librosa.load(file_path, sr=sample_rate)

    # Derive the sample count from the rate the loader actually returned, not
    # the requested one, so the computation still works when sample_rate is None.
    target_length = int(target_duration * sr)

    if len(y) > target_length:
        # Crop the audio if it's longer than the target duration
        y = y[:target_length]
    else:
        # Pad the end with zeros if it's shorter than the target duration
        y = np.pad(y, (0, target_length - len(y)), 'constant')

    # Save the normalized audio at the same rate it was loaded with
    sf.write(output_path, y, sr)

# Normalize all audio files in the dataset
for filename in os.listdir(audio_folder):
    file_path = os.path.join(audio_folder, filename)

    # Match the extension case-insensitively so files such as 'clip.WAV' are not
    # silently skipped, and ignore directories whose name happens to end in '.wav'.
    # Adjust the extension if necessary.
    if not filename.lower().endswith('.wav') or not os.path.isfile(file_path):
        continue

    output_path = os.path.join(output_folder, filename)
    normalize_audio_length(file_path, output_path, target_duration, sample_rate)

print("All audio files have been normalized to 18 seconds.")


All audio files have been normalized to 18 seconds.
