In [3]:
import os
import librosa
import soundfile as sf
import numpy as np

# Length, in seconds, that every clip is forced to.
TARGET_DURATION = 4.0
# Folder that receives the length-normalized files.
OUTPUT_DIR = "normalized_american_wavs"

# Make sure the output folder is present; an already-existing folder is fine.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

def normalize_audio_length(input_path, output_path, target_duration):
    """Write a copy of an audio file that lasts exactly ``target_duration`` seconds.

    The input is read with ``librosa.load(..., sr=None)`` and the sample rate
    returned by that call is reused when writing, so no resampling is
    requested here. Audio longer than the target is cut at the end; audio
    shorter than the target is padded at the end with zeros (silence).

    NOTE(review): ``librosa.load`` is called with its default channel
    handling, which per the librosa documentation downmixes to mono --
    confirm that is what is wanted for multichannel inputs.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the fixed-length audio is written to.
        target_duration: Desired length in seconds; must not be negative.

    Raises:
        ValueError: If ``target_duration`` is negative.
    """
    # A negative duration would produce a negative sample count, and
    # ``audio[:negative]`` silently trims samples off the end instead of
    # failing. Reject it up front, before any file I/O happens.
    if target_duration < 0:
        raise ValueError(
            f"target_duration must be non-negative, got {target_duration!r}"
        )

    # Load the audio file
    audio, sr = librosa.load(input_path, sr=None)

    # Round to the nearest sample. Plain int() truncates, so a product that
    # lands just below a whole number because of floating-point error
    # (e.g. 0.7 * 44100) would come out one sample short.
    target_samples = int(round(target_duration * sr))

    if len(audio) > target_samples:
        # Truncate the audio if it's longer than the target duration
        normalized_audio = audio[:target_samples]
    else:
        # Pad the end with zeros (silence); a pad of 0 leaves the audio as is
        pad_length = target_samples - len(audio)
        normalized_audio = np.pad(audio, (0, pad_length), mode='constant')

    # Save the normalized audio
    sf.write(output_path, normalized_audio, sr)

# Directory containing the WAV files
input_dir = r"D:\Speech_Processing\data\american_accent"

# Normalize all WAV files in the directory.
# sorted() gives a reproducible processing order; os.listdir() promises none.
for filename in sorted(os.listdir(input_dir)):
    # Compare the extension case-insensitively so "clip.WAV" is not skipped.
    if not filename.lower().endswith(".wav"):
        continue

    input_path = os.path.join(input_dir, filename)
    # Skip anything that is not a regular file (e.g. a folder named "x.wav").
    if not os.path.isfile(input_path):
        continue

    output_path = os.path.join(OUTPUT_DIR, filename)
    normalize_audio_length(input_path, output_path, TARGET_DURATION)

print(f"Normalized audio files saved in {OUTPUT_DIR}.")


Normalized audio files saved in normalized_american_wavs.
