In [7]:
import os
import numpy as np
import librosa
import soundfile as sf

# Define preprocessing function
def preprocess_audio(file_path, target_length=5.0, target_db=-20.0):
    """Load an audio file and return a level-normalized, fixed-length signal.

    The file is loaded at its native sample rate. Its DC offset is removed,
    the signal is scaled so that its RMS level equals ``target_db``, and the
    result is zero-padded or truncated to ``target_length`` seconds.

    The level is measured over the whole loaded signal, before padding or
    trimming, so a clip that gets padded with silence ends up with a lower
    overall RMS than ``target_db``.

    Args:
        file_path: Path of the audio file to load.
        target_length: Duration of the returned signal, in seconds.
        target_db: Desired RMS level in dB relative to an amplitude of 1.0.

    Returns:
        A tuple ``(audio, sr)`` with the processed samples and the sample
        rate reported by ``librosa.load``.
    """
    # sr=None keeps the file's native sample rate instead of resampling.
    audio, sr = librosa.load(file_path, sr=None)

    rms = 0.0
    if audio.size:
        # Remove the DC offset before padding so that the zero padding added
        # below stays at exactly zero.
        audio = audio - np.mean(audio)
        # Accumulate in float64 so that long clips do not lose precision.
        rms = float(np.sqrt(np.mean(np.square(audio, dtype=np.float64))))

    # Level normalization: apply ONE linear gain to the signed waveform.
    # Converting per-sample magnitudes to dB and back would discard the sign
    # of every sample and rectify the signal. A separate peak normalization
    # is not needed because the RMS gain is independent of the input scale.
    # Silent input (rms == 0) is left untouched to avoid dividing by zero.
    if rms > 0.0:
        gain = 10.0 ** (target_db / 20.0) / rms
        audio = audio * gain

    # Padding or trimming to the requested duration.
    target_samples = int(target_length * sr)
    missing = target_samples - len(audio)
    if missing > 0:
        # Pad with zeros at the end if the audio is shorter.
        audio = np.pad(audio, (0, missing), mode='constant')
    else:
        # Keep only the leading samples if the audio is longer.
        audio = audio[:target_samples]

    return audio, sr

# Define function to preprocess all audio files in a folder
def preprocess_audio_folder(input_folder, output_folder, target_length=5.0, target_db=-20.0):
    """Preprocess every audio file in a folder and save the results.

    Each ``.wav`` / ``.mp3`` file directly inside ``input_folder`` is passed
    through ``preprocess_audio`` and written to ``output_folder`` under the
    same file name. Other directory entries are ignored.

    Args:
        input_folder: Directory that is scanned for audio files.
        output_folder: Directory that receives the processed files; it is
            created if it does not exist yet.
        target_length: Forwarded to ``preprocess_audio``.
        target_db: Forwarded to ``preprocess_audio``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    audio_extensions = (".wav", ".mp3")  # Add more formats if needed

    # os.listdir returns entries in arbitrary order; sort for a reproducible run.
    for filename in sorted(os.listdir(input_folder)):
        # Compare case-insensitively so that e.g. "CLIP.WAV" is not skipped.
        if not filename.lower().endswith(audio_extensions):
            continue

        file_path = os.path.join(input_folder, filename)
        processed_audio, sr = preprocess_audio(file_path, target_length, target_db)

        # The output keeps the input's file name, extension included.
        # NOTE(review): presumably soundfile derives the output format from
        # that extension - confirm the installed libsndfile can write it.
        output_file_path = os.path.join(output_folder, filename)
        sf.write(output_file_path, processed_audio, sr)
        print(f"Processed {filename} and saved to {output_file_path}")

# Usage: preprocess every audio file found in "sample/" into "processed",
# relying on the default target length and level.
output_folder = "processed"
input_folder = "sample/"
preprocess_audio_folder(input_folder=input_folder, output_folder=output_folder)


Processed sample-000000.mp3 and saved to processed\sample-000000.mp3
Processed sample-000001.mp3 and saved to processed\sample-000001.mp3
Processed sample-000002.mp3 and saved to processed\sample-000002.mp3
Processed sample-000003.mp3 and saved to processed\sample-000003.mp3
Processed sample-000004.mp3 and saved to processed\sample-000004.mp3
Processed sample-000005.mp3 and saved to processed\sample-000005.mp3
Processed sample-000006.mp3 and saved to processed\sample-000006.mp3
Processed sample-000007.mp3 and saved to processed\sample-000007.mp3
Processed sample-000008.mp3 and saved to processed\sample-000008.mp3
Processed sample-000009.mp3 and saved to processed\sample-000009.mp3
Processed sample-000010.mp3 and saved to processed\sample-000010.mp3
Processed sample-000011.mp3 and saved to processed\sample-000011.mp3
Processed sample-000012.mp3 and saved to processed\sample-000012.mp3
Processed sample-000013.mp3 and saved to processed\sample-000013.mp3
Processed sample-000014.mp3 and sa