In [1]:
pip install librosa noisereduce numpy pandas tqdm

Collecting noisereduce
  Downloading noisereduce-3.0.2-py3-none-any.whl.metadata (14 kB)
Collecting tqdm
  Downloading tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)
Downloading noisereduce-3.0.2-py3-none-any.whl (22 kB)
Downloading tqdm-4.66.5-py3-none-any.whl (78 kB)
Installing collected packages: tqdm, noisereduce
Successfully installed noisereduce-3.0.2 tqdm-4.66.5
Note: you may need to restart the kernel to use updated packages.


In [19]:
# Import required libraries
import os
import librosa
import noisereduce as nr
import numpy as np
import pandas as pd
from tqdm import tqdm  # For progress tracking
import soundfile as sf

In [18]:
pip install soundfile

Note: you may need to restart the kernel to use updated packages.


In [5]:
# Define paths
input_folder = "C:/Users/Yojith/Documents/NLP/audiofile"   # Folder containing original audio files
output_folder = "C:/Users/Yojith/Documents/NLP/output_audio" # Folder to save processed audio files

In [6]:
# Parameters
target_sr = 16000      # Target sample rate for resampling
max_duration = 10      # Maximum duration of audio files (in seconds)
max_samples = int(max_duration * target_sr)  # Max samples after padding

In [7]:
# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

In [8]:
# Create a dataframe to store preprocessing details
df_preprocessing = pd.DataFrame(columns=['filename', 'original_sr', 'new_sr', 'max_amplitude'])

In [15]:
# Function to preprocess a single audio file
def preprocess_audio(file_path, target_sr=16000, max_duration=10):
    # Load the audio file with original sample rate
    audio, sr = librosa.load(file_path, sr=None)
    
    # Step 1: Resample if necessary
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
        sr = target_sr  # Update sampling rate after resampling

    # Step 2: Noise Reduction (Using a portion of the audio as noise reference)
    noise_clip = audio[:int(0.25 * len(audio))]  # Take first 25% as noise reference
    audio = nr.reduce_noise(y=audio, y_noise=noise_clip, sr=sr)
    
    # Step 3: Normalization (Scale between -1 and 1)
    max_val = max(abs(audio))
    if max_val != 0:
        audio = audio / max_val

    # Step 4: Padding/Trimming to a fixed length
    max_samples = int(max_duration * sr)  # Calculate max samples based on duration
    audio = librosa.util.fix_length(audio, size=max_samples)

    return audio, sr, target_sr, max_val


In [21]:
import soundfile as sf  # Import soundfile for writing audio files
import pandas as pd     # Ensure pandas is imported

# Create an empty DataFrame to store details of the processed files
df_preprocessing = pd.DataFrame(columns=['filename', 'original_sr', 'new_sr', 'max_amplitude'])

# Main Loop
for file in tqdm(os.listdir(input_folder), desc="Processing Audio Files"):
    if file.endswith('.wav'):  # Check for .wav files
        file_path = os.path.join(input_folder, file)

        # Preprocess the file
        audio_processed, original_sr, new_sr, max_amplitude = preprocess_audio(file_path, target_sr=target_sr, max_duration=max_duration)

        # Save the processed file to output folder
        output_path = os.path.join(output_folder, file)
        sf.write(output_path, audio_processed, samplerate=new_sr)  # Use soundfile.write() to save audio

        # Record the details of the processed file using pd.concat
        df_preprocessing = pd.concat([df_preprocessing, pd.DataFrame([{
            'filename': file, 
            'original_sr': original_sr, 
            'new_sr': new_sr, 
            'max_amplitude': max_amplitude
        }])], ignore_index=True)


Processing Audio Files:   0%|          | 0/4288 [00:00<?, ?it/s]

  df_preprocessing = pd.concat([df_preprocessing, pd.DataFrame([{
Processing Audio Files: 100%|██████████| 4288/4288 [09:11<00:00,  7.77it/s]
