## Data Preprocessing

### resampling, trimming, normalization

**Input File:** https://www.kaggle.com/datasets/ejlok1/cremad

## **LINK TO DRIVE:** https://drive.google.com/drive/folders/1IymBA_qUoM3-sAq6JktS6hvskxqvU3AF?usp=drive_link

- The processed dataset is too big to add to GitHub, so it is hosted on Google Drive instead.


### --- Audio Preprocessing for CREMA-D Dataset ---


In [1]:
# Install dependencies
!pip install librosa soundfile --quiet

import os
import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm

In [2]:
# Step 1: Unzip CREMA-D Audio Files
!unzip AudioWAV.zip -d /content/CREMA-D

Streaming output truncated to the last 5000 lines.
  inflating: /content/CREMA-D/AudioWAV/1044_TSI_DIS_XX.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1044_TSI_DIS_XX.wav  
  inflating: /content/CREMA-D/AudioWAV/1062_IEO_DIS_MD.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1062_IEO_DIS_MD.wav  
  inflating: /content/CREMA-D/AudioWAV/1077_IEO_DIS_MD.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1077_IEO_DIS_MD.wav  
  inflating: /content/CREMA-D/AudioWAV/1036_IWL_DIS_XX.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1036_IWL_DIS_XX.wav  
  inflating: /content/CREMA-D/AudioWAV/1023_IWL_DIS_XX.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1023_IWL_DIS_XX.wav  
  inflating: /content/CREMA-D/AudioWAV/1073_IEO_DIS_HI.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1073_IEO_DIS_HI.wav  
  inflating: /content/CREMA-D/AudioWAV/1066_IEO_DIS_HI.wav  
  inflating: /content/CREMA-D/__MACOSX/AudioWAV/._1066_IEO_DIS_HI.wav  
  in

The preprocessing steps below can all be tweaked as needed:
- Resampling to 16,000 Hz (`librosa.resample`)
- Silence trimming (`librosa.effects.trim`)
- Normalization (audio scaled to a maximum amplitude of 1)

In [3]:
def preprocess_audio(file_path, target_sr=16000):
    """
    Preprocess one audio file: resample, trim silence, peak-normalize.

    Args:
        file_path: Path of the audio file to load.
        target_sr: Sampling rate (Hz) the returned audio should have.

    Returns:
        A tuple ``(audio, sr)`` holding the processed samples and their
        sampling rate, which is always ``target_sr``. If the signal is
        empty it is returned as-is (no trimming or scaling is attempted).
    """
    # sr=None keeps the file's native rate, so resampling only happens
    # when the file does not already match target_sr.
    audio, sr = librosa.load(file_path, sr=None)

    # An empty signal has nothing to resample, trim, or normalize.
    if audio.size == 0:
        return audio, target_sr

    # Resample
    if sr != target_sr:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
        sr = target_sr

    # Trim silence
    audio, _ = librosa.effects.trim(audio)

    # Normalize
    # np.max raises ValueError on a zero-size array, so skip scaling if
    # trimming left no samples; an all-zero signal is also left untouched
    # to avoid dividing by zero.
    if audio.size > 0:
        max_amp = np.max(np.abs(audio))
        if max_amp > 0:
            audio = audio / max_amp

    return audio, sr


def preprocess_folder(input_folder, output_folder="/content/ProcessedAudio", target_sr=16000):
    """
    Preprocess every .wav file in a folder, save the results, and zip them.

    Each file is passed through ``preprocess_audio`` and written to
    ``output_folder`` under its original file name. The output folder is
    then archived to ``<output_folder>.zip`` using the ``zip`` command.

    Args:
        input_folder: Directory holding the source .wav files. Only its
            direct entries are considered (no recursion).
        output_folder: Directory for the processed files; created if it
            does not exist.
        target_sr: Sampling rate (Hz) forwarded to ``preprocess_audio``.
    """
    import subprocess  # local import: only the zip step needs it

    os.makedirs(output_folder, exist_ok=True)
    # Match the extension case-insensitively, and skip macOS "._*"
    # AppleDouble sidecar files: they carry a .wav name but are not audio.
    audio_files = [
        f for f in os.listdir(input_folder)
        if f.lower().endswith('.wav') and not f.startswith('._')
    ]

    for file in tqdm(audio_files, desc="Preprocessing audio"):
        input_path = os.path.join(input_folder, file)
        output_path = os.path.join(output_folder, file)

        audio, sr = preprocess_audio(input_path, target_sr)
        sf.write(output_path, audio, sr)

    print(f"Preprocessing done - Files saved to: {output_folder}")

    #zipping file
    zip_path = output_folder + ".zip"
    try:
        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through verbatim.
        subprocess.run(["zip", "-r", zip_path, output_folder], check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        # The processed files are already on disk; report the failure
        # rather than claiming the archive was created.
        print(f"Zipping failed: {err}")
    else:
        print(f"Zipped processed files: {zip_path}")



In [4]:
# Run the preprocessing pipeline on the extracted CREMA-D recordings.
input_folder = "/content/CREMA-D/AudioWAV"  # where the archive was unzipped
output_folder = "/content/CREMA-D-Processed"  # destination for processed audio

preprocess_folder(
    input_folder=input_folder,
    output_folder=output_folder,
    target_sr=16000,
)

Preprocessing audio: 100%|██████████| 7442/7442 [00:43<00:00, 171.55it/s]


Preprocessing done - Files saved to: /content/CREMA-D-Processed
Zipped processed files: /content/CREMA-D-Processed.zip
