In [1]:
import os
import librosa
import soundfile as sf
from tqdm import tqdm


In [2]:
# Folder holding the original scream recordings, and the folder that receives
# their processed copies (created on first run, left alone if already there).
input_dir, output_dir = 'scream_data', 'scream_data_processed'
os.makedirs(output_dir, exist_ok=True)

In [3]:
### mono and 16kHz data will be stored in scream_data_processed
# Every WAV file found in `input_dir` is loaded as mono / 16 kHz audio and
# written to `output_dir` under the same base name.
#
# The file list is built *before* it is handed to tqdm so the progress bar
# counts only the files that are actually converted, the extension test is
# case-insensitive so files such as `clip.WAV` are not silently skipped, and
# the list is sorted so runs are processed in a reproducible order.
wav_files = sorted(
    name for name in os.listdir(input_dir)
    if name.lower().endswith('.wav')
)

for filename in tqdm(wav_files):
    input_path = os.path.join(input_dir, filename)
    # Always write a lowercase `.wav` extension so later steps that look for
    # `.wav` find the file; for ordinary `*.wav` inputs the name is unchanged.
    output_path = os.path.join(output_dir, os.path.splitext(filename)[0] + '.wav')

    try:
        # sr=16000 / mono=True ask librosa for 16 kHz, single-channel audio.
        audio, sr = librosa.load(input_path, sr=16000, mono=True)
        sf.write(output_path, audio, sr)
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"Failed to process {filename}: {e}")

100%|██████████| 72/72 [00:20<00:00,  3.52it/s]


In [4]:
# same for non scream data: mono / 16 kHz copies of every WAV file in
# non_scream_data are written to non_scream_data_processed.
#
# Dedicated variable names are used on purpose. Rebinding `input_dir` /
# `output_dir` here would overwrite the scream paths defined earlier, so
# re-running the scream conversion cell afterwards would silently process the
# non-scream folder instead.
non_scream_input_dir = 'non_scream_data'
non_scream_output_dir = 'non_scream_data_processed'
os.makedirs(non_scream_output_dir, exist_ok=True)

# Filter first so the progress bar counts only real work; match the extension
# case-insensitively so `*.WAV` files are not skipped; sort for a reproducible
# processing order.
non_scream_wav_files = sorted(
    name for name in os.listdir(non_scream_input_dir)
    if name.lower().endswith('.wav')
)

for filename in tqdm(non_scream_wav_files):
    input_path = os.path.join(non_scream_input_dir, filename)
    # Always write a lowercase `.wav` extension so later steps that look for
    # `.wav` find the file; for ordinary `*.wav` inputs the name is unchanged.
    output_path = os.path.join(
        non_scream_output_dir, os.path.splitext(filename)[0] + '.wav'
    )

    try:
        # sr=16000 / mono=True ask librosa for 16 kHz, single-channel audio.
        audio, sr = librosa.load(input_path, sr=16000, mono=True)
        sf.write(output_path, audio, sr)
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"Failed to process {filename}: {e}")

100%|██████████| 71/71 [00:00<00:00, 74.29it/s]


In [5]:
import shutil
import pandas as pd

# Merge the two processed folders into a single labelled dataset:
#   scream_dataset/audio/        the .wav files of both classes
#   scream_dataset/metadata.csv  one row per copied file (filename, label)

# Input folders
scream_dir = 'scream_data_processed'
non_scream_dir = 'non_scream_data_processed'

# Output folder
output_audio_dir = 'scream_dataset/audio'
os.makedirs(output_audio_dir, exist_ok=True)

metadata = []
used_names = set()  # destination file names already written during this run

# Both classes land in the same destination folder. A name that occurs in both
# inputs (e.g. `1.wav`) would otherwise be overwritten by the later copy while
# the metadata keeps two rows with contradictory labels for that one file.
# Colliding names are therefore stored as `<label>_<name>`.
for label, class_dir in (('scream', scream_dir), ('non_scream', non_scream_dir)):
    # Sorted so the CSV row order is reproducible between runs.
    for fname in sorted(os.listdir(class_dir)):
        if not fname.endswith('.wav'):
            continue

        dst_name = fname
        if dst_name in used_names:
            stem, ext = os.path.splitext(fname)
            dst_name = f"{label}_{fname}"
            counter = 1
            # Very unlikely, but keep trying until the name is really free.
            while dst_name in used_names:
                dst_name = f"{label}_{stem}_{counter}{ext}"
                counter += 1
            print(f"Name collision: {fname} ({label}) stored as {dst_name}")
        used_names.add(dst_name)

        src = os.path.join(class_dir, fname)
        dst = os.path.join(output_audio_dir, dst_name)
        shutil.copy(src, dst)
        # The metadata records the name actually used inside the dataset folder.
        metadata.append({'filename': dst_name, 'label': label})

# Save metadata CSV (explicit columns keep the header row even when no file
# was found in either input folder)
df = pd.DataFrame(metadata, columns=['filename', 'label'])
df.to_csv('scream_dataset/metadata.csv', index=False)

print("Done, data is ready in scream_dataset/")


Done, data is ready in scream_dataset/
