In [1]:
import os
import shutil

def copy_wav_files(root_path, output_path):
    """Flatten every ``.wav`` file found under ``root_path`` into ``output_path``.

    The tree below ``root_path`` is walked recursively and each file whose name
    ends in ``.wav`` (case-insensitive) is copied with ``shutil.copy2`` to
    ``output_path/<basename>``. Files that share a basename overwrite one
    another; the last one visited wins.

    ``output_path`` may be located inside ``root_path``: it is pruned from the
    walk, so files that are already in the destination are never copied onto
    themselves (which would raise ``shutil.SameFileError``).

    Args:
        root_path: Directory to search for ``.wav`` files.
        output_path: Destination directory; created if it does not exist.
    """
    os.makedirs(output_path, exist_ok=True)

    for current_dir, child_dirs, files in os.walk(root_path):
        # Editing the list in place tells os.walk (top-down mode) not to
        # descend into the destination directory.
        child_dirs[:] = [
            name for name in child_dirs
            if not os.path.samefile(os.path.join(current_dir, name), output_path)
        ]

        for file in files:
            if not file.lower().endswith(".wav"):
                continue

            source_file = os.path.join(current_dir, file)
            destination_file = os.path.join(output_path, file)

            try:
                # Copy file (overwrites if exists)
                shutil.copy2(source_file, destination_file)
            except shutil.SameFileError:
                # Happens when root_path itself is the destination; the file
                # is already where it should be.
                continue
            print(f"Copied: {source_file} -> {destination_file}")

# Flatten the fold-structured "original_dataset" tree into the single "data" folder.
root_directory, output_directory = "original_dataset", "data"
copy_wav_files(root_path=root_directory, output_path=output_directory)

Copied: original_dataset\fold1\101415-3-0-2.wav -> data\101415-3-0-2.wav
Copied: original_dataset\fold1\101415-3-0-3.wav -> data\101415-3-0-3.wav
Copied: original_dataset\fold1\101415-3-0-8.wav -> data\101415-3-0-8.wav
Copied: original_dataset\fold1\102106-3-0-0.wav -> data\102106-3-0-0.wav
Copied: original_dataset\fold1\102305-6-0-0.wav -> data\102305-6-0-0.wav
Copied: original_dataset\fold1\102842-3-0-1.wav -> data\102842-3-0-1.wav
Copied: original_dataset\fold1\102842-3-1-0.wav -> data\102842-3-1-0.wav
Copied: original_dataset\fold1\102842-3-1-5.wav -> data\102842-3-1-5.wav
Copied: original_dataset\fold1\102842-3-1-6.wav -> data\102842-3-1-6.wav
Copied: original_dataset\fold1\103074-7-0-0.wav -> data\103074-7-0-0.wav
Copied: original_dataset\fold1\103074-7-0-1.wav -> data\103074-7-0-1.wav
Copied: original_dataset\fold1\103074-7-0-2.wav -> data\103074-7-0-2.wav
Copied: original_dataset\fold1\103074-7-1-0.wav -> data\103074-7-1-0.wav
Copied: original_dataset\fold1\103074-7-1-1.wav -> 

In [2]:
import os
import shutil

def classify_wav_files(input_root, output_root):
    """Copy ``.wav`` files into one sub-folder per class.

    The class is the second hyphen-separated field of the file name with the
    extension removed, e.g. ``100263-2-0-117.wav`` is copied to
    ``output_root/2/100263-2-0-117.wav``. Files whose name has no usable
    second field are reported and skipped.

    ``output_root`` may be located inside ``input_root``: it is pruned from the
    walk so already-classified files are not visited again.

    NOTE: this notebook defines ``classify_wav_files`` in two cells; keep both
    definitions identical (or move the function into a shared module).

    Args:
        input_root: Directory searched recursively for ``.wav`` files.
        output_root: Directory that receives the per-class folders; created if
            it does not exist.
    """
    os.makedirs(output_root, exist_ok=True)  # Ensure the output root exists

    for subdir, child_dirs, files in os.walk(input_root):
        # Editing the list in place stops os.walk (top-down mode) from
        # descending into the destination tree.
        child_dirs[:] = [
            name for name in child_dirs
            if not os.path.samefile(os.path.join(subdir, name), output_root)
        ]

        for file in files:
            if not file.lower().endswith(".wav"):
                continue

            source_file = os.path.join(subdir, file)

            # Split the stem, not the full name, so a two-field name such as
            # "7-3.wav" yields class "3" rather than "3.wav".
            parts = os.path.splitext(file)[0].split('-')
            if len(parts) < 2 or not parts[1]:
                print(f"Skipped (no class field in name): {source_file}")
                continue

            class_path = os.path.join(output_root, parts[1])
            os.makedirs(class_path, exist_ok=True)  # Create class directory if not exists

            destination_file = os.path.join(class_path, file)
            try:
                shutil.copy2(source_file, destination_file)
            except shutil.SameFileError:
                # Source already sits at its classified location.
                continue
            print(f"Copied: {source_file} -> {destination_file}")

# Sort the flattened "data" folder into per-class folders under "data_in_classes".
input_directory, output_directory = "data", "data_in_classes"
classify_wav_files(input_root=input_directory, output_root=output_directory)

Copied: data\100032-3-0-0.wav -> data_in_classes\3\100032-3-0-0.wav
Copied: data\100263-2-0-117.wav -> data_in_classes\2\100263-2-0-117.wav
Copied: data\100263-2-0-121.wav -> data_in_classes\2\100263-2-0-121.wav
Copied: data\100263-2-0-126.wav -> data_in_classes\2\100263-2-0-126.wav
Copied: data\100263-2-0-137.wav -> data_in_classes\2\100263-2-0-137.wav
Copied: data\100263-2-0-143.wav -> data_in_classes\2\100263-2-0-143.wav
Copied: data\100263-2-0-161.wav -> data_in_classes\2\100263-2-0-161.wav
Copied: data\100263-2-0-3.wav -> data_in_classes\2\100263-2-0-3.wav
Copied: data\100263-2-0-36.wav -> data_in_classes\2\100263-2-0-36.wav
Copied: data\100648-1-0-0.wav -> data_in_classes\1\100648-1-0-0.wav
Copied: data\100648-1-1-0.wav -> data_in_classes\1\100648-1-1-0.wav
Copied: data\100648-1-2-0.wav -> data_in_classes\1\100648-1-2-0.wav
Copied: data\100648-1-3-0.wav -> data_in_classes\1\100648-1-3-0.wav
Copied: data\100648-1-4-0.wav -> data_in_classes\1\100648-1-4-0.wav
Copied: data\100652-3-

In [8]:
import os
import pandas as pd
from pydub.utils import mediainfo
from pydub import AudioSegment

def get_wav_durations(input_path, output_excel):
    """Write an Excel sheet listing the duration of every ``.wav`` under ``input_path``.

    Each file whose name ends in ``.wav`` (case-insensitive) is loaded with
    ``AudioSegment.from_wav``; its length in milliseconds is converted to
    seconds. Files that fail to load are reported on stdout and left out of
    the sheet.

    Args:
        input_path: Directory searched recursively.
        output_excel: Path of the Excel file to write. The sheet has the
            columns ``Filename`` (base name only) and ``Duration (seconds)``.
    """
    names = []
    lengths_sec = []

    for folder, _subfolders, entries in os.walk(input_path):
        for entry in entries:
            if not entry.lower().endswith(".wav"):
                continue

            wav_path = os.path.join(folder, entry)
            try:
                segment = AudioSegment.from_wav(wav_path)
                # len() of a segment is in milliseconds.
                seconds = len(segment) / 1000.0
            except Exception as e:
                # Best effort: one undecodable file must not abort the scan.
                print(f"Skipping {wav_path} due to error: {e}")
                continue

            names.append(entry)
            lengths_sec.append(seconds)

    # Assemble the two columns and persist them without the index column.
    table = pd.DataFrame({'Filename': names, 'Duration (seconds)': lengths_sec})
    table.to_excel(output_excel, index=False)
    print(f"Excel file with durations saved at: {output_excel}")

# Measure every clip in "data" and save the table to "data_durations.xlsx".
input_directory, output_excel = "data", "data_durations.xlsx"
get_wav_durations(input_path=input_directory, output_excel=output_excel)

Skipping data\19007-4-0-0.wav due to error: Decoding failed. ffmpeg returned error code: 3165764104

Output from ffmpeg/avlib:

ffmpeg version N-116156-g9c357324f0-20240706 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 14.1.0 (crosstool-NG 1.26.0.93_a87bf7f)
  configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-w64-mingw32- --arch=x86_64 --target-os=mingw32 --enable-gpl --enable-version3 --disable-debug --disable-w32threads --enable-pthreads --enable-iconv --enable-zlib --enable-libfreetype --enable-libfribidi --enable-gmp --enable-libxml2 --enable-fontconfig --enable-libharfbuzz --enable-libvorbis --enable-opencl --disable-libpulse --enable-libvmaf --disable-libxcb --disable-xlib --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-

In [1]:
import os
import shutil
from pydub import AudioSegment

def copy_wav_files_with_duration(input_path, output_path, target_duration_sec=4, tolerance_sec=0.0):
    """Copy the ``.wav`` files whose duration matches ``target_duration_sec``.

    Every file under ``input_path`` whose name ends in ``.wav``
    (case-insensitive) is loaded with ``AudioSegment.from_wav``. Its length in
    milliseconds is converted to seconds and compared with the target; matching
    files are copied to ``output_path/<basename>`` with ``shutil.copy2``, the
    same copy call the other copy helpers in this notebook use.

    Files that cannot be loaded or copied are reported on stdout and skipped.

    Args:
        input_path: Directory searched recursively.
        output_path: Destination directory; created if it does not exist.
        target_duration_sec: Wanted clip length in seconds (default 4).
        tolerance_sec: Accepted absolute deviation from the target in seconds.
            The default ``0.0`` keeps only exact matches.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_path, exist_ok=True)

    # Traverse all .wav files in the given path
    for subdir, _, files in os.walk(input_path):
        for file in files:
            if not file.lower().endswith(".wav"):
                continue

            file_path = os.path.join(subdir, file)

            try:
                # Use pydub to load the .wav file
                audio = AudioSegment.from_wav(file_path)

                # Length is in milliseconds, so divide by 1000 for seconds
                duration_sec = len(audio) / 1000.0

                # With tolerance_sec == 0 this is exactly an equality test.
                if abs(duration_sec - target_duration_sec) <= tolerance_sec:
                    destination_path = os.path.join(output_path, file)
                    shutil.copy2(file_path, destination_path)
                    print(f"Copied: {file}")

            except Exception as e:
                # Deliberately broad: one bad file must not abort the batch.
                print(f"Skipping {file_path} due to error: {e}")
                continue  # Skip problematic files

# Keep only the clips from "data" that match the default target duration.
input_directory, output_directory = "data", "data_4_seconds"
copy_wav_files_with_duration(input_path=input_directory, output_path=output_directory)

Copied: 100263-2-0-117.wav
Copied: 100263-2-0-121.wav
Copied: 100263-2-0-126.wav
Copied: 100263-2-0-137.wav
Copied: 100263-2-0-143.wav
Copied: 100263-2-0-161.wav
Copied: 100263-2-0-3.wav
Copied: 100263-2-0-36.wav
Copied: 100652-3-0-0.wav
Copied: 100652-3-0-1.wav
Copied: 100652-3-0-2.wav
Copied: 100652-3-0-3.wav
Copied: 100795-3-0-0.wav
Copied: 100795-3-1-0.wav
Copied: 100795-3-1-1.wav
Copied: 100795-3-1-2.wav
Copied: 100852-0-0-0.wav
Copied: 100852-0-0-1.wav
Copied: 100852-0-0-10.wav
Copied: 100852-0-0-11.wav
Copied: 100852-0-0-12.wav
Copied: 100852-0-0-13.wav
Copied: 100852-0-0-14.wav
Copied: 100852-0-0-15.wav
Copied: 100852-0-0-16.wav
Copied: 100852-0-0-17.wav
Copied: 100852-0-0-18.wav
Copied: 100852-0-0-19.wav
Copied: 100852-0-0-2.wav
Copied: 100852-0-0-20.wav
Copied: 100852-0-0-21.wav
Copied: 100852-0-0-22.wav
Copied: 100852-0-0-23.wav
Copied: 100852-0-0-24.wav
Copied: 100852-0-0-25.wav
Copied: 100852-0-0-26.wav
Copied: 100852-0-0-27.wav
Copied: 100852-0-0-28.wav
Copied: 100852-0-0

In [2]:
import os
import shutil

def classify_wav_files(input_root, output_root):
    """Copy ``.wav`` files into one sub-folder per class.

    The class is the second hyphen-separated field of the file name with the
    extension removed, e.g. ``100263-2-0-117.wav`` is copied to
    ``output_root/2/100263-2-0-117.wav``. Files whose name has no usable
    second field are reported and skipped.

    ``output_root`` may be located inside ``input_root``: it is pruned from the
    walk so already-classified files are not visited again.

    NOTE: this notebook defines ``classify_wav_files`` in two cells; keep both
    definitions identical (or move the function into a shared module).

    Args:
        input_root: Directory searched recursively for ``.wav`` files.
        output_root: Directory that receives the per-class folders; created if
            it does not exist.
    """
    os.makedirs(output_root, exist_ok=True)  # Ensure the output root exists

    for subdir, child_dirs, files in os.walk(input_root):
        # Editing the list in place stops os.walk (top-down mode) from
        # descending into the destination tree.
        child_dirs[:] = [
            name for name in child_dirs
            if not os.path.samefile(os.path.join(subdir, name), output_root)
        ]

        for file in files:
            if not file.lower().endswith(".wav"):
                continue

            source_file = os.path.join(subdir, file)

            # Split the stem, not the full name, so a two-field name such as
            # "7-3.wav" yields class "3" rather than "3.wav".
            parts = os.path.splitext(file)[0].split('-')
            if len(parts) < 2 or not parts[1]:
                print(f"Skipped (no class field in name): {source_file}")
                continue

            class_path = os.path.join(output_root, parts[1])
            os.makedirs(class_path, exist_ok=True)  # Create class directory if not exists

            destination_file = os.path.join(class_path, file)
            try:
                shutil.copy2(source_file, destination_file)
            except shutil.SameFileError:
                # Source already sits at its classified location.
                continue
            print(f"Copied: {source_file} -> {destination_file}")

# Sort the duration-filtered clips into per-class folders.
input_directory, output_directory = "data_4_seconds", "data_in_classes_4_seconds"
classify_wav_files(input_root=input_directory, output_root=output_directory)

Copied: data_4_seconds\100263-2-0-117.wav -> data_in_classes_4_seconds\2\100263-2-0-117.wav
Copied: data_4_seconds\100263-2-0-121.wav -> data_in_classes_4_seconds\2\100263-2-0-121.wav
Copied: data_4_seconds\100263-2-0-126.wav -> data_in_classes_4_seconds\2\100263-2-0-126.wav
Copied: data_4_seconds\100263-2-0-137.wav -> data_in_classes_4_seconds\2\100263-2-0-137.wav
Copied: data_4_seconds\100263-2-0-143.wav -> data_in_classes_4_seconds\2\100263-2-0-143.wav
Copied: data_4_seconds\100263-2-0-161.wav -> data_in_classes_4_seconds\2\100263-2-0-161.wav
Copied: data_4_seconds\100263-2-0-3.wav -> data_in_classes_4_seconds\2\100263-2-0-3.wav
Copied: data_4_seconds\100263-2-0-36.wav -> data_in_classes_4_seconds\2\100263-2-0-36.wav
Copied: data_4_seconds\100652-3-0-0.wav -> data_in_classes_4_seconds\3\100652-3-0-0.wav
Copied: data_4_seconds\100652-3-0-1.wav -> data_in_classes_4_seconds\3\100652-3-0-1.wav
Copied: data_4_seconds\100652-3-0-2.wav -> data_in_classes_4_seconds\3\100652-3-0-2.wav
Copied