In [3]:
import os
import wave
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
import librosa
import librosa.display
import soundfile as sf

img_dpi = 600

In [4]:
def read_wav(file_path, duration_minutes=1, prints=False):
    """
    Read a .wav file and return the mono audio data for the first specified duration.

    Parameters:
    file_path (str): Path to the .wav file.
    duration_minutes (int): Duration to read in minutes.

    Returns:
    mono_audio (ndarray): Mono audio data.
    sample_rate (int): Sampling rate of the audio file.
    info_str (str): String containing information about the audio.
    """
    with wave.open(file_path, 'rb') as wav_file:
        # Get parameters
        num_channels = wav_file.getnchannels()
        sample_rate = wav_file.getframerate()
        sampwidth = wav_file.getsampwidth() * 8  # Sample width in bits
        num_frames = wav_file.getnframes()

        # Calculate the number of samples for the desired duration
        max_samples = int(60 * duration_minutes * sample_rate)

        # Read only the required number of frames
        frames_to_read = min(num_frames, max_samples)
        frames = wav_file.readframes(frames_to_read)
        audio_data = np.frombuffer(frames, dtype=np.int16)

        # Reshape to separate channels if stereo
        if num_channels > 1:
            audio_data = audio_data.reshape(-1, num_channels)
            # Convert stereo to mono by averaging channels
            mono_audio = audio_data.mean(axis=1)
        else:
            mono_audio = audio_data

        # Calculate actual duration
        duration_sec = frames_to_read / sample_rate
        duration_min = duration_sec / 60

        # Calculate additional statistics
        peak_amplitude = np.max(np.abs(mono_audio))
        rms_amplitude = np.sqrt(np.mean(mono_audio**2))
        zero_crossings = ((mono_audio[:-1] * mono_audio[1:]) < 0).sum()
        zero_crossing_rate = zero_crossings / duration_sec

        # Collect information into a string
        info_str = (
            f"File: {os.path.basename(file_path)}\n"
            f"Number of channels: {num_channels}\n"
            f"Sample rate: {sample_rate} Hz\n"
            f"Sample width: {sampwidth} bits\n"
            f"Number of frames: {num_frames}\n"
            f"Frames read: {frames_to_read}\n"
            f"Max samples to read (for {duration_minutes} minutes): {max_samples}\n"
            f"Type of frames: {type(frames)}\n"
            f"Length of frames: {len(frames)} bytes\n"
            f"Duration of audio data: {duration_sec:.2f} seconds (== {duration_min:.2f} minutes)\n"
            f"Shape of mono audio data: {mono_audio.shape}\n"
            f"Max value of mono audio data: {np.max(mono_audio)}\n"
            f"Min value of mono audio data: {np.min(mono_audio)}\n"
            f"Average value of mono audio data: {np.mean(mono_audio)}\n"
            f"Peak amplitude: {peak_amplitude}\n"
            f"RMS amplitude: {rms_amplitude:.2f}\n"
            f"Zero-crossing rate: {zero_crossing_rate:.2f} crossings/sec"
        )

        # Print information if requested
        if prints:
            print(info_str)

        return mono_audio, sample_rate, info_str

def plot_waveform(signal, sample_rate, title, output_filename_base):
    """
    Plot the waveform of the audio signal.
    """
    times = np.arange(len(signal)) / sample_rate
    plt.figure(figsize=(12, 4))
    plt.plot(times, signal, linewidth=0.1)
    plt.title(f"Waveform: {title}")
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.tight_layout()
    plt.savefig(output_filename_base + '_waveform.png', dpi=img_dpi)
    plt.savefig(output_filename_base + '_waveform.pdf', dpi=img_dpi)
    plt.close()

def plot_spectrogram(signal, sample_rate, title, output_filename_base):
    """
    Plot the spectrogram of the audio signal.
    """
    f, t, Sxx = spectrogram(signal, sample_rate, nperseg=1024)
    plt.figure(figsize=(12, 4))
    plt.pcolormesh(t, f, 10 * np.log10(Sxx), shading='gouraud')
    plt.title(f"Spectrogram: {title}")
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.colorbar(label='Intensity [dB]')
    plt.tight_layout()
    plt.savefig(output_filename_base + '_spectrogram.png', dpi=img_dpi)
    plt.savefig(output_filename_base + '_spectrogram.pdf', dpi=img_dpi)
    plt.close()

def plot_frequency_spectrum(signal, sample_rate, title, output_filename_base):
    """
    Plot the frequency spectrum of the audio signal.
    """
    freqs = np.fft.rfftfreq(len(signal), d=1/sample_rate)
    fft_spectrum = np.abs(np.fft.rfft(signal))
    plt.figure(figsize=(12, 4))
    plt.plot(freqs, fft_spectrum, linewidth=0.1)
    plt.title(f"Frequency Spectrum: {title}")
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Amplitude')
    plt.tight_layout()
    plt.savefig(output_filename_base + '_frequency_spectrum.png', dpi=img_dpi)
    plt.savefig(output_filename_base + '_frequency_spectrum.pdf', dpi=img_dpi)
    plt.close()

def plot_mel_spectrogram(signal, sample_rate, title, output_filename_base):
    """
    Plot the mel spectrogram of the audio signal.
    """
    # Convert signal to float and normalize
    signal = signal.astype(float) / np.max(np.abs(signal))
    S = librosa.feature.melspectrogram(y=signal, sr=sample_rate, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)
    plt.figure(figsize=(12, 4))
    librosa.display.specshow(S_dB, sr=sample_rate, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"Mel Spectrogram: {title}")
    plt.tight_layout()
    plt.savefig(output_filename_base + '_mel_spectrogram.png', dpi=img_dpi)
    plt.savefig(output_filename_base + '_mel_spectrogram.pdf', dpi=img_dpi)
    plt.close()

def plot_waveform_and_info(signal, sample_rate, title, output_filename_base, info_str):
    """
    Plot the waveform and include information text.
    """
    times = np.arange(len(signal)) / sample_rate
    plt.figure(figsize=(12, 8))  # Increased size for better quality
    plt.subplot(2, 1, 1)
    plt.plot(times, signal, linewidth=0.1)
    plt.title(f"Waveform: {title}", fontsize=14)
    plt.xlabel('Time (s)', fontsize=12)
    plt.ylabel('Amplitude', fontsize=12)

    plt.subplot(2, 1, 2)
    plt.axis('off')
    plt.text(0.01, 0.5, info_str, fontsize=10, va='center', ha='left', wrap=True)

    plt.tight_layout()
    plt.savefig(output_filename_base + '_waveform_info.png', dpi=img_dpi)
    plt.savefig(output_filename_base + '_waveform_info.pdf', dpi=img_dpi)
    plt.close()

def process_audio_file(file_path, duration_minutes=1, save_directory='.', prints=False):
    """
    Process a single audio file, generate plots and print information.
    """
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)

    # Read the wav file
    mono_audio, sample_rate, info_str = read_wav(file_path, duration_minutes, prints=prints)

    # Prepare the output filename base
    filename = os.path.basename(file_path)
    output_filename_base = os.path.join(save_directory, os.path.splitext(filename)[0])

    # Generate plots
    plot_waveform_and_info(mono_audio, sample_rate, filename, output_filename_base, info_str)
    #plot_spectrogram(mono_audio, sample_rate, filename, output_filename_base)
    #plot_frequency_spectrum(mono_audio, sample_rate, filename, output_filename_base)
    #plot_mel_spectrogram(mono_audio, sample_rate, filename, output_filename_base)

    # Additional prints
    if prints:
        print("\nProcessing complete. Plots saved to:", save_directory)

In [10]:
file_name = 'CAFE-CAFE-1_clean_16kHz_resampled_zero_clipped.wav'
load_directory = 'data/PROCESSED/16kHz_resampling_zero_clipped/'
save_directory = 'data/PROCESSED/plots/16kHz_resampling_zero_clipped/CAFE-CAFE-1_clean_16kHz_resampled_zero_clipped.wav'

# Build the full file path
file_path = os.path.join(load_directory, file_name)

read_wav(file_path, duration_minutes=30, prints=True)

process_audio_file(file_path, duration_minutes=30, save_directory=save_directory, prints=True)

File: CAFE-CAFE-1_clean_16kHz_resampled_zero_clipped.wav
Number of channels: 2
Sample rate: 16000 Hz
Sample width: 16 bits
Number of frames: 29760000
Frames read: 28800000
Max samples to read (for 30 minutes): 28800000
Type of frames: <class 'bytes'>
Length of frames: 115200000 bytes
Duration of audio data: 1800.00 seconds (== 30.00 minutes)
Shape of mono audio data: (28800000,)
Max value of mono audio data: 32767.0
Min value of mono audio data: 0.0
Average value of mono audio data: 490.88977553819444
Peak amplitude: 32767.0
RMS amplitude: 932.70
Zero-crossing rate: 0.00 crossings/sec
File: CAFE-CAFE-1_clean_16kHz_resampled_zero_clipped.wav
Number of channels: 2
Sample rate: 16000 Hz
Sample width: 16 bits
Number of frames: 29760000
Frames read: 28800000
Max samples to read (for 30 minutes): 28800000
Type of frames: <class 'bytes'>
Length of frames: 115200000 bytes
Duration of audio data: 1800.00 seconds (== 30.00 minutes)
Shape of mono audio data: (28800000,)
Max value of mono audio da

## Data preprocessing 1 prefix at a time

In [None]:
import os
import wave
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
import librosa
import librosa.display
import soundfile as sf

img_dpi = 900

# read_wav, plot_waveform, plot_spectrogram, plot_frequency_spectrum, plot_mel_spectrogram, and plot_waveform_and_info. These functions remain unchanged.

def process_audio_file(file_path, duration_minutes=1, save_directory='.', prints=False):
    """
    Process a single audio file, generate plots, and print information.
    """
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)

    # Read the wav file
    mono_audio, sample_rate, info_str = read_wav(file_path, duration_minutes, prints=prints)

    # Prepare the output filename base
    filename = os.path.basename(file_path)
    output_filename_base = os.path.join(save_directory, os.path.splitext(filename)[0])

    # Generate plots
    plot_waveform_and_info(mono_audio, sample_rate, filename, output_filename_base, info_str)
    plot_spectrogram(mono_audio, sample_rate, filename, output_filename_base)
    plot_frequency_spectrum(mono_audio, sample_rate, filename, output_filename_base)
    plot_mel_spectrogram(mono_audio, sample_rate, filename, output_filename_base)

    # Additional prints
    if prints:
        print(f"\nProcessing complete. Plots saved to: {save_directory}")

def process_files_with_prefixes(load_directory, save_directory, prefixes, duration_minutes=1, prints=False):
    """
    Process all files in the load_directory that start with specific prefixes, and save their plots
    in folders named after the original files with 'plots' appended.

    Parameters:
    load_directory (str): Directory containing the .wav files.
    save_directory (str): Directory to save the plots.
    prefixes (list of str): List of prefixes to filter files.
    duration_minutes (int): Duration to read in minutes for each file.
    prints (bool): Whether to print processing details.
    """
    if not os.path.exists(load_directory):
        raise FileNotFoundError(f"Load directory '{load_directory}' does not exist.")

    for filename in os.listdir(load_directory):
        if filename.endswith('.wav') and any(filename.startswith(prefix) for prefix in prefixes):
            # Full path of the raw audio file
            file_path = os.path.join(load_directory, filename)

            # Save directory for the plots, named after the original file + "plots"
            file_save_directory = os.path.join(save_directory, os.path.splitext(filename)[0] + "_plots")
            # os.path.splitext(filename)[0] returns the filename without the extension, because [0] is the filename and [1] is the extension

            # Process the file
            process_audio_file(file_path, duration_minutes=duration_minutes, save_directory=file_save_directory, prints=prints)

In [None]:
load_directory = 'data/RAW'
save_directory = 'data/RAW/plots'
prefixes = ['CAFE-', 'STREET-']
# available prefixes: CAFE-, STREET-, HOME-, CAR-
# if the server can't process all of these I will try just 1 at a time

process_files_with_prefixes(load_directory, save_directory, prefixes, duration_minutes=30, prints=True)

## Duration of .wav files extraction

In [5]:
import os
import wave

def get_audio_file_length(file_path):
    """
    Calculate the total length of a .wav audio file in seconds.

    Parameters:
    file_path (str): Path to the .wav file.

    Returns:
    float: Length of the audio file in seconds.
    """
    with wave.open(file_path, 'rb') as wav_file:
        sample_rate = wav_file.getframerate()
        num_frames = wav_file.getnframes()
        duration_sec = num_frames / sample_rate
    return duration_sec

def print_audio_file_lengths(directory):
    """
    Print the total length of all .wav files in a directory.

    Parameters:
    directory (str): Path to the directory containing .wav files.
    """
    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return

    print(f"Audio File Lengths in Directory: {directory}")
    print("=" * 50)

    for filename in os.listdir(directory):
        if filename.endswith('.wav'):
            file_path = os.path.join(directory, filename)
            try:
                duration_sec = get_audio_file_length(file_path)
                duration_min = duration_sec / 60
                print(f"File: {filename}")
                print(f"  - Length: {duration_sec:.2f} seconds ({duration_min:.2f} minutes)")
            except Exception as e:
                print(f"  - Error processing file {filename}: {e}")


# get the minimum duration of all the files
def get_minimum_audio_file_length(directory):
    """
    Calculate the minimum length of all .wav audio files in a directory.

    Parameters:
    directory (str): Path to the directory containing .wav files.

    Returns:
    float: Minimum length of the audio files in seconds.
    """
    min_duration_sec = float('inf')

    if not os.path.exists(directory):
        print(f"Directory '{directory}' does not exist.")
        return min_duration_sec

    for filename in os.listdir(directory):
        if filename.endswith('.wav'):
            file_path = os.path.join(directory, filename)
            try:
                duration_sec = get_audio_file_length(file_path)
                if duration_sec < min_duration_sec:
                    min_duration_sec = duration_sec
            except Exception as e:
                print(f"Error processing file {filename}: {e}")

    min_duration_min = min_duration_sec / 60
    print(f"\nMinimum Duration of Audio Files: {min_duration_sec:.2f} seconds ({min_duration_min:.2f} minutes)")

# Specify the folder containing the .wav files
folder_path = 'data/RAW'

# Print the lengths of all .wav files in the folder
print_audio_file_lengths(folder_path)
get_minimum_audio_file_length(folder_path)

Audio File Lengths in Directory: data/RAW
File: CAR-WINUPB-1_clean.wav
  - Length: 2111.12 seconds (35.19 minutes)
File: STREET-CITY-1_clean.wav
  - Length: 1904.72 seconds (31.75 minutes)
File: CAFE-FOODCOURTB-2_clean.wav
  - Length: 1860.00 seconds (31.00 minutes)
File: CAFE-CAFE-1_clean.wav
  - Length: 2521.85 seconds (42.03 minutes)
File: HOME-LIVINGB-1_clean.wav
  - Length: 2340.00 seconds (39.00 minutes)
File: CAR-WINDOWNB-1_clean.wav
  - Length: 2291.11 seconds (38.19 minutes)
File: STREET-KG-2_clean.wav
  - Length: 2861.20 seconds (47.69 minutes)
File: HOME-KITCHEN-2_clean.wav
  - Length: 3035.47 seconds (50.59 minutes)
File: CAR-WINDOWNB-2_clean.wav
  - Length: 2080.03 seconds (34.67 minutes)
File: HOME-LIVINGB-2_clean.wav
  - Length: 2340.00 seconds (39.00 minutes)
File: STREET-KG-1_clean.wav
  - Length: 3408.18 seconds (56.80 minutes)
File: HOME-KITCHEN-1_clean.wav
  - Length: 1979.72 seconds (33.00 minutes)
File: CAR-WINUPB-2_clean.wav
  - Length: 2022.11 seconds (33.70 min

## Resampling from 48 kHz to 16 kHz

In [1]:
import os
import numpy as np
from scipy.io import wavfile
from scipy.signal import resample_poly

# Specify the directories
load_directory = 'data/RAW'
save_directory = 'data/PROCESSED/16kHz_resampling_zero_clipped'

# Ensure the save directory exists
if not os.path.exists(save_directory):
    os.makedirs(save_directory)

# Define target duration: 31 minutes at 16 kHz
target_minutes = 31
target_sample_rate = 16000
target_duration_seconds = target_minutes * 60
target_length = target_duration_seconds * target_sample_rate  # 31 * 60 * 16000 = 29,760,000 samples

# Process each .wav file in the load directory
for filename in os.listdir(load_directory):
    if filename.endswith('.wav'):
        file_path = os.path.join(load_directory, filename)
        # Read the wav file
        sample_rate, data = wavfile.read(file_path)
        
        # Check if sample rate is 48,000 Hz
        if sample_rate != 48000:
            print(f"File {filename} has sample rate {sample_rate} Hz, expected 48,000 Hz.")
            continue
        
        print(f"Processing {filename}...")

        # Store original data type and get data type info
        original_dtype = data.dtype
        if np.issubdtype(original_dtype, np.integer):
            dtype_info = np.iinfo(original_dtype)
        else:
            dtype_info = np.finfo(original_dtype)

        # Convert data to float64 for processing
        data_float = data.astype(np.float64)

        # Normalize data to -1.0 to 1.0 based on the max possible value for this dtype
        data_norm = data_float / np.abs(dtype_info.max)

        # Resample data from 48,000 Hz to 16,000 Hz
        # If stereo or multi-channel, data has shape (N, C); resample_poly with axis=0 handles all channels simultaneously.
        resampled_data = resample_poly(data_norm, up=1, down=3, axis=0)

        # Clip negative values to zero
        # This operation sets all negative samples to 0, leaving positive values unchanged.
        resampled_data[resampled_data < 0] = 0

        # Scale resampled_data back to original amplitude range
        resampled_data = resampled_data * np.abs(dtype_info.max)

        # Clip the resampled data to the valid range for the data type
        resampled_data = np.clip(resampled_data, dtype_info.min, dtype_info.max)

        # Ensure correct length (31 minutes at 16kHz)
        current_length = resampled_data.shape[0]
        
        # If too long, truncate
        if current_length > target_length:
            resampled_data = resampled_data[:target_length]
        # If too short, pad with zeros
        elif current_length < target_length:
            # Check if mono or multi-channel
            if resampled_data.ndim == 1:
                # Mono
                pad_length = target_length - current_length
                resampled_data = np.pad(resampled_data, (0, pad_length), mode='constant', constant_values=0)
            else:
                # Multi-channel
                pad_length = target_length - current_length
                # Pad along the first dimension (time)
                resampled_data = np.pad(resampled_data, ((0, pad_length), (0,0)), mode='constant', constant_values=0)

        # Convert resampled data to original data type
        resampled_data = resampled_data.astype(original_dtype)

        # Prepare the resampled filename
        resampled_filename = os.path.splitext(filename)[0] + '_16kHz_resampled_zero_clipped.wav'
        resampled_file_path = os.path.join(save_directory, resampled_filename)

        # Save the resampled data
        wavfile.write(resampled_file_path, target_sample_rate, resampled_data)

        print(f"Saved resampled file as {resampled_filename}")


Processing CAR-WINUPB-1_clean.wav...
Saved resampled file as CAR-WINUPB-1_clean_16kHz_resampled_zero_clipped.wav
Processing STREET-CITY-1_clean.wav...
Saved resampled file as STREET-CITY-1_clean_16kHz_resampled_zero_clipped.wav
Processing CAFE-FOODCOURTB-2_clean.wav...
Saved resampled file as CAFE-FOODCOURTB-2_clean_16kHz_resampled_zero_clipped.wav
Processing CAFE-CAFE-1_clean.wav...
Saved resampled file as CAFE-CAFE-1_clean_16kHz_resampled_zero_clipped.wav
Processing HOME-LIVINGB-1_clean.wav...
Saved resampled file as HOME-LIVINGB-1_clean_16kHz_resampled_zero_clipped.wav
Processing CAR-WINDOWNB-1_clean.wav...
Saved resampled file as CAR-WINDOWNB-1_clean_16kHz_resampled_zero_clipped.wav
Processing STREET-KG-2_clean.wav...
Saved resampled file as STREET-KG-2_clean_16kHz_resampled_zero_clipped.wav
Processing HOME-KITCHEN-2_clean.wav...
Saved resampled file as HOME-KITCHEN-2_clean_16kHz_resampled_zero_clipped.wav
Processing CAR-WINDOWNB-2_clean.wav...
Saved resampled file as CAR-WINDOWNB-

In [None]:
read_wav('data/PROCESSED/16kHz_resampling/CAR-WINUPB-1_clean_16kHz_resampled.wav', duration_minutes=31, prints=True)

File: CAR-WINUPB-1_clean_16kHz_resampled.wav
Number of channels: 2
Sample rate: 16000 Hz
Sample width: 16 bits
Number of frames: 33777861
Frames read: 28800000
Max samples to read (for 30 minutes): 28800000
Type of frames: <class 'bytes'>
Length of frames: 115200000 bytes
Duration of audio data: 1800.00 seconds (== 30.00 minutes)
Shape of mono audio data: (28800000,)
Max value of mono audio data: 16119.5
Min value of mono audio data: -15156.0
Average value of mono audio data: -3.9088782291666666
Peak amplitude: 16119.5
RMS amplitude: 828.02
Zero-crossing rate: 347.19 crossings/sec


(array([  25.5,   59.5,   90. , ..., -194.5, -180. , -191.5]),
 16000,
 "File: CAR-WINUPB-1_clean_16kHz_resampled.wav\nNumber of channels: 2\nSample rate: 16000 Hz\nSample width: 16 bits\nNumber of frames: 33777861\nFrames read: 28800000\nMax samples to read (for 30 minutes): 28800000\nType of frames: <class 'bytes'>\nLength of frames: 115200000 bytes\nDuration of audio data: 1800.00 seconds (== 30.00 minutes)\nShape of mono audio data: (28800000,)\nMax value of mono audio data: 16119.5\nMin value of mono audio data: -15156.0\nAverage value of mono audio data: -3.9088782291666666\nPeak amplitude: 16119.5\nRMS amplitude: 828.02\nZero-crossing rate: 347.19 crossings/sec")

## Processing .wav file into 1 second .csv files

In [22]:
import os
import numpy as np
from scipy.io import wavfile

def process_wav_to_csv(file_path, output_folder, start_second=0, stop_second=None):
    """
    Process a .wav file by slicing it into 1-second samples and saving each as a .csv file.

    Parameters:
    - file_path: str, path to the .wav file to process.
    - output_folder: str, path to the folder where .csv files will be saved.
    - start_second: int, starting second from which to begin processing (default is 0).
    - stop_second: int or None, ending second at which to stop processing (default is None, which means process up to 30 minutes).
    """
    # Ensure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Read the .wav file
    sample_rate, data = wavfile.read(file_path)

    # Check if the sample rate is 16 kHz
    if sample_rate != 16000:
        raise ValueError(f"Expected sample rate of 16 kHz, but got {sample_rate} Hz.")

    # Convert stereo to mono if necessary
    if len(data.shape) > 1:
        data = data.mean(axis=1)  # Average the two channels

    # Number of samples per second
    samples_per_second = sample_rate  # 16,000 samples per second

    # Total number of samples in the audio data
    total_samples = data.shape[0]

    # Total number of full seconds in the audio data
    num_seconds_in_file = total_samples // samples_per_second

    # Set default stop_second if not provided
    if stop_second is None:
        stop_second = min(num_seconds_in_file, 30 * 60)  # Limit to the first 30 minutes

    # Ensure start_second and stop_second are within the available data range
    if start_second < 0 or start_second >= num_seconds_in_file:
        raise ValueError(f"start_second ({start_second}) is out of range (0 to {num_seconds_in_file - 1})")
    if stop_second <= start_second or stop_second > num_seconds_in_file:
        raise ValueError(f"stop_second ({stop_second}) must be greater than start_second ({start_second}) and within the available data range.")

    # Get the base file name without extension
    base_file_name = os.path.splitext(os.path.basename(file_path))[0]

    # Process each second of audio in the specified range
    sample_number = 1  # Initialize sample numbering starting from 1
    for second in range(start_second, stop_second):
        start_idx = second * samples_per_second
        end_idx = start_idx + samples_per_second
        sample_data = data[start_idx:end_idx]

        # Save sample_data to .csv file with one column
        csv_file_name = f"{base_file_name}_sample_{sample_number}.csv"
        csv_file_path = os.path.join(output_folder, csv_file_name)

        # Save as .csv with one column
        np.savetxt(csv_file_path, sample_data, delimiter=',')
        print(f"Saved {csv_file_name}")

        sample_number += 1  # Increment sample number

    print("Processing complete.")


In [64]:
# Specify the file name and directories
file_name = 'CAFE-CAFE-2_clean_16kHz_resampled.wav'  # Replace with your actual .wav file name
input_folder = 'data/PROCESSED/16kHz_resampling'  # Replace with the path to your .wav files
output_folder = 'data/PROCESSED/16kHz_1sec_samples'  # Replace with the path to save .csv files

# Build the full file path
file_path = os.path.join(input_folder, file_name)

In [65]:
# Process the .wav file
process_wav_to_csv(file_path, output_folder, start_second=0, stop_second=60*30)

Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_1.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_2.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_3.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_4.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_5.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_6.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_7.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_8.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_9.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_10.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_11.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_12.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_13.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_14.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_15.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_16.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_17.csv
Saved CAFE-CAFE-2_clean_16kHz_resampled_sample_18.csv
Saved CAFE-CAFE-2_clean_16kHz_resampl

## Dividing original .wav files into 24min for train and validation + 6min for test datasets

In [18]:
import os
import numpy as np
from scipy.io import wavfile

# Directories
source_directory = 'data/PROCESSED/16kHz_resampling_zero_clipped'
train_val_directory = 'data/PROCESSED/16kHz_resampling_zero_clipped/wav_train_val_dataset'
test_directory = 'data/PROCESSED/16kHz_resampling_zero_clipped/wav_test_dataset'

# Create output directories if they don't exist
os.makedirs(train_val_directory, exist_ok=True)
os.makedirs(test_directory, exist_ok=True)

# Define durations
# 16 kHz sampling rate
sample_rate = 16000
minutes_24 = 24 * 60  # seconds in 24 minutes
minutes_6 = 6 * 60    # seconds in 6 minutes
separation = 1 * 60  # 1 minute separation between train/val and test data

num_samples_24_min = minutes_24 * sample_rate
num_samples_6_min = minutes_6 * sample_rate
num_samples_sep = separation * sample_rate

# Process each .wav file in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith('.wav'):
        file_path = os.path.join(source_directory, filename)

        # Read the wav file
        sr, data = wavfile.read(file_path)

        # Check if the sampling rate is 16 kHz
        if sr != sample_rate:
            print(f"File {filename} has sample rate {sr} Hz, expected 16,000 Hz.")
            continue

        print(f"Processing {filename}...")

        # Check length of the file
        total_samples = data.shape[0]

        # Ensure file is at least 31 minutes long: 24 + 6 + 1 = 31 min total
        required_length = num_samples_24_min + num_samples_6_min + num_samples_sep
        if total_samples < required_length:
            print(f"File {filename} is shorter than 31 minutes. Total samples: {total_samples}, needed {required_length}. Skipping.")
            continue

        # Extract first 24 minutes
        train_val_data = data[:num_samples_24_min]

        # Extract last 6 minutes
        test_data = data[-num_samples_6_min:]

        # Save the first 24 minutes to train_val_directory with the same filename
        train_val_filepath = os.path.join(train_val_directory, filename)
        wavfile.write(train_val_filepath, sample_rate, train_val_data)

        # Save the last 6 minutes to test_directory with the same filename
        test_filepath = os.path.join(test_directory, filename)
        wavfile.write(test_filepath, sample_rate, test_data)

        print(f"Saved first 24-minute part to {train_val_filepath}")
        print(f"Saved last 6-minute part to {test_filepath}")

Processing CAR-WINUPB-2_clean_16kHz_resampled_zero_clipped.wav...
Saved first 24-minute part to data/PROCESSED/16kHz_resampling_zero_clipped/wav_train_val_dataset/CAR-WINUPB-2_clean_16kHz_resampled_zero_clipped.wav
Saved last 6-minute part to data/PROCESSED/16kHz_resampling_zero_clipped/wav_test_dataset/CAR-WINUPB-2_clean_16kHz_resampled_zero_clipped.wav
Processing STREET-KG-2_clean_16kHz_resampled_zero_clipped.wav...
Saved first 24-minute part to data/PROCESSED/16kHz_resampling_zero_clipped/wav_train_val_dataset/STREET-KG-2_clean_16kHz_resampled_zero_clipped.wav
Saved last 6-minute part to data/PROCESSED/16kHz_resampling_zero_clipped/wav_test_dataset/STREET-KG-2_clean_16kHz_resampled_zero_clipped.wav
Processing STREET-CITY-2_clean_16kHz_resampled_zero_clipped.wav...
Saved first 24-minute part to data/PROCESSED/16kHz_resampling_zero_clipped/wav_train_val_dataset/STREET-CITY-2_clean_16kHz_resampled_zero_clipped.wav
Saved last 6-minute part to data/PROCESSED/16kHz_resampling_zero_clipped

## Separating .wav files into 1 second .csv files (for training / validation) or 1 minute (for test)

In [51]:
import os
import numpy as np
from scipy.io import wavfile
import csv

# Input .wav file
input_wav_file = 'data/PROCESSED/16kHz_resampling_zero_clipped/wav_test_dataset/STREET-KG-2_clean_16kHz_resampled_zero_clipped.wav'
output_directory = 'data/PROCESSED/1sec_TEST_dataset/STREET'

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Read the WAV file
sample_rate, data = wavfile.read(input_wav_file)

# Check that the sample rate is 16 kHz
expected_sample_rate = 16000
if sample_rate != expected_sample_rate:
    raise ValueError(f"Expected sample rate {expected_sample_rate} Hz, got {sample_rate} Hz")

# Convert stereo to mono if necessary
if data.ndim > 1:
    data = data.mean(axis=1)

total_samples = data.shape[0]

# Number of samples per segment 
samples_per_segment = sample_rate * 60 # 1 minute segments
num_segments = total_samples // samples_per_segment

# Get the base name of the input file (without extension)
base_name = os.path.splitext(os.path.basename(input_wav_file))[0]

for i in range(num_segments):
    # Segment start and end indices
    start_idx = i * samples_per_segment
    end_idx = start_idx + samples_per_segment
    
    segment_data = data[start_idx:end_idx]
    
    # Prepare output CSV file name
    # Segment numbers start from 1 for readability
    segment_number = i + 1
    output_csv_filename = f"{base_name}_segment_{segment_number}.csv"
    output_csv_path = os.path.join(output_directory, output_csv_filename)
    
    # Save segment_data to CSV, one column
    # Each row in the CSV will contain a single sample value.
    with open(output_csv_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for sample_value in segment_data:
            writer.writerow([sample_value])
    
    print(f"Saved segment {segment_number} to {output_csv_path}")

# If there is a remainder (file length not a multiple of 1 second), you can handle it here:
remainder = total_samples % samples_per_segment
if remainder > 0:
    # Decide what to do:
    # Option 1: Skip the leftover samples
    # Option 2: Save them as a shorter last segment
    # Here we choose to save them as a shorter segment:
    start_idx = num_segments * samples_per_segment
    remainder_data = data[start_idx:]
    segment_number = num_segments + 1
    output_csv_filename = f"{base_name}_segment_{segment_number}.csv"
    output_csv_path = os.path.join(output_directory, output_csv_filename)
    
    with open(output_csv_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for sample_value in remainder_data:
            writer.writerow([sample_value])
    
    print(f"Saved remainder segment {segment_number} (shorter) to {output_csv_path}")


Saved segment 1 to data/PROCESSED/1sec_TEST_dataset/STREET/STREET-KG-2_clean_16kHz_resampled_zero_clipped_segment_1.csv
Saved segment 2 to data/PROCESSED/1sec_TEST_dataset/STREET/STREET-KG-2_clean_16kHz_resampled_zero_clipped_segment_2.csv
Saved segment 3 to data/PROCESSED/1sec_TEST_dataset/STREET/STREET-KG-2_clean_16kHz_resampled_zero_clipped_segment_3.csv
Saved segment 4 to data/PROCESSED/1sec_TEST_dataset/STREET/STREET-KG-2_clean_16kHz_resampled_zero_clipped_segment_4.csv
Saved segment 5 to data/PROCESSED/1sec_TEST_dataset/STREET/STREET-KG-2_clean_16kHz_resampled_zero_clipped_segment_5.csv
Saved segment 6 to data/PROCESSED/1sec_TEST_dataset/STREET/STREET-KG-2_clean_16kHz_resampled_zero_clipped_segment_6.csv
