In [None]:
import os

def rename_and_delete_files(directory):
    """Delete unwanted recordings and renumber the rest in each class folder.

    For each of the ``inhale``, ``exhale`` and ``silence`` subdirectories of
    ``directory`` that exists, every regular file whose name starts with
    ``repo_`` or ``master_`` is deleted, and every remaining regular file is
    renamed to ``<subdir><n>.wav`` with ``n`` counting up from 1.

    Files are processed in sorted name order so the numbering is
    reproducible. Renaming is done in two passes through unique temporary
    names, so a target name that is already used by another file in the
    folder (e.g. an existing ``inhale1.wav``) is never overwritten.

    Args:
        directory: Path of the directory that contains the class
            subdirectories.
    """
    # Iterate over subdirectories
    for subdir in ['inhale', 'exhale', 'silence']:
        subdir_path = os.path.join(directory, subdir)
        if not os.path.exists(subdir_path):
            continue

        # Pass 0: delete the unwanted files and collect the ones to renumber.
        # os.listdir() order is arbitrary, so sort for deterministic numbering.
        to_rename = []
        for filename in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, filename)
            if not os.path.isfile(file_path):
                # Nested directories are neither removable with os.remove()
                # nor recordings, so leave them alone.
                continue
            if filename.startswith(('repo_', 'master_')):
                os.remove(file_path)
                print(f'Deleted: {file_path}')
            else:
                to_rename.append(file_path)

        # Pass 1: move every file to a temporary name that is not in use, so
        # that none of the final names can still be occupied in pass 2.
        staged = []
        token = 0
        for file_path in to_rename:
            temp_path = os.path.join(subdir_path, f'.renaming_{token}.tmp')
            while os.path.exists(temp_path):
                token += 1
                temp_path = os.path.join(subdir_path, f'.renaming_{token}.tmp')
            os.rename(file_path, temp_path)
            staged.append((file_path, temp_path))
            token += 1

        # Pass 2: assign the final sequential names.
        for i, (file_path, temp_path) in enumerate(staged, start=1):
            new_file_path = os.path.join(subdir_path, f'{subdir}{i}.wav')
            os.rename(temp_path, new_file_path)
            print(f'Renamed: {file_path} to {new_file_path}')

# Run the cleanup on the raw recordings directory (path is relative to the
# notebook's working directory). NOTE: this deletes and renames files in place.
rename_and_delete_files('../data-raw-ours')

In [None]:
import os
from pydub import AudioSegment

# Running sum, in seconds, of the durations added by print_wav_lengths().
total_length = 0


def print_wav_lengths(directory):
    """Add the duration of every ``.wav`` file in ``directory`` to ``total_length``.

    Only entries whose name ends with ``.wav`` are loaded. Each duration is
    ``len(segment) / 1000`` (seconds) and is added to the module-level
    ``total_length`` accumulator. Per-file lengths are not printed; the
    function only emits one empty line when it is done.

    Args:
        directory: Path of the folder to scan (not recursive).
    """
    global total_length
    for entry in os.listdir(directory):
        if not entry.endswith('.wav'):
            continue
        segment = AudioSegment.from_wav(os.path.join(directory, entry))
        total_length += len(segment) / 1000  # length in seconds
    print()

# Accumulate the durations of the three class folders, then report the grand
# total in minutes.
for wav_dir in (
    '../data-raw-ours/inhale',
    '../data-raw-ours/exhale',
    '../data-raw-ours/silence',
):
    print_wav_lengths(wav_dir)
print(f'Total length: {total_length/60:.2f} m')


In [None]:
import numpy as np

def normalize_data(train_data, val_data):
    """Standardize MFCC features using statistics of the training set only.

    Both arguments are collections of sequences, and every sequence is a
    collection of ``(mfcc, label)`` pairs. The mean and standard deviation
    are computed along axis 0 over all training MFCC entries stacked
    together, and then applied to both the training and validation data.

    Args:
        train_data: Training sequences; must be iterable more than once.
        val_data: Validation sequences, normalized with the training stats.

    Returns:
        ``(normalized_train_data, normalized_val_data, mean, std)``. The
        normalized data keeps the nested list structure and the labels.
        ``std`` is the divisor that was actually used: entries whose
        standard deviation is zero are replaced by 1 so that constant
        coefficients normalize to 0 instead of NaN/inf.

    Raises:
        ValueError: If ``train_data`` contains no MFCC entries at all.
    """
    # Stack the MFCC coefficients of every training sequence into one array.
    # Empty sequences are skipped: they would yield a shape-(0,) array that
    # cannot be concatenated with the others.
    per_sequence = [np.array([item[0] for item in sequence]) for sequence in train_data]
    per_sequence = [arr for arr in per_sequence if arr.size]
    if not per_sequence:
        raise ValueError('train_data contains no MFCC frames to compute statistics from')
    all_mfccs = np.concatenate(per_sequence)

    # Compute the mean and the standard deviation
    mean = np.mean(all_mfccs, axis=0)
    std = np.std(all_mfccs, axis=0)
    # Guard against division by zero for coefficients that never vary.
    std = np.where(std == 0, 1.0, std)

    # Normalize a single sequence, leaving the labels untouched
    def normalize_sequence(sequence):
        return [((np.array(mfcc) - mean) / std, label) for mfcc, label in sequence]

    # Normalize the training and the validation data
    normalized_train_data = [normalize_sequence(sequence) for sequence in train_data]
    normalized_val_data = [normalize_sequence(sequence) for sequence in val_data]

    return normalized_train_data, normalized_val_data, mean, std

# Load one example recording, normalize its loudness and save it as ./xd.wav.
# normalize_data() standardizes MFCC datasets and needs two arguments, so it
# cannot be applied to an audio clip; pydub's own normalizer is used instead.
# NOTE(review): this follows the stated intent of the original comment -
# confirm that audio peak normalization is what was wanted here.
from pydub import AudioSegment, effects

audio = AudioSegment.from_wav('../data-raw-ours/inhale/inhale1.wav')

normalized = effects.normalize(audio)
# export() returns the opened output file; close it so the handle is not
# leaked and nothing is echoed as the cell's result.
normalized.export('./xd.wav', format='wav').close()

In [None]:
import os
import wave

def count_channels(directory):
    """Count .wav files per class folder by sample rate and channel layout.

    The ``inhale``, ``exhale`` and ``silence`` subdirectories of
    ``directory`` are walked recursively. Only files ending in ``.wav`` with
    a frame rate of 44100 or 48000 Hz and 1 or 2 channels are counted; any
    other combination is ignored. Missing subdirectories keep zero counts.

    Args:
        directory: Path of the directory containing the class folders.

    Returns:
        A dict mapping each class name to a dict with the keys
        ``44100_mono``, ``44100_stereo``, ``48000_mono`` and
        ``48000_stereo``.
    """
    def _empty_tally():
        return {
            '44100_mono': 0,
            '44100_stereo': 0,
            '48000_mono': 0,
            '48000_stereo': 0
        }

    layout_names = {1: 'mono', 2: 'stereo'}
    counts = {
        'inhale': _empty_tally(),
        'exhale': _empty_tally(),
        'silence': _empty_tally()
    }

    for subdir, tally in counts.items():
        subdir_path = os.path.join(directory, subdir)
        if not os.path.exists(subdir_path):
            continue

        for root, _, files in os.walk(subdir_path):
            for file in files:
                if not file.endswith('.wav'):
                    continue
                with wave.open(os.path.join(root, file), 'rb') as wav_file:
                    channels = wav_file.getnchannels()
                    framerate = wav_file.getframerate()
                    # Unsupported rates/layouts produce a key that is not in
                    # the tally and are therefore skipped.
                    key = f'{framerate}_{layout_names.get(channels)}'
                    if key in tally:
                        tally[key] += 1

    return counts

# Count the files under the raw-data directory and print, for each class
# folder, how many files fall into each sample-rate / channel-layout bucket.
directory = '../data-raw'  # Change this to the path of the directory you want to scan
counts = count_channels(directory)
for subdir in ['inhale', 'exhale', 'silence']:
    # One header line per class folder, followed by its four counters.
    print(f"Katalog: {subdir}")
    print(f"  Liczba plików 44100 Hz mono: {counts[subdir]['44100_mono']}")
    print(f"  Liczba plików 44100 Hz stereo: {counts[subdir]['44100_stereo']}")
    print(f"  Liczba plików 48000 Hz mono: {counts[subdir]['48000_mono']}")
    print(f"  Liczba plików 48000 Hz stereo: {counts[subdir]['48000_stereo']}")

In [None]:
import os
from pydub import AudioSegment

def calculate_total_length(directory, subdir):
    """Return the summed duration, in seconds, of the .wav files in a folder.

    Only files located directly in ``directory/subdir`` whose name ends with
    ``.wav`` are loaded; each contributes ``len(segment) / 1000`` seconds.

    Args:
        directory: Parent directory.
        subdir: Name of the folder inside ``directory`` to scan.

    Returns:
        The total duration in seconds, or 0 when the folder does not exist
        or holds no ``.wav`` files.
    """
    folder = os.path.join(directory, subdir)
    seconds = 0
    if os.path.exists(folder):
        for entry in os.listdir(folder):
            if entry.endswith('.wav'):
                clip = AudioSegment.from_wav(os.path.join(folder, entry))
                seconds += len(clip) / 1000  # length in seconds
    return seconds

# Measure each class folder of the test set (in the order inhale, exhale,
# silence) and print the totals in seconds.
directory = '../data-raw-test'
inhale_length, exhale_length, silence_length = (
    calculate_total_length(directory, class_name)
    for class_name in ('inhale', 'exhale', 'silence')
)

print(f'Łączna długość nagrań w katalogu inhale: {inhale_length:.2f} sekund')
print(f'Łączna długość nagrań w katalogu exhale: {exhale_length:.2f} sekund')
print(f'Łączna długość nagrań w katalogu silence: {silence_length:.2f} sekund')

In [None]:
import os
import wave

def count_channels(directory):
    """Count .wav files under ``directory`` by sample rate and channel layout.

    The directory tree is walked recursively. Only files ending in ``.wav``
    with a frame rate of 44100 or 48000 Hz and 1 or 2 channels are counted;
    every other combination is ignored.

    Args:
        directory: Root of the directory tree to scan.

    Returns:
        A dict with the keys ``44100_mono``, ``44100_stereo``,
        ``48000_mono`` and ``48000_stereo`` mapping to file counts.
    """
    layout_names = {1: 'mono', 2: 'stereo'}
    counts = {
        '44100_mono': 0,
        '44100_stereo': 0,
        '48000_mono': 0,
        '48000_stereo': 0
    }

    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith('.wav'):
                continue
            with wave.open(os.path.join(root, file), 'rb') as wav_file:
                channels = wav_file.getnchannels()
                framerate = wav_file.getframerate()
                # Unsupported rates/layouts produce a key that is not in the
                # dict and are therefore skipped.
                key = f'{framerate}_{layout_names.get(channels)}'
                if key in counts:
                    counts[key] += 1

    return counts

# Count the processed sequence files and print how many fall into each
# sample-rate / channel-layout bucket.
directory = '../master-data-sequences-processed'  # Change this to the path of the directory you want to scan
counts = count_channels(directory)
print(f"Liczba plików 44100 Hz mono: {counts['44100_mono']}")
print(f"Liczba plików 44100 Hz stereo: {counts['44100_stereo']}")
print(f"Liczba plików 48000 Hz mono: {counts['48000_mono']}")
print(f"Liczba plików 48000 Hz stereo: {counts['48000_stereo']}")

In [None]:
import struct
import sys

def decode_wav_header(filename):
    """Print the RIFF/WAVE header fields of a WAV file.

    The file is parsed chunk by chunk instead of assuming the canonical
    44-byte layout, so the ``fmt `` and ``data`` chunks are found even when
    the ``fmt `` chunk is longer than 16 bytes or other chunks (``LIST``,
    ``JUNK``, ...) precede the audio data. For a canonical file the printed
    output is the same as a fixed 44-byte decode.

    If no ``data`` chunk is present, an empty data tag and a size of 0 are
    printed. The sample count is reported as 0 when the channel count or
    the bit depth is 0.

    Args:
        filename: Path of the WAV file to inspect.

    Raises:
        struct.error: If the file is too short to contain the 12-byte RIFF
            header or the first 16 bytes of the ``fmt `` chunk.
        ValueError: If the file contains no ``fmt `` chunk.
    """
    with open(filename, 'rb') as f:
        # RIFF container header: tag, size field, form type.
        # Everything is unpacked as little-endian ('<').
        riff, file_size, wave_id = struct.unpack('<4sI4s', f.read(12))

        fmt_fields = None
        fmt_id, fmt_chunk_size = b'', 0
        data_id, data_size = b'', 0
        data_found = False

        # Walk the chunk list until both chunks of interest have been seen.
        while fmt_fields is None or not data_found:
            chunk_header = f.read(8)
            if len(chunk_header) < 8:
                break  # end of file
            chunk_id, chunk_size = struct.unpack('<4sI', chunk_header)
            consumed = 0
            if chunk_id == b'fmt ' and fmt_fields is None:
                fmt_id, fmt_chunk_size = chunk_id, chunk_size
                # Only the first 16 bytes are common to all format variants.
                fmt_fields = struct.unpack('<HHIIHH', f.read(16))
                consumed = 16
            elif chunk_id == b'data' and not data_found:
                data_id, data_size = chunk_id, chunk_size
                data_found = True
            # Skip the rest of the chunk body; chunks are padded to an even
            # number of bytes.
            f.seek(chunk_size - consumed + (chunk_size & 1), 1)

    if fmt_fields is None:
        raise ValueError(f"No 'fmt ' chunk found in {filename!r}")

    audio_format, num_channels, sample_rate, byte_rate, block_align, \
        bits_per_sample = fmt_fields

    # errors='replace' keeps a corrupt tag from aborting the report.
    print("RIFF header:", riff.decode('ascii', errors='replace'))
    print("Rozmiar pliku:", file_size)
    print("WAVE header:", wave_id.decode('ascii', errors='replace'))
    print("FMT header:", fmt_id.decode('ascii', errors='replace'))
    print("Rozmiar chunku fmt:", fmt_chunk_size)
    print("Format audio (1 = PCM):", audio_format)
    print("Liczba kanałów:", num_channels)
    print("Częstotliwość próbkowania:", sample_rate)
    print("Byte rate:", byte_rate)
    print("Block align:", block_align)
    print("Bity na próbkę:", bits_per_sample)
    print("Data header:", data_id.decode('ascii', errors='replace'))
    print("Rozmiar danych:", data_size)

    # Human-readable name of the sample format
    if audio_format == 1:
        format_type = "PCM (Pulse Code Modulation)"
    elif audio_format == 3:
        format_type = "IEEE 754 float"
    else:
        format_type = "Inny lub nieznany format"

    print("Typ formatu audio:", format_type)

    # Number of samples per channel; integer arithmetic avoids float
    # rounding, and the guard avoids dividing by zero on a broken header.
    bits_per_frame = num_channels * bits_per_sample
    total_samples = data_size * 8 // bits_per_frame if bits_per_frame else 0
    print("Łączna liczba próbek:", total_samples)

# Dump the header of one processed sequence file (path is relative to the
# working directory) when this code runs as the main module.
if __name__ == '__main__':
    decode_wav_header("../master-data-sequences-processed/sequence_150_master.wav")



In [None]:
import os
import wave

# Folder whose .wav files are inspected (relative to the working directory)
folder_path = 'data-raw-seq'

# Print the number of audio frames of every .wav file directly in the folder
for filename in os.listdir(folder_path):
    if not filename.endswith('.wav'):
        continue

    file_path = os.path.join(folder_path, filename)
    with wave.open(file_path, 'r') as wav_file:
        num_frames = wav_file.getnframes()
        print(f"{filename}: {num_frames} frames")

In [None]:
import os
import wave

def check_wav_properties(wav_path):
    """Return the channel layout label and the sample rate of a .wav file.

    Args:
        wav_path: Path of the .wav file to open.

    Returns:
        A ``(channel_type, sample_rate)`` tuple. ``channel_type`` is
        ``'mono'`` for one channel, ``'stereo'`` for two channels and
        ``'<n>-channel'`` for any other channel count, so multi-channel
        files are no longer reported as stereo. ``sample_rate`` is the
        frame rate in Hz.
    """
    with wave.open(wav_path, 'rb') as wf:
        channels = wf.getnchannels()
        sample_rate = wf.getframerate()

    if channels == 1:
        channel_type = 'mono'
    elif channels == 2:
        channel_type = 'stereo'
    else:
        channel_type = f'{channels}-channel'
    return channel_type, sample_rate

def check_folder(folder_path):
    """Print the channel layout and sample rate of each .wav file in a folder.

    Only entries directly inside ``folder_path`` whose name ends with
    ``.wav`` are inspected; one line is printed per file.

    Args:
        folder_path: Path of the folder to scan (not recursive).
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith('.wav'):
            continue
        channel_type, sample_rate = check_wav_properties(
            os.path.join(folder_path, filename)
        )
        print(f"{filename}: {channel_type}, {sample_rate} Hz")

# Interactive entry point: ask for a folder and report on its .wav files.
# NOTE: input() blocks until the user answers, so this cell pauses an
# unattended "Run All".
if __name__ == "__main__":
    folder_path = input("Enter the path to the folder containing .wav files: ")
    check_folder(folder_path)