In [21]:
import numpy as np
import librosa
import librosa.feature
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import wavfile;
from tqdm import tqdm
import os
import json

# Plots
font = {
    'size': 16
}
plt.rc('font', **font)

SAMPLING_RATE = 44100
FRAME_LENGTH = 2048
HOP_LENGTH = 2048


# ----- Time-domain features -----

def get_mean_amplitude(y: np.ndarray) -> float:
    return np.mean(y)


def get_zero_count(y: np.ndarray) -> int:
    return len([sample for sample in y if -2 <= sample <= 2])


def get_mean_onset_strength(y: np.ndarray) -> float:
    onset_env = librosa.onset.onset_strength(y=y, sr=SAMPLING_RATE)
    return np.mean(onset_env)


def get_beat_count(y: np.ndarray) -> int:
    rms = librosa.feature.rms(y=y)
    amplitude_env = rms[0]
    peaks = librosa.util.peak_pick(
        amplitude_env, pre_max=3, post_max=3, pre_avg=3, post_avg=5, delta=0.5, wait=10
    )
    return len(peaks)


def get_mean_distance_between_beats(y: np.ndarray):
    rms = librosa.feature.rms(y=y)
    amplitude_env = rms[0]
    peaks = librosa.util.peak_pick(
        amplitude_env, pre_max=3, post_max=3, pre_avg=3, post_avg=5, delta=0.5, wait=10
    )
    temp = np.insert(peaks, 0, 0, axis=0)
    distances = [peaks[i] - temp[i] for i in range(len(peaks))]
    return np.mean(distances)


def get_std_distance_between_beats(y: np.ndarray):
    rms = librosa.feature.rms(y=y)
    amplitude_env = rms[0]
    peaks = librosa.util.peak_pick(
        amplitude_env, pre_max=3, post_max=3, pre_avg=3, post_avg=5, delta=0.5, wait=10
    )
    temp = np.insert(peaks, 0, 0, axis=0)
    distances = [peaks[i] - temp[i] for i in range(len(peaks))]
    return np.std(distances)


# ----- Frequency-domain features -----


def get_mean_rms(y: np.ndarray) -> float:
    rms = librosa.feature.rms(y=y,
                              frame_length=FRAME_LENGTH,
                              hop_length=HOP_LENGTH)
    return np.mean(rms)


def get_mean_spectral_centroid(y: np.ndarray):
    per_frame = librosa.feature.spectral_centroid(y=y,
                                                  sr=SAMPLING_RATE,
                                                  hop_length=HOP_LENGTH)[0]
    return np.mean(per_frame)

directories = [
    'F1_32',
    'F33_66',
    'F67_80',
    'F81_104',
    'F117_130',
    'F131_144',
    'F145_160',
    'F161_163',
]

signal_directory = './Signals/'

def load_all_signals():
    res = []
    for directory in directories:
        signal_folder = os.path.join(signal_directory, directory)
        print(signal_folder)
        if not os.path.isdir(signal_folder):
            continue
        for filename in os.listdir(signal_folder):
            if filename.endswith('_loop.wav') or filename.endswith('_loop_mono.wav'):
                print(filename)
                signal_path = os.path.join(signal_folder, filename)
                a = wavfile.read(signal_path)
                res.append({
                    'id': filename.split('_')[0], 'y': np.array(a[1],dtype='float')})
    return res


    
df = load_all_signals()
results = []
X = pd.DataFrame(columns=[])
for item in df:
    id = item['id']
    y = item['y']
    print(id)
    res = {
        "id": id,
        # -- Temporal features
        "mean_amplitude": ["%.2f" % round(get_mean_amplitude(y), 2)],
        "rms": ["%.2f" % round(get_mean_rms(y), 4)],
        "pulse_count": [get_beat_count(y)],
        "std_pulse_dist": ["%.2f" % round(get_std_distance_between_beats(y), 2)],
        "zero_count": [get_zero_count(y)],
        "mean_onset_strength": ["%.2f" % round(get_mean_onset_strength(y), 2)],
        # -- Spectral features
        "spectral_centroid": ["%.2f" % round(get_mean_spectral_centroid(y), 2)],
    }
    feature_vec = pd.DataFrame.from_dict(res)
    results.append(res)
    X = pd.concat([X, feature_vec], ignore_index=True)


print(X.head())

# Save feature matrix to disk
path = './features.csv'
X.to_csv(path, index=False)

with open('features.json', 'w') as f:
    json.dump(results, f, indent=4)

./Signals/F1_32
F11_loop.wav
F10_loop.wav
F25_loop.wav
F24_loop.wav
F8_loop_mono.wav
F1_loop.wav
F32_loop.wav
F7_loop.wav
F6_loop.wav
F5_loop_mono.wav
F28_loop.wav
F29_loop.wav
F16_loop.wav
F17_loop.wav
F22_loop.wav
F23_loop.wav
F31_loop.wav
F30_loop.wav
F9_loop.wav
F3_loop.wav
F2_loop.wav
F26_loop.wav
F27_loop.wav
F18_loop.wav
F19_loop.wav
F12_loop.wav
F13_loop.wav
F21_loop.wav
F20_loop.wav
F15_loop.wav
F14_loop.wav
F4_loop.wav
./Signals/F33_66
F66_loop.wav
F58_loop.wav
F59_loop.wav
F52_loop.wav
F53_loop.wav
F38_loop.wav
F39_loop.wav
F33_loop.wav
F45_loop.wav
F44_loop.wav
F48_loop.wav
F49_loop.wav
F35_loop.wav
F34_loop.wav
F42_loop.wav
F43_loop.wav
F61_loop.wav
F60_loop.wav
F55_loop.wav
F54_loop.wav
F46_loop.wav
F47_loop.wav
F51_loop.wav
F50_loop.wav
F65_loop.wav
F64_loop.wav
F56_loop.wav
F57_loop.wav
F62_loop.wav
F63_loop.wav
F36_loop.wav
F37_loop.wav
F41_loop.wav
F40_loop.wav
./Signals/F67_80
F67_loop.wav
F71_loop.wav
F70_loop.wav
F76_loop.wav
F77_loop.wav
F78_loop.wav
F79_loop.wav


  a = wavfile.read(signal_path)


F90_loop.wav
F91_loop.wav
./Signals/F117_130
F124_loop.wav
F125_loop.wav
F123_loop.wav
F122_loop.wav
F129_loop.wav
F128_loop.wav
F117_loop.wav
F127_loop.wav
F126_loop.wav
F119_loop.wav
F118_loop.wav
F130_loop.wav
F120_loop.wav
F121_loop.wav
./Signals/F131_144
F133_loop.wav
F132_loop.wav
F144_loop.wav
F139_loop.wav
F138_loop.wav
F134_loop.wav
F135_loop.wav
F143_loop.wav
F142_loop.wav
F131_loop.wav
F137_loop.wav
F136_loop.wav
F140_loop.wav
F141_loop.wav
./Signals/F145_160
F145_loop.wav
F153_loop.wav
F152_loop.wav
F159_loop.wav
F158_loop.wav
F154_loop.wav
F155_loop.wav
F160_loop.wav
F149_loop.wav
F148_loop.wav
F150_loop.wav
F151_loop.wav
F147_loop.wav
F146_loop.wav
F157_loop.wav
F156_loop.wav
./Signals/F161_163
F161_loop.wav
F163_loop.wav
F162_loop.wav
F11
F10
F25
F24
F8
F1
F32
F7
F6
F5
F28
F29
F16
F17
F22
F23
F31
F30
F9
F3
F2
F26
F27
F18
F19
F12
F13
F21
F20
F15
F14
F4
F66
F58
F59
F52
F53
F38
F39
F33
F45
F44
F48
F49
F35
F34
F42
F43
F61
F60
F55
F54
F46
F47
F51
F50
F65
F64
F56
F57
F62
F63
F

In [4]:
import json

# Load the original feature data
with open('features.json', 'r') as f:
    data = json.load(f)

# Dictionary to collect feature values
features = {}

# Aggregate values for each feature
for item in data:
    for key, value in item.items():
        if key == "id":
            continue
        val = float(value[0]) if isinstance(value[0], str) else value[0]
        features.setdefault(key, []).append(val)

# Build simplified min-max structure
feature_ranges = {
    feature: [min(values), max(values)] for feature, values in features.items()
}

# Write to new JSON file
with open('feature_ranges.json', 'w') as f:
    json.dump(feature_ranges, f, indent=4)

print("Simplified feature ranges saved to 'feature_ranges.json'")

Simplified feature ranges saved to 'feature_ranges.json'
