# Generate Frequency Statistics CSV

This notebook extracts statistical features (mean and standard deviation) from audio spectrograms.

**Output:** `work/trn_curated_feature.csv`

For each audio file, it computes:
- Mean and std for each STFT frequency bin (1025 bins with the default settings)
- Mean and std for each Mel frequency band (128 bands)
- Mean and std for each CQT frequency bin (84 bins with the default settings)

In [1]:
from preprocessor import AudioPreprocessor
import os
import glob
import pandas as pd
import numpy as np
from PIL import Image
from tqdm.notebook import tqdm

In [2]:
def get_files(type):
    """Pair every WAV file of a dataset split with its spectrogram image paths.

    Args:
        type: Name of the sub-directory of ``input/`` to scan, e.g.
            ``"trn_curated"``. The name shadows the ``type`` builtin; it is
            kept so existing callers that pass it by keyword keep working.

    Returns:
        A list of ``(src, (ft_png, mel_png, cqt_png))`` tuples. ``src`` is a
        path matched by ``input/<type>/*.wav``; the three destinations are
        ``work/<type>/<stem>/ft.png``, ``.../mel.png`` and ``.../cqt.png``.
        The list is in ``glob.glob`` order, which is not guaranteed to be
        stable; sort the result if a reproducible order is needed.
    """
    audio_files = []
    for src in glob.glob(f"input/{type}/*.wav"):
        # splitext removes only the final extension, so a name such as
        # "a.b.wav" keeps the full stem "a.b" instead of being cut at the
        # first dot.
        stem = os.path.splitext(os.path.basename(src))[0]
        # Use `type` itself rather than indexing into the split source path,
        # so the destination stays correct even if `type` contains a slash.
        dst_dir = os.path.join("work", type, stem)
        dst_files = (
            os.path.join(dst_dir, "ft.png"),
            os.path.join(dst_dir, "mel.png"),
            os.path.join(dst_dir, "cqt.png"),
        )
        audio_files.append((src, dst_files))

    return audio_files

In [3]:
# Get sorted list of audio files. get_files() returns them in glob order,
# which is not guaranteed, so sorting keeps the row order reproducible.
audio_files = sorted(get_files("trn_curated"))
print(f"Found {len(audio_files)} audio files to process")
if not audio_files:
    # Stop here with a clear message rather than an IndexError on the
    # example line below.
    raise FileNotFoundError("No .wav files found under input/trn_curated")
print(f"Example: {audio_files[0][0]}")

Found 4970 audio files to process
Example: input/trn_curated/0006ae4e.wav


In [4]:
# Build the preprocessor with its default arguments and echo the settings
# that the feature extraction below relies on
ap = AudioPreprocessor()
print(
    "AudioPreprocessor initialized with:",
    f"  Sampling rate: {ap.sampling_rate} Hz",
    f"  Duration: {ap.duration} seconds",
    f"  Mel bands: {ap.n_mels}",
    f"  Hop length: {ap.hop_length}",
    sep="\n",
)

AudioPreprocessor initialized with:
  Sampling rate: 44100 Hz
  Duration: 2 seconds
  Mel bands: 128
  Hop length: 512


In [5]:
# Process all audio files.
# Each list below gains one entry per audio file, in the order of audio_files.
files = []
stft_means = []
stft_stds = []
mel_means = []
mel_stds = []
cqt_means = []
cqt_stds = []

# The destination image paths paired with each source file are not needed
# here, so they are bound to a single placeholder name.
for src, _dst_paths in tqdm(audio_files, desc="Processing audio files"):
    # Read and process audio
    y = ap.read_audio(src)

    # Generate spectrograms: transform -> trim_blank -> normalize.
    # NOTE(review): the recorded run shows a warning raised from the division
    # inside normalize, and the final table contains NaN values. Presumably
    # some clips yield norm_max == norm_min -- confirm in AudioPreprocessor.
    stft = ap.normalize(ap.trim_blank(ap.audio_to_stft(y)))
    mel = ap.normalize(ap.trim_blank(ap.audio_to_mel(y)))
    cqt = ap.normalize(ap.trim_blank(ap.audio_to_cqt(y)))

    # Extract the (mean, std) pair for each representation
    stft_mean, stft_std = ap.get_statistics(stft)
    mel_mean, mel_std = ap.get_statistics(mel)
    cqt_mean, cqt_std = ap.get_statistics(cqt)

    # Store results. basename() is used instead of `_, file = os.path.split(src)`
    # because binding `_` at notebook level shadows IPython's last-output
    # variable.
    files.append(os.path.basename(src))
    stft_means.append(stft_mean)
    stft_stds.append(stft_std)
    mel_means.append(mel_mean)
    mel_stds.append(mel_std)
    cqt_means.append(cqt_mean)
    cqt_stds.append(cqt_std)

Processing audio files:   0%|          | 0/4970 [00:00<?, ?it/s]

  spectrogram = (spectrogram - norm_min) / (norm_max - norm_min)


In [6]:
# Turn each per-file list of statistics into a NumPy array, rebinding the
# same names so the following cells can slice them by column
stft_means, stft_stds, mel_means, mel_stds, cqt_means, cqt_stds = (
    np.array(values)
    for values in (stft_means, stft_stds, mel_means, mel_stds, cqt_means, cqt_stds)
)

print(
    "\nStatistics shapes:",
    f"  STFT means: {stft_means.shape}",
    f"  Mel means: {mel_means.shape}",
    f"  CQT means: {cqt_means.shape}",
    sep="\n",
)


Statistics shapes:
  STFT means: (4970, 1025)
  Mel means: (4970, 128)
  CQT means: (4970, 84)


In [7]:
# Assemble one wide table: the file name first, then one column per band for
# the STFT, Mel and CQT means and stds (in that order)
df = pd.DataFrame(dict(
    [("file", files)]
    + [(f"stft_mean_{i}", stft_means[:, i]) for i in range(stft_means.shape[1])]
    + [(f"stft_std_{i}", stft_stds[:, i]) for i in range(stft_stds.shape[1])]
    + [(f"mel_mean_{i}", mel_means[:, i]) for i in range(mel_means.shape[1])]
    + [(f"mel_std_{i}", mel_stds[:, i]) for i in range(mel_stds.shape[1])]
    + [(f"cqt_mean_{i}", cqt_means[:, i]) for i in range(cqt_means.shape[1])]
    + [(f"cqt_std_{i}", cqt_stds[:, i]) for i in range(cqt_stds.shape[1])]
))

# Summarise how the columns break down by feature family
print(
    f"\nDataFrame shape: {df.shape}",
    f"Columns: {len(df.columns)}",
    "  - file: 1 column",
    f"  - STFT features: {stft_means.shape[1] * 2} columns (mean + std)",
    f"  - Mel features: {mel_means.shape[1] * 2} columns (mean + std)",
    f"  - CQT features: {cqt_means.shape[1] * 2} columns (mean + std)",
    sep="\n",
)


DataFrame shape: (4970, 2475)
Columns: 2475
  - file: 1 column
  - STFT features: 2050 columns (mean + std)
  - Mel features: 256 columns (mean + std)
  - CQT features: 168 columns (mean + std)


In [8]:
# Show the first five rows as a quick visual check of the feature table
df.head(n=5)

Unnamed: 0,file,stft_mean_0,stft_mean_1,stft_mean_2,stft_mean_3,stft_mean_4,stft_mean_5,stft_mean_6,stft_mean_7,stft_mean_8,...,cqt_std_74,cqt_std_75,cqt_std_76,cqt_std_77,cqt_std_78,cqt_std_79,cqt_std_80,cqt_std_81,cqt_std_82,cqt_std_83
0,0006ae4e.wav,0.630177,0.695077,0.683876,0.648365,0.61096,0.586519,0.557335,0.523971,0.511032,...,0.141729,0.123818,0.097963,0.098019,0.108474,0.11421,0.101129,0.088244,0.098404,0.104893
1,0019ef41.wav,0.543871,0.59202,0.65419,0.642607,0.679943,0.670065,0.612787,0.567508,0.531819,...,0.142544,0.140708,0.126795,0.122447,0.133664,0.148726,0.155113,0.169245,0.179506,0.178364
2,001ec0ad.wav,0.221086,0.209854,0.28183,0.315324,0.311852,0.284359,0.299771,0.332832,0.346691,...,0.224214,0.218608,0.208298,0.188692,0.178248,0.174347,0.146416,0.148517,0.152399,0.154559
3,0026c7cb.wav,0.445869,0.547143,0.564999,0.48569,0.467381,0.471674,0.431237,0.41491,0.40471,...,0.13511,0.12988,0.1295,0.127833,0.125553,0.123609,0.123089,0.120902,0.119276,0.116201
4,0026f116.wav,0.318727,0.523809,0.633859,0.631903,0.546305,0.480865,0.467831,0.419503,0.395317,...,0.206768,0.219073,0.208102,0.193863,0.187029,0.188512,0.185782,0.181636,0.181744,0.185358


In [9]:
# Save to CSV
output_path = "work/trn_curated_feature.csv"
# Create the directory that output_path points into, so that changing the
# path above is the only edit needed to write somewhere else. A bare file
# name has no directory part, in which case there is nothing to create.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
# index=False keeps the row index out of the file
df.to_csv(output_path, index=False)
print(f"\n✅ Successfully saved to: {output_path}")
print(f"   File size: {os.path.getsize(output_path) / (1024*1024):.2f} MB")


✅ Successfully saved to: work/trn_curated_feature.csv
   File size: 126.77 MB


In [10]:
# Quick sanity check
# One boolean per row: True when any feature in that row is NaN
nan_row_mask = df.isna().any(axis=1)

print(f"\nSanity checks:")
print(f"  Total rows: {len(df)}")
print(f"  Total columns: {len(df.columns)}")
print(f"  Any NaN values: {nan_row_mask.any()}")
print(f"  Memory usage: {df.memory_usage(deep=True).sum() / (1024*1024):.2f} MB")

# A bare True/False does not say which rows are affected, so list them to
# make the NaN entries traceable back to their source audio files.
if nan_row_mask.any():
    print(f"  Rows with NaN: {int(nan_row_mask.sum())}")
    print(f"  Affected files (first 10): {df.loc[nan_row_mask, 'file'].head(10).tolist()}")


Sanity checks:
  Total rows: 4970
  Total columns: 2475
  Any NaN values: True
  Memory usage: 47.23 MB
