## Recompute spectrograms that were saved with padding
Unfortunately, I saved the spectrograms with padding whenever the audio clip was shorter than 15 seconds.

This is not optimal, so this notebook recomputes the affected spectrograms without padding and updates their stored lengths.

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import librosa
import h5py
import os

In [2]:
# Read the train and test metadata tables (train first, then test).
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ("../../data/dataset_train.csv", "../../data/dataset_test.csv")
)

In [3]:
import json

## Configurations
# Parse the shared JSON settings. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until it is
# garbage-collected.
with open("../resources/config.json", "r") as config_file:
    config = json.load(config_file)

class cfg:
    """Namespace holding the audio and mel-spectrogram settings read from ``config``."""

    seed = config["seed"]

    # --- audio settings ---
    sr = config["sampling_rate"]  # = 22050
    duration = config["duration"]  # the duration of the clips
    n_samples = duration * sr  # clip duration multiplied by the sampling rate

    # --- spectrogram settings ---
    hop_length = config["hop_length"]  # = 2048, "stepsize" of the fft for the melspectrograms
    nfft = config["nfft"]  # = 4096, windowsize of the fft for the melspectrograms
    n_mels = config["n_mels"]  # = 128, number of mel frequency bins
    fmax = sr / 2  # maximum frequency in the melspectrograms
    # (mel bins, n_samples // hop_length + 1)
    input_dim = (n_mels, int(n_samples // hop_length + 1))

    test_size = config["test_size"]

In [4]:
def resave_spec(spec, hdf5_path, name, compression="gzip", chunks=True):
    """Write ``spec`` as dataset ``name`` in the HDF5 file at ``hdf5_path``.

    Any existing dataset with the same name is removed first, so the stored
    data is replaced rather than duplicated. If no such dataset exists yet,
    the new one is simply created.

    Parameters
    ----------
    spec : array-like
        Data handed to ``create_dataset`` as ``data``.
    hdf5_path : str
        Path of the HDF5 file; opened in append mode.
    name : str
        Name of the dataset inside the file.
    compression, chunks :
        Forwarded unchanged to ``create_dataset``.

    Notes
    -----
    Errors are reported with ``print`` and not re-raised (best-effort save),
    so the caller is not informed when a save fails.
    """
    try:
        with h5py.File(hdf5_path, 'a') as f:  # Open in append mode
            # Guard the delete: removing a missing dataset raises, which would
            # land in the except branch and skip the write entirely.
            if name in f:
                del f[name]
            f.create_dataset(name, data=spec, compression=compression, chunks=chunks)
    except Exception as e:
        print(f'Error saving spectrograms to {hdf5_path}: {e}')

def recompute_and_save_spec(filepath, name):
    """Recompute the spectrogram of one audio file and store it next to that file.

    The spectrogram comes from ``compute_spec(filepath)`` and is written via
    ``resave_spec`` to ``spectrograms.h5`` in the directory that contains
    ``filepath``, under the dataset name ``name``.

    Parameters
    ----------
    filepath : str
        Path of the audio file.
    name : str
        Dataset name inside the HDF5 file.

    Returns
    -------
    int
        Size of the last axis of the recomputed spectrogram.
    """
    spec = compute_spec(filepath)
    # os.path.join keeps the path relative when ``filepath`` has no directory
    # part; plain concatenation would produce "/spectrograms.h5" (filesystem
    # root) in that case.
    hdf5_path = os.path.join(os.path.dirname(filepath), "spectrograms.h5")
    resave_spec(spec, hdf5_path=hdf5_path, name=name)
    return spec.shape[-1]

def compute_spec(filepath, sr=cfg.sr, duration=cfg.duration, nfft=cfg.nfft, hop_length=cfg.hop_length, n_mels=cfg.n_mels, fmax=cfg.fmax):
    """Compute the log-scaled mel spectrogram of an audio file.

    The file is loaded with ``librosa.load`` at sampling rate ``sr``, turned
    into a mel spectrogram (``fmin`` fixed at 0), and converted with
    ``librosa.power_to_db``. No padding or trimming is applied here.

    Parameters
    ----------
    filepath : str
        Path of the audio file to load.
    sr :
        Sampling rate passed to ``librosa.load``; the rate it returns is used
        for the mel spectrogram.
    duration :
        Accepted for interface compatibility but not used in the body.
    nfft, hop_length, n_mels, fmax :
        Forwarded to ``librosa.feature.melspectrogram`` as ``n_fft``,
        ``hop_length``, ``n_mels`` and ``fmax``.

    Returns
    -------
    The array returned by ``librosa.power_to_db``.
    """
    waveform, loaded_sr = librosa.load(filepath, sr=sr)
    mel_power = librosa.feature.melspectrogram(
        y=waveform,
        sr=loaded_sr,
        n_fft=nfft,
        hop_length=hop_length,
        n_mels=n_mels,
        fmin=0,
        fmax=fmax,
    )
    return librosa.power_to_db(mel_power)

In [5]:
# Test-set rows whose clip length is at most 15 (the threshold named in the
# notebook introduction).
is_short_clip = df_test["length_seconds"] <= 15
indices = df_test.loc[is_short_clip].index

# Recompute each affected spectrogram and record its new length in the table.
for i in tqdm(indices):
    audio_path = "../" + df_test.loc[i, "fullfilename"]
    dataset_name = df_test.loc[i, "spectrogram"]
    df_test.loc[i, "length_spectrogram"] = recompute_and_save_spec(audio_path, dataset_name)

 55%|██████████████████████▋                  | 208/376 [00:06<00:04, 37.98it/s]Note: Illegal Audio-MPEG-Header 0x616c6261 at offset 111094.
Note: Trying to resync...
Note: Hit end of (available) data during resync.
100%|█████████████████████████████████████████| 376/376 [00:10<00:00, 35.37it/s]


In [6]:
# Train-set rows whose clip length is at most 15 (the threshold named in the
# notebook introduction).
is_short_clip = df_train["length_seconds"] <= 15
indices = df_train.loc[is_short_clip].index

# Recompute each affected spectrogram and record its new length in the table.
for i in tqdm(indices):
    audio_path = "../" + df_train.loc[i, "fullfilename"]
    dataset_name = df_train.loc[i, "spectrogram"]
    df_train.loc[i, "length_spectrogram"] = recompute_and_save_spec(audio_path, dataset_name)

 11%|████▎                                   | 156/1435 [00:04<00:37, 34.10it/s]Note: Illegal Audio-MPEG-Header 0x50455441 at offset 585059.
Note: Trying to resync...
Note: Hit end of (available) data during resync.
[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!
 23%|█████████▎                              | 335/1435 [00:08<00:27, 39.60it/s]Note: Illegal Audio-MPEG-Header 0x63682028 at offset 214748.
Note: Trying to resync...
Note: Hit end of (available) data during resync.
 25%|██████████▏                             | 364/1435 [00:09<00:26, 40.92it/s]Note: Illegal Audio-MPEG-Header 0x50455441 at offset 214329.
Note: Trying to resync...
Note: Hit end of (available) data during resync.
 50%|███████████████████▉                    | 715/1435 [00:18<00:17, 40.17it/s][src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!
 57%|██████████████████████▌                 | 811/1435 [00:20<00:16, 38.51it/s]Note: Illegal Audio-MPEG-Header 0x

In [7]:
# Write the updated tables back over the CSV files they were loaded from,
# leaving out the index column.
df_train.to_csv(path_or_buf="../../data/dataset_train.csv", index=False)
df_test.to_csv(path_or_buf="../../data/dataset_test.csv", index=False)