In [1]:
#Imports
import sys
import librosa
import numpy as np
import json

In [36]:
# Equalization without any standardization
# def normalize(data, A=0, B=100):
#     data_min = np.min(data)
#     data_max = np.max(data)
#     return A + ((data - data_min) * (B - A)) / (data_max - data_min)

# def generate_visualization_data(audio_path, output_path):
#     y, sr = librosa.load(audio_path, sr=None, mono=True)
#     n_fft = 2048
#     hop_length = 512

#     peak_frequencies = [63, 160, 400, 1000, 2500, 6250, 16000]

#     spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
#     freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

#     band_data = []
#     for peak_freq in peak_frequencies:
#         index = np.argmin(np.abs(freqs - peak_freq))
#         band_data.append(spectrogram[index])

#     band_data = np.stack(band_data)
#     band_data = normalize(band_data)  # Normalize the band data
    
#     np.save(output_path, band_data)
#     return band_data


# Uncomment if running this code as an independent script ( see usage below )
# if __name__ == '__main__':
#     if len(sys.argv) != 3:
#         print('Usage: python generate_visualization_data.py <input_audio_path> <output_data_path>')
#     else:
#         audio_path = sys.argv[1]
#         output_path = sys.argv[2]
#         generate_visualization_data(audio_path, output_path)


In [6]:
# Standardization using LUFS (Loudness Units relative to Full Scale)
from pyloudnorm import Meter

def generate_visualization_data_to_npy(audio_path, output_path, reference_lufs=-14):
    y, sr = librosa.load(audio_path, sr=None, mono=True)
    
    # Calculate LUFS
    meter = Meter(sr)  # create meter
    lufs = meter.integrated_loudness(y)

    # Apply gain adjustment
    gain_adjustment = 10 ** ((reference_lufs - lufs) / 20)
    y = y * gain_adjustment

    n_fft = 2048
    hop_length = 512

    peak_frequencies = [63, 160, 400, 1000, 2500, 6250]

    spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    band_data = []
    for peak_freq in peak_frequencies:
        index = np.argmin(np.abs(freqs - peak_freq))
        band_data.append(spectrogram[index])

    # Normalize to 0-100 range
    max_val = np.max(band_data)
    band_data_normalized = (band_data / max_val) * 100

    np.save(output_path, band_data_normalized)
    return band_data_normalized


In [7]:
song_filepath = "./songs/droeloe_panorama.wav"
output_filepath = "./data/droeloe_panorama_data_lufs_standardization.json"
song_data = generate_visualization_data_to_npy(song_filepath, output_filepath)

In [None]:
def generate_visualization_data_to_json(audio_path, output_path, reference_lufs=-14):
    y, sr = librosa.load(audio_path, sr=None, mono=True)
    
    # Calculate LUFS
    meter = Meter(sr)  # create meter
    lufs = meter.integrated_loudness(y)

    # Apply gain adjustment
    gain_adjustment = 10 ** ((reference_lufs - lufs) / 20)
    y = y * gain_adjustment

    n_fft = 2048
    hop_length = 512

    peak_frequencies = [63, 160, 400, 1000, 2500, 6250, 16000]

    spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    band_data = []
    for peak_freq in peak_frequencies:
        index = np.argmin(np.abs(freqs - peak_freq))
        band_data.append(spectrogram[index].tolist())  # Converting numpy array to list

    # Normalize to 0-100 range
    max_val = np.max(band_data)
    band_data_normalized = [[val / max_val * 100 for val in band] for band in band_data]

    # Save as JSON
    with open(output_path, 'w') as json_file:
        json.dump(band_data_normalized, json_file)
        
    return band_data_normalized


In [8]:
print(song_data)
print(song_data.shape)

[[0.00018872899083531767, 0.00011352580686282746, 5.660251511451905e-05, 4.223090457395041e-05, 2.291946898859057e-05, 5.7433030570114464e-05, 4.9915978489281366e-05, 3.1934896383600345e-05, 2.798505642781966e-05, 8.80035821852921e-06, 5.2407826818882615e-05, 4.5447476454610364e-05, 3.7962788134128395e-05, 3.2385169002904684e-05, 1.3170202859919835e-05, 1.3918645845998275e-05, 2.8799020226741468e-05, 7.42672834944563e-05, 0.00017070473828311784, 4.526620152334187e-05, 0.0003066356025661178, 0.0004128214508620949, 0.0005918305759990948, 0.0009719799368858733, 0.0006779701468453435, 0.001430070015209858, 0.0017884526680673955, 0.002174557104163167, 0.0017770405618096506, 0.00243519846774094, 0.0018121586529468246, 0.002093219499541623, 0.002585202293663443, 0.0023752331974762064, 0.0066864791806506415, 0.012540084402108171, 0.028958753949669364, 0.022643647022823725, 0.07861291229443138, 0.15164704868415685, 0.17673288284305622, 0.1551495271264184, 0.10413044543200806, 0.0687149768926481

AttributeError: 'list' object has no attribute 'shape'

In [9]:
print(song_data[:, 10000])
print(song_data.min())
print(song_data.max())

TypeError: list indices must be integers or slices, not tuple