In [1]:
#Imports
import sys
import librosa
import numpy as np
import json

In [36]:
# Equalization without any standardization
# def normalize(data, A=0, B=100):
#     data_min = np.min(data)
#     data_max = np.max(data)
#     return A + ((data - data_min) * (B - A)) / (data_max - data_min)

# def generate_visualization_data(audio_path, output_path):
#     y, sr = librosa.load(audio_path, sr=None, mono=True)
#     n_fft = 2048
#     hop_length = 512

#     peak_frequencies = [63, 160, 400, 1000, 2500, 6250, 16000]

#     spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
#     freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

#     band_data = []
#     for peak_freq in peak_frequencies:
#         index = np.argmin(np.abs(freqs - peak_freq))
#         band_data.append(spectrogram[index])

#     band_data = np.stack(band_data)
#     band_data = normalize(band_data)  # Normalize the band data
    
#     np.save(output_path, band_data)
#     return band_data


# Uncomment the block below to run this code as a standalone script (see the usage message it prints).
# if __name__ == '__main__':
#     if len(sys.argv) != 3:
#         print('Usage: python generate_visualization_data.py <input_audio_path> <output_data_path>')
#     else:
#         audio_path = sys.argv[1]
#         output_path = sys.argv[2]
#         generate_visualization_data(audio_path, output_path)


In [2]:
# Standardization using LUFS (Loudness Units relative to Full Scale)
from pyloudnorm import Meter

def generate_visualization_data_to_npy(
    audio_path,
    output_path,
    reference_lufs=-14,
    peak_frequencies=None,
    n_fft=2048,
    hop_length=512,
):
    """Extract per-band STFT magnitudes from an audio file and save them with ``np.save``.

    The file is loaded as mono at its native sample rate, gain-adjusted toward
    ``reference_lufs``, and transformed with an STFT. For every requested
    frequency the magnitude row of the nearest FFT bin is taken, and the
    stacked rows are rescaled so that the largest value equals 100.

    Parameters
    ----------
    audio_path : str
        Path of the audio file, passed to ``librosa.load``.
    output_path : str
        Destination passed to ``np.save``. NumPy appends ``.npy`` when the
        name does not already end with it.
    reference_lufs : float, optional
        Target integrated loudness used to compute the gain (default -14).
    peak_frequencies : sequence of float, optional
        Frequencies in Hz to sample. Defaults to
        ``[63, 160, 400, 1000, 2500, 6250]``. Each one is mapped to the
        closest FFT bin, so a value outside the bin range silently maps to the
        first or last bin.
    n_fft : int, optional
        FFT window size (default 2048).
    hop_length : int, optional
        Hop between STFT frames in samples (default 512).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(peak_frequencies), n_frames)`` scaled to 0-100.
        All zeros when the selected bands contain no energy.

    Raises
    ------
    ValueError
        If ``peak_frequencies`` is empty (raised by ``np.stack``).
    """
    if peak_frequencies is None:
        peak_frequencies = [63, 160, 400, 1000, 2500, 6250]

    y, sr = librosa.load(audio_path, sr=None, mono=True)

    # Calculate LUFS
    meter = Meter(sr)  # create meter
    lufs = meter.integrated_loudness(y)

    # Apply gain adjustment. A non-finite loudness (e.g. -inf for digital
    # silence) would yield an infinite gain and turn the signal into NaN, so
    # the signal is left untouched in that case.
    if np.isfinite(lufs):
        gain_adjustment = 10 ** ((reference_lufs - lufs) / 20)
        y = y * gain_adjustment

    spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # One magnitude row per requested frequency (nearest FFT bin).
    band_data = np.stack(
        [spectrogram[np.argmin(np.abs(freqs - peak_freq))] for peak_freq in peak_frequencies]
    )

    # Normalize to 0-100 range.
    # NOTE(review): the STFT magnitude scales linearly with the (positive) gain
    # applied above, so dividing by the per-file maximum cancels that gain;
    # apart from rounding, the result does not depend on reference_lufs. If
    # loudness-relative levels across songs are wanted, normalise against a
    # fixed reference instead of the per-file maximum.
    max_val = np.max(band_data)
    if max_val > 0:
        band_data_normalized = (band_data / max_val) * 100
    else:
        # No energy in any selected band: avoid 0/0 and return a silent matrix.
        band_data_normalized = np.zeros_like(band_data)

    np.save(output_path, band_data_normalized)
    return band_data_normalized


In [7]:
# Export the band matrix for "panorama" through the NumPy writer.
# NOTE(review): the target name ends in ".json", but the data is written with
# np.save, which (per the NumPy docs) appends ".npy" to names lacking it, so
# the file on disk is presumably "<name>.json.npy" -- confirm and rename.
song_filepath, output_filepath = (
    "./songs/droeloe_panorama.wav",
    "./data/droeloe_panorama_data_lufs_standardization.json",
)
song_data = generate_visualization_data_to_npy(
    audio_path=song_filepath,
    output_path=output_filepath,
)

In [3]:
def generate_visualization_data_to_json(
    audio_path,
    output_path,
    reference_lufs=-14,
    peak_frequencies=None,
    n_fft=2048,
    hop_length=512,
):
    """Extract per-band STFT magnitudes from an audio file and save them as JSON.

    The file is loaded as mono at its native sample rate, gain-adjusted toward
    ``reference_lufs``, and transformed with an STFT. For every requested
    frequency the magnitude row of the nearest FFT bin is taken, and all rows
    are rescaled so that the largest value equals 100. The result is written
    to ``output_path`` as a JSON array of arrays (one inner array per band).

    Parameters
    ----------
    audio_path : str
        Path of the audio file, passed to ``librosa.load``.
    output_path : str
        Path of the JSON file to create or overwrite.
    reference_lufs : float, optional
        Target integrated loudness used to compute the gain (default -14).
    peak_frequencies : sequence of float, optional
        Frequencies in Hz to sample. Defaults to
        ``[63, 160, 400, 1000, 2500, 6250, 16000]``. Each one is mapped to the
        closest FFT bin, so a value outside the bin range (for instance above
        half the file's sample rate) silently maps to the first or last bin.
    n_fft : int, optional
        FFT window size (default 2048).
    hop_length : int, optional
        Hop between STFT frames in samples (default 512).

    Returns
    -------
    list of list of float
        One list per requested frequency, one value per STFT frame, scaled to
        0-100. All zeros when the selected bands contain no energy.

    Raises
    ------
    ValueError
        If ``peak_frequencies`` is empty (raised by ``np.stack``).
    """
    if peak_frequencies is None:
        peak_frequencies = [63, 160, 400, 1000, 2500, 6250, 16000]

    y, sr = librosa.load(audio_path, sr=None, mono=True)

    # Calculate LUFS
    meter = Meter(sr)  # create meter
    lufs = meter.integrated_loudness(y)

    # Apply gain adjustment. A non-finite loudness (e.g. -inf for digital
    # silence) would yield an infinite gain and turn the signal into NaN,
    # which json.dump would then emit as invalid "NaN" tokens.
    if np.isfinite(lufs):
        gain_adjustment = 10 ** ((reference_lufs - lufs) / 20)
        y = y * gain_adjustment

    spectrogram = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    # One magnitude row per requested frequency (nearest FFT bin). Promote to
    # float64 so the arithmetic below matches plain Python floats.
    band_data = np.stack(
        [spectrogram[np.argmin(np.abs(freqs - peak_freq))] for peak_freq in peak_frequencies]
    ).astype(np.float64)

    # Normalize to 0-100 range, vectorised instead of per-element Python loops.
    # NOTE(review): the STFT magnitude scales linearly with the (positive) gain
    # applied above, so dividing by the per-file maximum cancels that gain;
    # apart from rounding, the result does not depend on reference_lufs.
    max_val = np.max(band_data)
    if max_val > 0:
        band_data_normalized = (band_data / max_val * 100).tolist()
    else:
        # No energy in any selected band: avoid 0/0 and emit a silent matrix.
        band_data_normalized = np.zeros_like(band_data).tolist()

    # Save as JSON
    with open(output_path, 'w') as json_file:
        json.dump(band_data_normalized, json_file)

    return band_data_normalized


In [15]:
# Export the band data of every song to JSON. This replaces four copy-pasted
# cells that differed only in the song name; each song is read from
# ./songs/<stem>.wav and written to ./data/<stem>_data_lufs_standardization.json.
SONG_STEMS = [
    "droeloe_panorama",
    "droeloe_sunburn",
    "DROELOE_Statues",
    "MELVV_Blank",
]

for song_stem in SONG_STEMS:
    song_filepath = "./songs/" + song_stem + ".wav"
    output_filepath = "./data/" + song_stem + "_data_lufs_standardization.json"
    # After the loop these names hold the values for the last song, exactly as
    # they did when the cells were run one after another.
    song_data_json = generate_visualization_data_to_json(song_filepath, output_filepath)

In [8]:
# Show the band matrix produced by generate_visualization_data_to_npy,
# followed by its shape (one row per sampled frequency, one column per frame).
print(song_data, song_data.shape, sep="\n")

[[1.8872900e-04 1.1352581e-04 5.6602516e-05 ... 6.6412557e-03
  1.1278910e-04 9.3474788e-05]
 [1.2876425e-04 6.8364810e-05 1.6411139e-05 ... 7.4288307e-04
  1.7967897e-04 2.0184548e-04]
 [2.4217181e-05 2.9594767e-05 5.1025705e-05 ... 2.9056570e-03
  4.0422023e-05 3.9710219e-05]
 [7.1141917e-06 4.8104328e-05 6.9273912e-05 ... 7.8737451e-04
  6.2999206e-05 5.9721027e-05]
 [4.5149056e-05 9.8356897e-05 4.0167382e-05 ... 1.1009362e-04
  8.5383572e-06 5.0994954e-06]
 [3.7977522e-06 2.4378067e-05 3.9630060e-05 ... 2.7319074e-06
  2.4783772e-05 1.0175625e-05]]
(6, 19785)


In [9]:
# Spot-check the scaling: all band values at frame 10000, then the global
# minimum and maximum of the matrix (the maximum is 100 after normalisation).
print(song_data[:, 10000], song_data.min(), song_data.max(), sep="\n")

[2.393634   3.4370944  1.9616172  0.398194   0.02805529 0.05049457]
1.056107e-06
100.0
