In [None]:
import glob
import json
import os
import sys
from pathlib import Path

import librosa
import numpy as np
import openl3
import soundfile as sf
import tensorflow as tf

In [None]:

# Directory that is scanned (recursively) for input .mp3 files.
ROOT = Path("../data/fma_small")

# Directory the per-track embeddings are written to (change if a new
# embedding folder is desired). Created up front so later cells can write
# into it without checking.
out = Path("embeds_small")
out.mkdir(parents=True, exist_ok=True)

# Single recursive scan; sorted so the processing order (and therefore the
# position shown in progress messages) is the same on every run.
files = sorted(ROOT.rglob("*.mp3"))
n = len(files)

# JSON-lines index file; one metadata record is appended per embedded track.
indexf = out / "index.jsonl"



In [None]:


# Extract one OpenL3 embedding per track in `files`, save it as
# `<out>/<stem>.npy`, and append a metadata record to `indexf`.
# Tracks whose .npy already exists are skipped, so the cell can be re-run
# to resume an interrupted job. Embeddings computed in *this* run are also
# kept in `embeds` / `labels`.


def _normalise_peak(signal, target_peak=0.891):
    """Scale `signal` so its largest absolute sample equals `target_peak`.

    0.891 is approximately -1 dBFS. An empty or all-zero signal is returned
    unchanged, because there is no peak to divide by.
    """
    if signal.size == 0:
        return signal
    peak = np.max(np.abs(signal))
    if peak == 0:
        return signal
    return target_peak * signal / peak


def _save_npy_atomic(path, array):
    """Write `array` to `path` through a temporary file and a rename.

    The resume logic treats any existing .npy as a finished track, so a
    half-written file must never appear under the final name.
    """
    tmp_path = path.with_name(path.name + ".tmp")
    with tmp_path.open("wb") as fh:
        # Passing a file object stops np.save from appending ".npy".
        np.save(fh, array)
    tmp_path.replace(path)


embeds, labels = [], []
processed_count = 0
failed = []  # (path, reason) for every track that could not be embedded

gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        # Let TF grab memory lazily, expanding on demand
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Memory growth can only be set before the GPUs are initialised, which
    # is no longer the case when this cell is re-run in a live kernel.
    print(f"Could not enable GPU memory growth: {e}")

# Work out what is left to do. Positions are 1-based indices into `files`
# so progress messages stay comparable between runs.
pending = [
    (position, file)
    for position, file in enumerate(files, start=1)
    if not (out / f"{file.stem}.npy").exists()
]
print(f"Found {len(files) - len(pending)} already processed files, {len(pending)} remaining")

# Load the network once and reuse it for every track, and only when there
# is work to do. The same settings are still passed to
# get_audio_embedding below.
l3_model = None
if pending:
    l3_model = openl3.models.load_audio_embedding_model(
        input_repr='mel256',
        content_type='music',
        embedding_size=512,  # Change to 6144 for larger embeddings
    )

for position, file in pending:

    stem = file.stem  # 12345 from 12345.mp3
    npy_path = out / f"{stem}.npy"  # Embedding filename

    try:
        audio, sr = librosa.load(file, sr=None, mono=True)
    except Exception as e:
        print(f"Error loading {file}: {e}")
        failed.append((file, f"load: {e}"))
        continue

    # A corrupt file can decode to zero samples; nothing can be embedded.
    if audio.size == 0:
        print(f"Error loading {file}: decoded to empty audio")
        failed.append((file, "empty audio"))
        continue

    print(f"Processing {file} ({position}/{n})")

    try:
        # Preprocessing: resample to 48 kHz, ensure mono, peak-normalise.
        audio_48 = librosa.resample(audio, orig_sr=sr, target_sr=48000, res_type="soxr_vhq")
        audio_mono = librosa.to_mono(audio_48)
        audio_norm = _normalise_peak(audio_mono)

        # Embedding extraction.
        # NOTE: the normalised signal is what is embedded. Earlier versions
        # of this cell computed it but passed the un-normalised audio, so
        # .npy files cached by those runs were produced without peak
        # normalisation; regenerate them if strict consistency matters.
        embedding, ts = openl3.get_audio_embedding(
            audio_norm, 48000,
            model=l3_model,
            content_type='music',
            input_repr='mel256',
            embedding_size=512,  # Change to 6144 for larger embeddings
            hop_size=0.1,
            center=True,
            batch_size=1
        )

        _save_npy_atomic(npy_path, embedding.astype(np.float32))
    except Exception as e:
        # One bad track must not abort the whole batch; record it and go on.
        print(f"Error processing {file}: {type(e).__name__}: {e}")
        failed.append((file, f"process: {e}"))
        continue

    # log minimal metadata for later reconstruction
    with indexf.open("a") as f:
        json.dump({"track_id": int(stem),
                    "n_frames": len(embedding),
                    "path": str(npy_path)}, f)
        f.write("\n")

    processed_count += 1
    print(f"[{position}/{n}] {stem}: wrote {embedding.shape}")

    embeds.append(embedding)
    labels.append(file)

print(f"Done: {processed_count} embedded, {len(failed)} failed")
for failed_file, reason in failed:
    print(f"  FAILED {failed_file}: {reason}")
    


Found 7997 already processed files, 3 remaining
Skipping ..\data\fma_small\000\000002.mp3 - already processed [1/8000]
Skipping ..\data\fma_small\000\000005.mp3 - already processed [2/8000]
Skipping ..\data\fma_small\000\000010.mp3 - already processed [3/8000]
Skipping ..\data\fma_small\000\000140.mp3 - already processed [4/8000]
Skipping ..\data\fma_small\000\000141.mp3 - already processed [5/8000]
Skipping ..\data\fma_small\000\000148.mp3 - already processed [6/8000]
Skipping ..\data\fma_small\000\000182.mp3 - already processed [7/8000]
Skipping ..\data\fma_small\000\000190.mp3 - already processed [8/8000]
Skipping ..\data\fma_small\000\000193.mp3 - already processed [9/8000]
Skipping ..\data\fma_small\000\000194.mp3 - already processed [10/8000]
Skipping ..\data\fma_small\000\000197.mp3 - already processed [11/8000]
Skipping ..\data\fma_small\000\000200.mp3 - already processed [12/8000]
Skipping ..\data\fma_small\000\000203.mp3 - already processed [13/8000]
Skipping ..\data\fma_smal

  audio, sr = librosa.load(file, sr=None, mono=True)


Skipping ..\data\fma_small\108\108808.mp3 - already processed [4875/8000]
Skipping ..\data\fma_small\108\108809.mp3 - already processed [4876/8000]
Skipping ..\data\fma_small\108\108812.mp3 - already processed [4877/8000]
Skipping ..\data\fma_small\108\108836.mp3 - already processed [4878/8000]
Skipping ..\data\fma_small\108\108837.mp3 - already processed [4879/8000]
Skipping ..\data\fma_small\108\108838.mp3 - already processed [4880/8000]
Skipping ..\data\fma_small\108\108839.mp3 - already processed [4881/8000]
Skipping ..\data\fma_small\108\108840.mp3 - already processed [4882/8000]
Skipping ..\data\fma_small\108\108841.mp3 - already processed [4883/8000]
Skipping ..\data\fma_small\108\108842.mp3 - already processed [4884/8000]
Skipping ..\data\fma_small\108\108843.mp3 - already processed [4885/8000]
Skipping ..\data\fma_small\108\108845.mp3 - already processed [4886/8000]
Skipping ..\data\fma_small\108\108846.mp3 - already processed [4887/8000]
Skipping ..\data\fma_small\108\108847.

ValueError: need at least one array to concatenate