In [None]:
import os
import numpy as np
import librosa
from google.colab import drive
import json
import math

# Mount Google Drive so the dataset folder below becomes reachable from Colab.
drive.mount('/content/drive')

# Audio settings: tracks are loaded at SAMPLE_RATE Hz and DURATION is
# expressed in seconds, so their product is the sample count per track.
SAMPLE_RATE = 22050
DURATION = 4
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE

# Input dataset folder and output JSON file for the extracted features.
dataset_path = "/content/drive/MyDrive/Colab Notebooks/dataset"
JSON_PATH = "data.json"

def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=5, percentage=0.1):
    """Extract MFCC features from a folder-per-class dataset and dump them to JSON.

    Every directory found below ``dataset_path`` is treated as one class: its
    name is appended to ``"mapping"`` and its position in that list is the
    numeric label. Only the first ``percentage`` fraction of the files listed
    for each directory is processed. Each file is loaded at ``SAMPLE_RATE``,
    cut into ``num_segments`` consecutive slices, and the MFCC matrix of every
    complete slice is stored together with its label.

    Args:
        dataset_path: Root folder of the dataset (``str`` or ``os.PathLike``).
        json_path: Destination of the JSON file.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        n_fft: FFT window size forwarded to librosa.
        hop_length: Hop between frames, in samples.
        num_segments: Number of slices each track is divided into.
        percentage: Fraction (0..1) of the files of each folder to process.

    The function returns nothing; it writes ``json_path`` and prints progress.
    """
    data = {
        "mapping": [],  # folder names; the list position is the numeric label
        "labels": [],   # index into "mapping" for each stored MFCC matrix
        "mfcc": []      # the inputs: one MFCC matrix per saved segment
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # NOTE: despite its name this counter is incremented once per saved
    # segment, so the final message reports segments rather than files.
    total_files = 0

    # Normalise to the plain path that os.walk yields for the root, so the
    # root can be recognised by value. The previous identity check (`is not`)
    # relied on a CPython detail and failed for pathlib.Path arguments.
    root_path = os.fspath(dataset_path)

    for dirpath, _dirnames, filenames in os.walk(root_path):
        if dirpath == root_path:
            # The root folder only contains the class folders.
            continue

        semantic_label = os.path.split(dirpath)[-1]
        data["mapping"].append(semantic_label)
        # Tie the label to the entry just appended instead of the walk counter.
        label_index = len(data["mapping"]) - 1
        print("\nProcessando {}".format(semantic_label))

        # Keep only the requested fraction of the files of this folder.
        selected_files = filenames[:int(len(filenames) * percentage)]

        for f in selected_files:
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)  # load the audio file

            # Process every segment of the audio file.
            for s in range(num_segments):
                start = samples_per_segment * s
                finish = start + samples_per_segment

                # Skip slices that would run past the end of the signal.
                if finish > len(signal):
                    continue

                mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sr, n_fft=n_fft, n_mfcc=n_mfcc, hop_length=hop_length)
                # Transpose so the first axis is the one compared with the
                # expected number of frames below.
                mfcc = mfcc.T

                # Store the segment only when it has the expected frame count,
                # so every saved matrix has the same length.
                if len(mfcc) != expected_num_mfcc_vectors_per_segment:
                    continue

                data["mfcc"].append(mfcc.tolist())
                data["labels"].append(label_index)
                print("{}, segment: {}".format(file_path, s))
                total_files += 1

    # Save the collected data to the JSON file.
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

    print(f"Processati {total_files} file e salvati in {json_path}")

# Example usage: extract the features from the Drive dataset into JSON_PATH.
save_mfcc(dataset_path=dataset_path, json_path=JSON_PATH)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Processando Vocal Acoustic
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-050-025.wav, segment: 0
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-050-025.wav, segment: 1
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-050-025.wav, segment: 2
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-050-025.wav, segment: 3
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-050-025.wav, segment: 4
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-065-100.wav, segment: 0
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-065-100.wav, segment: 1
/content/drive/MyDrive/Colab Notebooks/dataset/Vocal Acoustic/vocal_acoustic_000-065-100.wav, segment: 2
/co