# Descarga y preparación del corpus de datos

El objetivo de este primer notebook es descargar el dataset utilizado y prepararlo (audios en formato WAV, anotaciones y características) para su posterior estudio y para el entrenamiento de modelos de Machine Learning.

## Importaciones

In [1]:
import librosa
import wave

import numpy as np
import datasets

from tqdm.auto import tqdm
import os

## Acceso al conjunto de datos de HuggingFace

In [2]:
# Fetch the "default" configuration of the ccmusic music-genre corpus from the
# Hugging Face Hub. trust_remote_code=True lets the dataset's own loading
# script run locally.
ccmusic_corpus = datasets.load_dataset(
    "ccmusic-database/music_genre",
    name="default",
    trust_remote_code=True,
)

In [3]:
# Show the available partitions together with their columns and row counts.
print(ccmusic_corpus)

DatasetDict({
    train: Dataset({
        features: ['audio', 'mel', 'fst_level_label', 'sec_level_label', 'thr_level_label'],
        num_rows: 1370
    })
    validation: Dataset({
        features: ['audio', 'mel', 'fst_level_label', 'sec_level_label', 'thr_level_label'],
        num_rows: 171
    })
    test: Dataset({
        features: ['audio', 'mel', 'fst_level_label', 'sec_level_label', 'thr_level_label'],
        num_rows: 172
    })
})


In [4]:
# Inspect the first training record: decoded audio (path, sample array,
# sampling rate), the mel image and the three label columns.
print(ccmusic_corpus['train'][0])

{'audio': {'path': 'C:\\Users\\Usuario\\.cache\\huggingface\\datasets\\downloads\\extracted\\9978c1aa27e41c465d1a0a120f523eae02b367b680fff33f401cee81e10d28c4\\audio\\2_non-classic\\11_rock\\21_Soft Rock\\6573efb8158239c2015abeaea6bb8f65.mp3', 'array': array([0., 0., 0., ..., 0., 0., 0.]), 'sampling_rate': 22050}, 'mel': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=496x369 at 0x1BD0F31CFE0>, 'fst_level_label': 1, 'sec_level_label': 8, 'thr_level_label': 14}


## Generación de la estructura de directorios del corpus

* Funciones auxiliares para la generación de características:

In [5]:
def amplitude_envelope(signal, frame_size=1024, hop_length=512):
    """Return the maximum sample value of each analysis frame of ``signal``.

    Frames start every ``hop_length`` samples and span up to ``frame_size``
    samples; the trailing frames are shorter when they run past the end of
    the signal.

    Args:
        signal: 1-D array-like of audio samples.
        frame_size: Number of samples per frame.
        hop_length: Number of samples between consecutive frame starts.

    Returns:
        1-D ``np.ndarray`` with one maximum per frame (empty for an empty
        signal).
    """
    samples = np.asarray(signal)
    total = samples.shape[0]
    # ndarray.max() reduces each frame in C; the builtin max() would iterate
    # over every sample of every frame in the Python interpreter.
    return np.array(
        [
            samples[start:start + frame_size].max()
            for start in range(0, total, hop_length)
        ]
    )



def calculate_ber(signal, split_freq, sample_rate, frame_size=1024, hop_length=512):
    """Compute the band energy ratio (BER) of every STFT frame of ``signal``.

    For each frame, the BER is the summed power of the frequency bins below
    ``split_freq`` divided by the summed power of the bins at or above it.

    Args:
        signal: 1-D array of audio samples.
        split_freq: Frequency in Hz separating the low band from the high band.
        sample_rate: Sampling rate of ``signal`` in Hz.
        frame_size: FFT size passed to ``librosa.stft`` as ``n_fft``.
        hop_length: Hop length passed to ``librosa.stft``.

    Returns:
        1-D ``np.ndarray`` with one ratio per frame. Frames whose high band
        carries no energy use 1e-10 as denominator to avoid dividing by zero.
    """
    spec = librosa.stft(signal, n_fft=frame_size, hop_length=hop_length)

    # An n_fft-point STFT has n_fft/2 + 1 rows spaced sample_rate / n_fft Hz
    # apart (row k <-> k * sample_rate / n_fft). Dividing the Nyquist range by
    # the number of rows would underestimate the spacing by one bin.
    hz_per_bin = sample_rate / frame_size
    split_freq_bin = int(np.floor(split_freq / hz_per_bin))

    power = np.abs(spec) ** 2
    # Keep the split inside the spectrogram so negative or above-Nyquist
    # split frequencies cannot turn into wrap-around slices.
    split_freq_bin = min(max(split_freq_bin, 0), power.shape[0])

    # Vectorised band sums over the frequency axis, one value per frame.
    low_band_energy = power[:split_freq_bin].sum(axis=0)
    high_band_energy = power[split_freq_bin:].sum(axis=0)
    high_band_energy = np.where(high_band_energy > 0, high_band_energy, 1e-10)

    return low_band_energy / high_band_energy

* Función para la generación de las características:

In [6]:
def store_features(audio_data, audio_file, label_column, csv_file):
    """Append one row of summary audio features for a record to ``csv_file``.

    The row holds ``audio_file``, the record's value in ``label_column``, the
    mean of eight frame-level features and the mean of each of 13 MFCCs,
    separated by commas.

    Args:
        audio_data: Record with an ``"audio"`` entry providing ``"array"``
            and ``"sampling_rate"``, plus the ``label_column`` entry.
        audio_file: Identifier written in the first column of the row.
        label_column: Key of ``audio_data`` holding the label to store.
        csv_file: Path of the CSV file the row is appended to.
    """
    waveform = audio_data["audio"]["array"]
    sr = audio_data["audio"]["sampling_rate"]
    label = audio_data[label_column]

    # Frame-level descriptors, in the order they are written to the row.
    frame_level = [
        # Time domain
        amplitude_envelope(waveform),
        librosa.feature.rms(y=waveform),
        librosa.feature.zero_crossing_rate(waveform),
        # Frequency domain (500 Hz split separates bass from treble)
        calculate_ber(waveform, 500, sr),
        librosa.feature.spectral_centroid(y=waveform, sr=sr),
        librosa.feature.spectral_bandwidth(y=waveform, sr=sr),
        librosa.feature.chroma_stft(y=waveform, sr=sr),
        librosa.feature.spectral_rolloff(y=waveform, sr=sr),
    ]
    mfcc = librosa.feature.mfcc(y=waveform, sr=sr, n_mfcc=13)

    # Every feature is reduced to its mean; each MFCC row is averaged on its own.
    fields = [audio_file, label]
    fields.extend(np.mean(values) for values in frame_level)
    fields.extend(np.mean(coefficient) for coefficient in mfcc)
    line = ",".join(f"{field}" for field in fields)

    with open(csv_file, "a") as out:
        out.write(line + "\n")

* Función para almacenar las anotaciones:

In [7]:
def store_annotation(audio_data, label_column, label_mapper, audio_file, annotations_file):
    """Append an ``audio_file,label_id,label_name`` line to ``annotations_file``.

    Args:
        audio_data: Record containing the label under ``label_column``.
        label_column: Key of ``audio_data`` holding the numeric label.
        label_mapper: Mapping from numeric label to label name.
        audio_file: Identifier written in the first column.
        annotations_file: Path of the CSV file the line is appended to.

    Raises:
        KeyError: If the label id is missing from ``label_mapper``.
    """
    numeric_label = audio_data[label_column]
    label_name = label_mapper[numeric_label]
    record = ",".join((f"{audio_file}", f"{numeric_label}", f"{label_name}"))
    with open(annotations_file, "a") as out:
        print(record, file=out)

* Función para la generación del fichero wav:

In [8]:
def store_audio_file(audio_data, audio_file):
    """Write a record's audio samples to ``audio_file`` as 16-bit mono PCM WAV.

    Args:
        audio_data: Record with an ``"audio"`` mapping providing ``"array"``
            (float samples, scaled by 32767 before the int16 conversion) and,
            optionally, ``"sampling_rate"``.
        audio_file: Destination path of the WAV file.
    """
    audio = audio_data["audio"]
    samples = np.asarray(audio["array"])
    # Use the record's own rate so the header matches the samples; 22050 is
    # kept as the fallback for records that do not carry one.
    sample_rate = int(audio.get("sampling_rate", 22050))

    # Clip before scaling so out-of-range samples saturate instead of wrapping
    # around in the int16 conversion; "<i2" forces the little-endian layout
    # required by WAV regardless of the host byte order.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")

    # The context manager closes the writer, which flushes the data and
    # finalises the frame count in the header.
    with wave.open(audio_file, "wb") as wav_out:
        wav_out.setnchannels(1)
        wav_out.setsampwidth(2)
        wav_out.setframerate(sample_rate)
        wav_out.writeframes(pcm.tobytes())

* Función para la generación del directorio de ficheros del corpus:

In [9]:
def generate_corpus_files(corpus_data, label_column, label_mapper, corpus_folder):
    """Export every partition of the corpus as WAV files plus two CSV files.

    For each of the ``train``, ``validation`` and ``test`` partitions this
    creates ``<corpus_folder>/<partition>/audios`` and (re)creates
    ``annotations.csv`` and ``features.csv`` with their header line. Every
    record then gets a features row, an annotation row and a WAV file named
    ``audio_<partition>_<index>.wav``.

    Args:
        corpus_data: Mapping from partition name to an iterable of records.
        label_column: Record key holding the label to export.
        label_mapper: Mapping from numeric label to label name.
        corpus_folder: Root directory of the exported corpus.
    """
    annotations_header = "audio_file,label_id,label_name\n"
    features_header = (
        "audio_file,label,mean_envelope,mean_rms,mean_zcr,"
        "mean_ber,mean_spec_cent,mean_spec_bw,mean_chroma_stft,mean_rolloff,"
        "mean_mfcc1,mean_mfcc2,mean_mfcc3,mean_mfcc4,mean_mfcc5,mean_mfcc6,mean_mfcc7,"
        "mean_mfcc8,mean_mfcc9,mean_mfcc10,mean_mfcc11,mean_mfcc12,mean_mfcc13\n"
    )

    # Root directory of the corpus
    os.makedirs(corpus_folder, exist_ok=True)

    for partition in ("train", "validation", "test"):
        partition_dir = f"{corpus_folder}/{partition}"
        audios_dir = f"{partition_dir}/audios"
        annotations_file = f"{partition_dir}/annotations.csv"
        features_file = f"{partition_dir}/features.csv"

        # Directory that will hold the partition's audio files
        os.makedirs(audios_dir, exist_ok=True)

        # Start both CSV files from scratch with only their header line
        for target, header in (
            (annotations_file, annotations_header),
            (features_file, features_header),
        ):
            with open(target, "w") as handle:
                handle.write(header)

        index = 0
        for audio_data in corpus_data[partition]:
            audio_file = f"{audios_dir}/audio_{partition}_{index}.wav"

            # Features row, annotation row, then the audio itself
            store_features(audio_data, audio_file, label_column, features_file)
            store_annotation(audio_data, label_column, label_mapper, audio_file, annotations_file)
            store_audio_file(audio_data, audio_file)

            index += 1

In [10]:
# Genre names keyed by a single numbering shared by all hierarchy levels.
# The ids appear to equal the numeric prefix of the dataset's folder names
# minus one (the sample record lives under "2_non-classic/11_rock/21_Soft Rock",
# i.e. ids 1, 10 and 20 here).
# NOTE(review): that same record reports sec_level_label=8 and
# thr_level_label=14, so the label columns below the first level do not seem
# to use this numbering -- verify before using this mapper for those levels.
label_mapper = {
    0: "Classic",
    2: "Symphony",
    3: "Opera",
    4: "Solo",
    5: "Chamber",
    1: "Non_classic",
    6: "Pop",
    11: "Pop_vocal_ballad",
    12: "Adult_contemporary",
    13: "Teen_pop",
    7: "Dance_and_house",
    14: "Contemporary_dance_pop",
    15: "Dance_pop",
    8: "Indie",
    16: "Classic_indie_pop",
    17: "Chamber_cabaret_and_art_pop",
    9: "Soul_or_r_and_b",
    10: "Rock",
    18: "Adult_alternative_rock",
    19: "Uplifting_anthemic_rock",
    20: "Soft_rock",
    21: "Acoustic_pop",
}

* Generación de los ficheros del corpus para el primer y el segundo nivel de etiquetas:

In [11]:
# Export the corpus annotated with the first-level (Classic / Non_classic) labels.
generate_corpus_files(
    corpus_data=ccmusic_corpus,
    label_column="fst_level_label",
    label_mapper=label_mapper,
    corpus_folder="ccmusic",
)

In [12]:
# label_mapper is keyed by the global folder numbering (Rock == 10), but the
# sample record printed earlier reports sec_level_label == 8 for a track stored
# under "11_rock": second-level ids are indices within that level. Passing
# label_mapper directly would therefore write wrong names (8 -> "Indie").
# Build a per-level mapper instead: the nine second-level genres occupy the
# global ids 2..10, so second-level id i corresponds to global id i + 2.
# TODO(review): assumes the dataset lists the second-level classes in that same
# order -- confirm against ccmusic_corpus["train"].features["sec_level_label"].
sec_level_label_mapper = {
    sec_id: label_mapper[sec_id + 2] for sec_id in range(9)
}
generate_corpus_files(ccmusic_corpus, "sec_level_label", sec_level_label_mapper, "ccmusic2")

KeyboardInterrupt: 