# Generazione dataset CNN
Il codice seguente permette la generazione di un dataset attraverso l'elaborazione degli audio scaricati da FMA. In particolare, le operazioni eseguite per ogni audio scaricato per la generazione del dataset sono:
- caricamento audio
- calcolo dello spettrogramma dell'audio tramite librosa
- generazione di un'immagine 80x80 pixel che mantiene lo spettrogramma
- salvataggio dell'immagine in scala di grigi

Ogni riga del dataset generato si compone quindi di 2 elementi: l'immagine in scala di grigi e il target corrispondente che indica il genere dell'audio.

In [5]:

%matplotlib inline

import os

import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display
import utils
import dotenv
import numpy as np
import pandas as pd
import multiprocessing.sharedctypes as sharedctypes
import os.path
import ast
import warnings
# Silence every warning for the rest of the session (applies process-wide).
warnings.filterwarnings('ignore')

# Default size, in inches, of every matplotlib figure created afterwards.
plt.rcParams['figure.figsize'] = (17, 5)

# Number of samples per 30s audio clip.
# At SAMPLING_RATE this corresponds to roughly 29.98 s, i.e. slightly less
# than a full 30 s clip.
# TODO: fix dataset to be constant.
NB_AUDIO_SAMPLES = 1321967
# Sampling rate in Hz that NB_AUDIO_SAMPLES refers to.
SAMPLING_RATE = 44100

# Load the environment from the .env file.
# find_dotenv() locates the file; load_dotenv() reads it.
dotenv.load_dotenv(dotenv.find_dotenv())


def load(filepath):
    """
    Read one of the FMA metadata CSV files into a pandas DataFrame.

    The kind of file is recognised from a keyword contained in the base
    name of *filepath*, checked in this order: 'features', 'echonest',
    'genres', 'tracks'.

    * 'features' / 'echonest': read with a three-row column header.
    * 'genres': read with a single-row column header.
    * 'tracks': read with a two-row column header, then post-processed:
      list-like columns are parsed with ``ast.literal_eval``, date columns
      are converted with ``pd.to_datetime``, ``('set', 'subset')`` becomes
      an ordered categorical (small < medium < large) and a few text
      columns become plain categoricals.

    In every case the first CSV column is used as the index. When the base
    name contains none of the keywords nothing is read and ``None`` is
    returned.
    """
    name = os.path.basename(filepath)

    # features.csv and echonest.csv share the same three-level header layout.
    if 'features' in name or 'echonest' in name:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in name:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' not in name:
        return None

    frame = pd.read_csv(filepath, index_col=0, header=[0, 1])

    # These columns hold Python literals serialised as text.
    literal_columns = (
        ('track', 'tags'),
        ('album', 'tags'),
        ('artist', 'tags'),
        ('track', 'genres'),
        ('track', 'genres_all'),
    )
    for key in literal_columns:
        frame[key] = frame[key].map(ast.literal_eval)

    date_columns = (
        ('track', 'date_created'),
        ('track', 'date_recorded'),
        ('album', 'date_created'),
        ('album', 'date_released'),
        ('artist', 'date_created'),
        ('artist', 'active_year_begin'),
        ('artist', 'active_year_end'),
    )
    for key in date_columns:
        frame[key] = pd.to_datetime(frame[key])

    # Ordered categorical so that subsets can be compared with <= / >=.
    subset_key = ('set', 'subset')
    subset_order = ('small', 'medium', 'large')
    try:
        frame[subset_key] = frame[subset_key].astype(
            'category', categories=subset_order, ordered=True)
    except (ValueError, TypeError):
        # the categories and ordered arguments were removed in pandas 0.25
        subset_dtype = pd.CategoricalDtype(
            categories=subset_order, ordered=True)
        frame[subset_key] = frame[subset_key].astype(subset_dtype)

    categorical_columns = (
        ('track', 'genre_top'),
        ('track', 'license'),
        ('album', 'type'),
        ('album', 'information'),
        ('artist', 'bio'),
    )
    for key in categorical_columns:
        frame[key] = frame[key].astype('category')

    return frame


def get_audio_path(audio_dir, track_id):
    """
    Build the location of a track's mp3 file inside the audio directory.

    The track ID is zero-padded to six digits; the first three digits of
    the padded ID name the sub-directory and the full padded ID names the
    file.

    Examples
    --------
    >>> import utils
    >>> AUDIO_DIR = os.environ.get('AUDIO_DIR')
    >>> utils.get_audio_path(AUDIO_DIR, 2)
    '../data/fma_small/000/000002.mp3'

    """
    padded_id = format(track_id, '06d')
    subdirectory = padded_id[:3]
    mp3_name = padded_id + '.mp3'
    return os.path.join(audio_dir, subdirectory, mp3_name)




In [6]:

# Read the four FMA metadata tables; each read is independent of the others.
echonest = utils.load('data/fma_metadata/echonest.csv')
features = utils.load('data/fma_metadata/features.csv')
genres = utils.load('data/fma_metadata/genres.csv')
tracks = utils.load('data/fma_metadata/tracks.csv')

# Boolean mask of the tracks whose subset compares <= 'small'.
small = tracks[('set', 'subset')] <= 'small'

# Top-level genre of every selected track, indexed like `tracks`.
y_train = tracks.loc[small, ('track', 'genre_top')]


In [8]:
from PIL import Image
from numpy import array
from sklearn.model_selection import train_test_split
import matplotlib

# Render with the non-interactive Agg backend: figures are only written to disk.
matplotlib.use('Agg')

AUDIO_DIR = "data\\fma_small"
# Scratch PNG that is overwritten for every track.
AUDIO_SAVE = "out.png"

# Parallel lists: immagini[k] is the image array of the track whose genre
# label is etichette[k].
immagini = []
etichette = []

all_id_audio_train = y_train.index.values.tolist()
i = 0  # number of tracks processed so far

try:
    # NOTE: the [0:1] slice restricts the run to the first track only.
    for id_audio in all_id_audio_train[0:1]:
        filename = utils.get_audio_path(AUDIO_DIR, id_audio)
        y, sr = librosa.load(filename, sr=None, mono=True)

        # Power spectrogram -> mel spectrogram -> dB relative to the peak.
        # (The mel spectrogram is computed once; an earlier duplicate call
        # whose result was discarded has been dropped.)
        D = np.abs(librosa.stft(y))**2
        S = librosa.feature.melspectrogram(S=D, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)

        fig, ax = plt.subplots()
        try:
            fig.set_size_inches(1, 1)
            img = librosa.display.specshow(S_dB, sr=sr, fmax=8000, ax=ax,
                                           cmap='gray')
            fig.savefig(AUDIO_SAVE, dpi=80, bbox_inches='tight', pad_inches=0)
        finally:
            # Release the figure; otherwise one figure per track stays alive.
            plt.close(fig)

        # Read the rendered PNG back as an array and close the file handle.
        # NOTE(review): the array is stored exactly as saved; confirm that it
        # really is an 80x80 single-channel image as the notebook text states.
        with Image.open(AUDIO_SAVE) as imm:
            ar = np.array(imm)

        immagini.append(ar)
        etichette.append(y_train[id_audio])

        i = i + 1
finally:
    # Write the archive once instead of rewriting it on every iteration.
    # The `finally` keeps whatever was collected if a track fails midway.
    if immagini:
        np.savez("Cnn_data\\Cnn_data.npz", immagini=immagini,
                 etichette=etichette)




In [2]:
from sklearn.model_selection import train_test_split
import numpy as np

# Load the training set back from the npz archive.
training_set = np.load("Cnn_data\\Cnn_data.npz")
immagini, etichette = training_set["immagini"], training_set["etichette"]

# Number of labels stored in the archive.
print(len(etichette))



1368
