## Preprocessing the Free Music Archive (FMA) Dataset

**Note**: Most of the preprocessing cells in this notebook require a local copy of the FMA dataset (the metadata files and the audio files). If you do not have these files, skip every code cell whose header comment states this requirement and load the exported `.npz` files as the data source instead.

The purpose of this notebook is to preprocess and clean the FMA dataset that will be used in `model_training.ipynb`.

All code and content contained in this notebook is the original work of Tobey Brizuela and Michael Umeh. This intellectual property is intended solely for educational and non-commercial use as part of the final project for Neural Nets and Deep Learning (CSC413) at the University of Toronto.

This material may not be reproduced, distributed, transmitted, displayed, published, or broadcast without the prior written permission of Tobey Brizuela and Michael Umeh. Unauthorized use or reproduction of this content may constitute a violation of copyright law.

For inquiries regarding the use of this material, please contact:
- Tobey Brizuela: tobey.brizuela@mail.utoronto.ca
- Michael Umeh: michael.umeh@mail.utoronto.ca

April 14, 2024

**Imports:**

In [1]:
# Importing all required methods and libraries
# --------------------------------------------

# Note: In order to run this project using CUDA (which is highly recommended!), you must have PyTorch installed with CUDA.
%matplotlib inline
import tensorflow as tf
import torch
import torch.nn as nn
import torchvision as tv
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import itertools

from torch.optim import Adam
import os
import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
import myutils
from sklearn.model_selection import train_test_split
import librosa
from pathlib import Path

# pip install opencv-python
import cv2 as cv

# UNUSED imports:
# ---------------
# from tensorflow.keras import layers, models, optimizers
# from tensorflow.keras.layers import Conv2D, Input
# from tensorflow.keras.models import Model
# from torchvision.models import vgg19
# from tensorflow.keras.applications import VGG19
# from tensorflow.keras.applications import ResNet50
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras import callbacks
# from tensorflow.keras import regularizers
# from sklearn.preprocessing import OneHotEncoder
# from torchvision.models.resnet import Bottleneck
# from matplotlib import cm
# import pylab
# from PIL import Image
# from matplotlib.pyplot import imshow
# from sklearn.utils.class_weight import compute_class_weight

**Constants:**

In [2]:
# Notebook-wide constants
# -----------------------
# Spectrograms are resized to square images, so the height, the width and the
# shared edge length are all the same number of pixels.
SPEC_SIZE = 216
SPEC_HEIGHT = SPEC_SIZE
SPEC_WIDTH = SPEC_SIZE

# Number of genre classes.
NUM_GENRES = 16

# Currently unused: directory where the mp3 files are stored, taken from the environment.
# AUDIO_DIR = os.environ.get('AUDIO_DIR')

# Raw song data
# -------------
# Which FMA subset to preprocess. 'medium' is active; to work on the small
# subset instead, use:
# d_size = 'small'
d_size = 'medium'

# Folder holding the audio files of the chosen subset.
directory_path = f'data/fma_{d_size}'

## Initial Data Construction and Clean-up

Load song metadata

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_metadata files installed locally in
#       the data folder directory!!!
# ------------------------------------------------------------------

# All three metadata tables live in the same folder and are read through the
# project's own loader.
_metadata_dir = 'data/fma_metadata'

tracks = myutils.load(f'{_metadata_dir}/tracks.csv')
genres = myutils.load(f'{_metadata_dir}/genres.csv')
features = myutils.load(f'{_metadata_dir}/features.csv')

# Not needed at the moment:
# echonest = myutils.load('data/fma_metadata/echonest.csv')

Cleaning up data

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Keep only the tracks that belong to the chosen subset.
# NOTE(review): `<=` on a string-valued column only makes sense if
# ('set', 'subset') is an ordered categorical (small < medium < ...);
# presumably myutils.load sets that up - confirm.
selected_tracks = tracks[tracks['set', 'subset'] <= d_size]

# Top-level genre of every selected track, as a one-column frame; tracks
# without a top genre are dropped.
hottest_genres = selected_tracks["track"]["genre_top"].to_frame().dropna()

# Track ids of the tracks that have a valid genre_top, as an (n, 1) column.
ids = hottest_genres.index.values.reshape(-1, 1)

# Genre names, and a lookup from genre name to its integer index.
categories = hottest_genres['genre_top'].cat.categories
categories_dict = {category: i for i, category in enumerate(categories)}

# Replace the genre names by their integer codes and switch to a plain array.
hottest_genres['genre_top'] = hottest_genres['genre_top'].cat.codes
hottest_genres = np.array(hottest_genres)

# One-hot encode the genre codes: one row per track, one column per genre.
# The width comes from NUM_GENRES instead of a repeated literal 16.
genre_codes = hottest_genres.flatten()
labels = np.zeros((genre_codes.shape[0], NUM_GENRES))
labels[np.arange(genre_codes.shape[0]), genre_codes] = 1

# Same ids as a flat Python list, which is easier to work with.
flattened_ids = ids.ravel().tolist()

# Pick the feature rows of exactly those tracks, in the same order as the labels.
selected_features = features.loc[flattened_ids]
features_data = np.array(selected_features)

Remove the tracks that have broken audio data from features and labels

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Hard-coded found broken audio tracks for fma_medium dataset...
# These are row positions (not track ids): they are used below to index the
# boolean masks positionally.
# NOTE(review): presumably these are the positions printed by build_a_bear - confirm.
broken_audio_tracks = [316, 977, 6996, 7709, 10675, 13146, 15626, 15627, 15628, 15634, 15836, 16305, 16643, 16959, 20621, 20780, 21988, 23620]

# The list only fits the dataset it was collected on. Fail with an explicit
# message instead of an anonymous out-of-bounds error when fewer rows are loaded.
n_rows = min(features_data.shape[0], labels.shape[0])
if broken_audio_tracks and max(broken_audio_tracks) >= n_rows:
    raise IndexError(
        f"broken_audio_tracks holds row positions up to {max(broken_audio_tracks)}, "
        f"but only {n_rows} rows are loaded for d_size={d_size!r}; "
        "the hard-coded list was collected for the fma_medium dataset."
    )

# create the mask for features (True = keep the row)
keep_features_mask = np.ones(features_data.shape[0], dtype=bool)
keep_features_mask[broken_audio_tracks] = False

# create the mask for labels (True = keep the row)
keep_labels_mask = np.ones(labels.shape[0], dtype=bool)
keep_labels_mask[broken_audio_tracks] = False

# remove broken tracks: boolean indexing returns a copy without the masked rows
filtered_features_data = features_data[keep_features_mask]
filtered_labels_data = labels[keep_labels_mask]

# Prints to verify filters successful
print("Original features shape:", features_data.shape)
print("Filtered features shape:", filtered_features_data.shape)

print("Original labels shape:", labels.shape)
print("Filtered labels shape:", filtered_labels_data.shape)

Audio-to-Spectrogram Function

In [None]:
def spectrogrammer(mp3_path, alpha):
    """Convert one audio file into a fixed-size dB-scaled spectrogram.

    Parameters
    ----------
    mp3_path : str or path-like
        Location of the audio file to load.
    alpha : float
        Coefficient handed to ``librosa.effects.preemphasis``.

    Returns
    -------
    numpy.ndarray or list
        The spectrogram resized to ``(SPEC_SIZE, SPEC_SIZE)``, or an empty
        list when the file could not be loaded or contains no samples.
        Callers detect a failure by checking for that empty result.
    """
    # Progress output: one line per processed file.
    print(mp3_path)
    try:
        # Load the audio file, resampled to 22050 Hz
        y, _ = librosa.load(mp3_path, sr=22050)
    except Exception as e:
        # Best effort: a broken file must not stop the whole run, but say why
        # it was skipped instead of hiding the error.
        print(f"Could not load {mp3_path}: {e!r}")
        return []

    if y.size == 0:
        # Nothing was decoded; treat it like a file that failed to load.
        print(f"Could not load {mp3_path}: no audio samples decoded")
        return []

    # pre-emphasis filter to improve the signal-to-noise ratio
    y = librosa.effects.preemphasis(y, coef=alpha)

    # Compute the spectrogram (short-time Fourier transform)
    D = librosa.stft(y)

    # Magnitudes in dB relative to the loudest bin
    D_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Bring every spectrogram to the same square size
    D_db = cv.resize(D_db, dsize=(SPEC_SIZE, SPEC_SIZE), interpolation=cv.INTER_CUBIC)

    return D_db

Construct spectrogram data

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Pre-emphasis coefficient for the spectrograms, and the (initially empty)
# list that collects one spectrogram per successfully converted track.
alpha, image_data = 0.97, []


def build_a_bear(directory_path):
    """Convert every audio file below ``directory_path`` into a spectrogram.

    Sub-folders, and the files inside each of them, are visited in sorted
    order. Each file is passed to ``spectrogrammer`` together with the
    module-level ``alpha``; usable results are appended to the module-level
    ``image_data`` list. Entries of ``directory_path`` that are not folders
    are ignored.

    Parameters
    ----------
    directory_path : str or path-like
        Folder whose sub-folders contain the audio files.

    Returns
    -------
    list of int
        Running positions (counted over all visited files, starting at 0) of
        the files that produced no usable spectrogram. The list is also
        printed, as before.
    """
    bad_indices = []
    position = 0  # running index over every visited file
    for folder in sorted(Path(directory_path).iterdir()):
        if not folder.is_dir():
            continue
        for file_path in sorted(folder.iterdir()):
            spec_array = spectrogrammer(file_path, alpha)
            # An empty result (or one without any non-zero value) counts as failed.
            if np.any(spec_array):
                image_data.append(spec_array)
            else:
                bad_indices.append(position)
            position += 1
    print(bad_indices)
    # Hand the positions back so they do not have to be copied from the output.
    return bad_indices

# Walk the dataset folder; this fills the image_data list.
build_a_bear(directory_path)

# Turn the collected list of spectrograms into a single numpy array
image_data = np.asarray(image_data)

# Show the resulting array shape as a quick check
print(image_data.shape)

Normalize Spectrogram Values Function

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Function to help normalize a spectrogram to range [0,1]
def normalize_spectrogram(spectrogram, top_db=80.0):
    """Rescale a dB spectrogram from ``[-top_db, 0]`` to ``[0, 1]``.

    Parameters
    ----------
    spectrogram : array-like
        Spectrogram values in dB, expected to lie between ``-top_db`` and 0.
    top_db : float, optional
        Size of the dB range covered by the input. Defaults to 80, the value
        this function always used.

    Returns
    -------
    numpy.ndarray
        ``(spectrogram + top_db) / top_db``, limited to ``[0, 1]``.
    """
    scaled_spectrogram = (np.asarray(spectrogram) + top_db) / top_db
    # Inputs slightly outside [-top_db, 0] (for instance from interpolation
    # overshoot when resizing) would leave the promised range; clamp them.
    return np.clip(scaled_spectrogram, 0.0, 1.0)

UNUSED: Storing un-normalized spectrogram data to file

In [None]:
# # export the spectrograms array to its own file for safe keeping!

# # Specify the directory path for saving
# data_directory = 'data'
# npz_file_name = 'fma_spectrograms.npz'
# npz_file_path = os.path.join(data_directory, npz_file_name)

# # Create the directory if it does not exist
# os.makedirs(data_directory, exist_ok=True)

# np.savez(npz_file_path, *image_data)

# print(f"Array saved successfully at: {npz_file_path}")

UNUSED: Load un-normalized spectrogram data

In [None]:
# # Specify the directory path
# data_directory = 'data'
# npz_file_name = 'fma_spectrograms.npz'
# npz_file_path = os.path.join(data_directory, npz_file_name)

# # load the .npz file for use when we want to return to testing!
# image_data_load = np.load(npz_file_path)
# spectrograms = list(image_data_load.values())
# image_data = np.array(spectrograms)

Normalize the spectrograms

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Normalize the spectrograms one by one, then stack them into one array.
normalized_spectrograms = list(map(normalize_spectrogram, image_data))
image_data_new = np.asarray(normalized_spectrograms)

## Exporting Data

Export the **features and labels** arrays to their own files for safe keeping

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Output folder and file names of the two exports
data_directory = 'data'
features_file_name, labels_file_name = 'fma_features.npz', 'fma_labels.npz'

features_file_path = os.path.join(data_directory, features_file_name)
labels_file_path = os.path.join(data_directory, labels_file_name)

# Make sure the output folder is there before writing
os.makedirs(data_directory, exist_ok=True)

# Each array is unpacked, so every row is handed to np.savez as its own
# positional array. Features are written first, then labels.
for what, target_path, rows in (
    ("Features", features_file_path, filtered_features_data),
    ("Labels", labels_file_path, filtered_labels_data),
):
    np.savez(target_path, *rows)

    print(f"{what} saved successfully at: {target_path}")

Export the **normalized spectrograms** to file for safe keeping

In [None]:
# ------------------------------------------------------------------
# NOTE: If you are going to run this preprocessing data construction
#       you must have the fma_small and/or fma_medium data installed
#       locally in the data folder directory!!!
# ------------------------------------------------------------------

# Where the normalized spectrograms are written
data_directory = 'data'
npz_file_name = 'fma_norm_spectrograms.npz'

# Make sure the output folder is there before writing
os.makedirs(data_directory, exist_ok=True)

npz_file_path = os.path.join(data_directory, npz_file_name)

# The array is unpacked, so every spectrogram is handed to np.savez as its
# own positional array.
np.savez(npz_file_path, *image_data_new)

print("Array saved successfully at: {}".format(npz_file_path))