<a href="https://colab.research.google.com/github/michelemiko1/genetic_music_CNN/blob/main/4__music_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import libraries
import os
import librosa
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
import random
import json

In [None]:
# locations on the mounted Google Drive: input dataset and output json file
DATASET_PATH = '/content/drive/MyDrive/Colab Notebooks/Datasets/genres'
SAVING_PATH = '/content/drive/MyDrive/Colab Notebooks/preprocessed_data_segments.json'

# sampling rate passed to librosa.load for every audio file
SAMPLE_RATE = 22050
# nominal length of a track in seconds
DURATION_SECONDS = 30

# segmentation settings: number of segments per track and length of one
# segment expressed in samples (3 seconds of audio at SAMPLE_RATE)
NUM_SEGMENTS = 9
SAMPLES_PER_SEGMENTS = SAMPLE_RATE * 3

In [None]:
# verify max and min duration of the audio files

def verify_dataset_legth(dataset_path):
    """Load every audio file of the dataset and report its length statistics.

    Every sub-folder of ``dataset_path`` is visited with ``os.walk`` and each
    file found there is loaded with ``librosa.load`` at ``SAMPLE_RATE``.
    Files located directly in ``dataset_path`` are ignored.

    Args:
        dataset_path: root folder of the dataset (``str`` or path-like).

    Returns:
        Tuple ``(number_of_songs, max_duration, min_duration)``; both
        durations are expressed in samples.

    Raises:
        ValueError: if no file is found in any sub-folder.
    """
    # normalise to a plain string so the comparison below also works when a
    # pathlib.Path is passed in
    root_path = os.fspath(dataset_path)

    samples_for_each_song = []

    print("\nGet the minimum and maximum length of the files\n\nprocessing:", end=" ")

    # access to all the folders and subfolders
    for dirpath, _dirnames, filenames in os.walk(root_path):

        # skip the dataset root and go ahead to the sub-folders (genres/blues).
        # The paths are compared by value: an identity test (`is not`) only
        # works when os.walk happens to hand back the very same string object.
        if dirpath == root_path:
            continue

        folder_name = os.path.basename(dirpath)
        print(f"{folder_name}", end=" ")

        # consider each song in the current folder
        for file in filenames:

            # find the complete path of a specific song ( es: genres\rock\rock.00092.wav )
            file_path = os.path.join(dirpath, file)

            # load the song to verify the length of each song
            signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)
            samples_for_each_song.append(len(signal))

    # verify how many songs in the dataset
    number_of_songs = len(samples_for_each_song)
    print(f"\n\ntotal number of analyzed songs: {number_of_songs}")

    # fail with an explicit message instead of numpy's reduction error
    if not samples_for_each_song:
        raise ValueError(f"no audio files found in the sub-folders of '{root_path}'")

    # calculate the max and min duration
    max_duration = np.max(samples_for_each_song)
    min_duration = np.min(samples_for_each_song)

    # print one arbitrary song as a sanity check; the index is clamped so that
    # datasets with fewer than 35 songs do not raise an IndexError
    example_index = min(34, number_of_songs - 1)
    print(f"casual sample duration (samples): {samples_for_each_song[example_index]}")
    print(f"max duration (samples): {max_duration}\n"
          f"min duration (samples): {min_duration}")

    return number_of_songs, max_duration, min_duration

In [None]:
# extract spectrogram or/and MFCCs from the songs

def data_preprocessing(dataset_path, min_duration, hop_length=512, n_fft=2048, n_mfcc=13):
    """Extract MFCCs from fixed-length segments of every song in the dataset.

    Each sub-folder of ``dataset_path`` is treated as one class: its name is
    appended to ``data['mapping']`` and its position in that list is the
    numeric label. Sub-folders and files are visited in alphabetical order so
    the numbering is reproducible between runs. Every file is loaded at
    ``SAMPLE_RATE`` and cut into up to ``NUM_SEGMENTS`` consecutive segments
    of ``SAMPLES_PER_SEGMENTS`` samples; one MFCC matrix and one label are
    stored per complete segment.

    Args:
        dataset_path: root folder of the dataset (``str`` or path-like).
        min_duration: unused; kept so existing callers keep working.
        hop_length: hop length forwarded to ``librosa.feature.mfcc``.
        n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
        n_mfcc: number of coefficients forwarded to ``librosa.feature.mfcc``.

    Returns:
        Dict with the keys ``'mapping'`` (folder names), ``'MFCCs'`` (nested
        lists, one per segment), ``'labels'`` (one int per segment) and
        ``'spectrogram'`` (always left empty by this function).
    """
    data = {
        'mapping': [],
        'MFCCs': [],
        'spectrogram': [],
        'labels': []
    }

    # normalise to a plain string so the root test below also works when a
    # pathlib.Path is passed in
    root_path = os.fspath(dataset_path)

    # counter to identify the current label
    current_label = -1

    for dirpath, dirnames, filenames in os.walk(root_path):

        # os.walk lists entries in arbitrary order; sorting in place makes it
        # descend alphabetically, so the first folder (blues) always gets 0
        dirnames.sort()

        # don't consider the root folder (compared by value, not by identity)
        if dirpath == root_path:
            continue

        # increment current label and save the actual name of the label
        current_label += 1
        data['mapping'].append(os.path.basename(dirpath))

        for file in sorted(filenames):

            # find the complete path of a specific song ( es: genres\rock\rock.00092.wav )
            file_path = os.path.join(dirpath, file)

            signal, _ = librosa.load(file_path, sr=SAMPLE_RATE)

            # divide the signal in segments and extract the features of each one
            for starting_point in range(0, NUM_SEGMENTS * SAMPLES_PER_SEGMENTS, SAMPLES_PER_SEGMENTS):
                segment = signal[starting_point:starting_point + SAMPLES_PER_SEGMENTS]

                # a track that ends early would give a shorter (or empty)
                # segment and therefore an MFCC matrix of a different shape
                if len(segment) < SAMPLES_PER_SEGMENTS:
                    break

                # the audio is passed by keyword: recent librosa releases no
                # longer accept it as a positional argument
                mfccs = librosa.feature.mfcc(y=segment, sr=SAMPLE_RATE, hop_length=hop_length,
                                             n_fft=n_fft, n_mfcc=n_mfcc)

                # store features and label together so both lists stay aligned
                data['MFCCs'].append(mfccs.tolist())
                data['labels'].append(current_label)

    return data

In [None]:
# make a visual representation of spectrogram or MFCCs

def display_MFCCS_or_spectrograms(data, number_of_songs, hop_length=512, sr=SAMPLE_RATE, MFCCs=True):
    """Plot one randomly chosen MFCC matrix (or spectrogram) for every label.

    Args:
        data: dict with the keys ``'mapping'``, ``'labels'`` and ``'MFCCs'``
            (or ``'spectrogram'`` when ``MFCCs`` is False); the feature list
            is indexed with the same indexes as ``data['labels']``.
        number_of_songs: unused; kept so existing callers keep working.
        hop_length: hop length passed to ``librosa.display.specshow``.
        sr: sampling rate passed to ``librosa.display.specshow``.
        MFCCs: set to False if you want the spectrogram instead of the MFCCs.
    """
    # mapping the labels
    labels = data['mapping']

    # group the sample indexes by label so a sample of each label is always
    # found, instead of relying on repeated random draws over the whole set
    indexes_by_label = {}
    for sample_index, label in enumerate(data['labels']):
        indexes_by_label.setdefault(label, []).append(sample_index)

    # pick one random sample for every label that has at least one sample
    list_of_indexes = [
        random.choice(indexes_by_label[label])
        for label in range(len(labels))
        if label in indexes_by_label
    ]

    # 5 plots per row; 10 labels give the 2x5 grid on a 12x5 inch figure
    n_cols = 5
    n_rows = max(1, (len(list_of_indexes) + n_cols - 1) // n_cols)
    plt.figure(figsize=(12, 2.5 * n_rows))

    for i, index in enumerate(list_of_indexes):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)

        # choose between MFCCs and Spectrogram
        if MFCCs:
            image_to_display = np.array(data['MFCCs'][index])
        else:
            # spectrograms are stored as magnitudes and shown in decibel
            image_to_display = librosa.amplitude_to_db(np.array(data['spectrogram'][index]))

        # display spectrogram / MFCCs using the sampling rate given by the caller
        librosa.display.specshow(image_to_display, sr=sr, hop_length=hop_length)

        # extract and print the associated label
        current_label_name = labels[data['labels'][index]]
        plt.xlabel(f"{current_label_name}\n ( sample:{index} )")

    plt.show()

In [None]:
# print some examples 

def verify_data_preprocessing(data, number_of_songs):
    """Show example MFCC plots and print the mapping and labels of ``data``.

    Args:
        data: dict holding at least the keys ``'mapping'`` and ``'labels'``;
            it is also forwarded to ``display_MFCCS_or_spectrograms``.
        number_of_songs: forwarded to ``display_MFCCS_or_spectrograms``.
    """
    # visual check: MFCC examples only (call again with MFCCs=False to plot
    # spectrograms instead)
    display_MFCCS_or_spectrograms(data, number_of_songs, MFCCs=True)

    # textual check: label names followed by the numeric label of every sample
    mapping = data['mapping']
    numeric_labels = data['labels']
    report = "\nVerify data preprocessing:\n"
    report += f"- mapping: {mapping}\n"
    report += f"- labels:  {numeric_labels}"
    print(report)


In [None]:
# save data dictionary into a json file

def save_into_file(saving_path, data):
    """Write ``data`` to ``saving_path`` as json indented with 4 spaces.

    Args:
        saving_path: path of the file to create or overwrite.
        data: json-serialisable object to store.
    """
    # serialise first, then write the whole document in one go
    serialised = json.dumps(data, indent=4)
    with open(saving_path, 'w') as output_file:
        output_file.write(serialised)

In [None]:
def main():
    """Run the whole pipeline: measure, preprocess, inspect and save the dataset."""
    # length statistics of the raw audio files (the maximum is not needed later)
    song_count, _longest, shortest = verify_dataset_legth(DATASET_PATH)

    # extract 39 MFCCs per frame from every segment of every song
    preprocessed = data_preprocessing(DATASET_PATH, shortest, n_mfcc=39)

    # plot and print some examples to check the result
    verify_data_preprocessing(preprocessed, song_count)

    # store the preprocessed data as a json file
    save_into_file(SAVING_PATH, preprocessed)

In [None]:
# run the entire preprocessing; the guard keeps the (slow) pipeline from
# starting when this code is imported instead of being executed directly
if __name__ == "__main__":
    main()