In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import kapre
import keras
import os
import soundfile as sf
import time
import progressbar
import gc
import h5py

Using TensorFlow backend.


In [3]:
# Read the MusicNet metadata table; its id and composer columns are used
# further down to derive the label of every audio snippet.
metadata = pd.read_csv(filepath_or_buffer="/data/musicnet/metadata.csv")

In [4]:
# Preview the first five metadata rows as a quick sanity check of the columns.
metadata.head(n=5)

Unnamed: 0,id,composer,composition,movement,ensemble,source,transcriber,catalog_name,seconds
0,1727,Schubert,Piano Quintet in A major,2. Andante,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,447
1,1728,Schubert,Piano Quintet in A major,3. Scherzo: Presto,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,251
2,1729,Schubert,Piano Quintet in A major,4. Andantino - Allegretto,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,444
3,1730,Schubert,Piano Quintet in A major,5. Allegro giusto,Piano Quintet,European Archive,http://tirolmusic.blogspot.com/,OP114,368
4,1733,Schubert,Piano Sonata in A major,2. Andantino,Solo Piano,Museopen,Segundo G. Yogore,D959,546


In [5]:
# Build the lookup table recording id --> composer name; it is used later to
# assign a label to every sound snippet.
composers = metadata.set_index("id")["composer"].to_dict()
# Spot-check a few known recording ids.
examples = [1755, 2211, 2368]
list(map(composers.get, examples))

['Schubert', 'Bach', 'Beethoven']

In [6]:
# Creating dataset
# First we define a function to load the audio snippets and to create the labels
def load_audio(path, downsampling_rate, duration_sec, composers, sample_rate=44100):
    '''Load a random subset of audio chunks and look up their composer labels.

    Requires the soundfile package (imported as sf) and progressbar.

    Parameters
    ----------
    path : str
        Directory that contains the audio chunks. Every file name must start
        with the integer recording id followed by "-".
    downsampling_rate : float
        Fraction of the files found in `path` that is loaded (1 = all files).
        The subset is drawn at random without replacement.
    duration_sec : float
        Number of seconds kept from the start of every clip.
    composers : dict
        Maps integer recording id --> composer name.
    sample_rate : int, optional
        Sample rate used to size the output buffer. The default of 44100
        together with duration_sec=2 gives the 88200 samples per clip that
        were previously hard-coded.

    Returns
    -------
    labels : numpy.ndarray of str, shape (n_files,)
        Composer name of every loaded clip.
    data : numpy.ndarray, shape (n_files, 1, int(sample_rate * duration_sec))
        Audio samples; clips shorter than the buffer are zero-padded at the end.

    Raises
    ------
    KeyError
        If a recording id parsed from a file name is missing from `composers`.
    ValueError
        If a file name does not start with an integer id.
    '''
    files = os.listdir(path)
    n_selected = int(len(files) * downsampling_rate)

    print("Processing {0} audio files...".format(len(files)))
    print("Downsampling dataset to {0}% (equivalent to {1} audio files).".format(downsampling_rate*100, n_selected))

    ds = np.random.choice(files, n_selected, replace=False)

    # Size the buffer from the requested duration instead of a fixed 88200,
    # so that other durations / sample rates work as well.
    n_samples = int(sample_rate * duration_sec)
    data = np.zeros((len(ds), 1, n_samples))
    # Collect labels in a list: a fixed-width "<U10" array would silently
    # truncate composer names longer than 10 characters.
    labels = []

    with progressbar.ProgressBar(max_value=len(ds)) as bar:

        for i, file in enumerate(ds):
            # load and process file, then add to array
            # os.path.join also works when `path` has no trailing separator
            audio_clip, sr = sf.read(os.path.join(path, file))
            if audio_clip.ndim > 1:
                # multi-channel files come back as (frames, channels);
                # average the channels to get a single mono track
                audio_clip = audio_clip.mean(axis=1)
            # keep at most duration_sec seconds and never more than fits the buffer
            n_keep = min(int(sr * duration_sec), n_samples)
            audio_clip = audio_clip[:n_keep]
            data[i, 0, :audio_clip.shape[0]] = audio_clip

            # look up label and add to list
            file_id = file.split("-")[0]
            labels.append(composers[int(file_id)])

            # i + 1 files are done after this iteration
            bar.update(i + 1)

    return np.array(labels, dtype=str), data
        

In [7]:
# CREATE TRAINING DATASET
# Use every chunk (downsampling_rate=1) and keep the first 2 seconds of each.
training_labels, training_data = load_audio(
    path="/data/musicnet/train_chunks/",
    downsampling_rate=1,
    duration_sec=2,
    composers=composers,
)

  0% (209 of 39472) |                    | Elapsed Time: 0:00:00 ETA:   0:00:18

Processing 39472 audio files...
Downsampling dataset to 100% (equivalent to 39472 audio files).


100% (39472 of 39472) |##################| Elapsed Time: 0:00:18 Time:  0:00:18


In [8]:
# Turn labels to categorical (one-hot encoding)
# NOTE(review): the dummy columns are derived from the labels of this split
# alone, so the splits only share a column layout if each one contains the
# same set of composers -- verify against the other splits.
training_labels_pd = pd.DataFrame(training_labels)
training_onehot = pd.get_dummies(training_labels_pd)
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same NumPy array on old and new versions.
training_targets = training_onehot.values
training_targets.shape

(39472, 5)

In [9]:
# Store datasets as hdf5 files
# The context manager closes the file even when writing a dataset raises,
# so a failed run does not leave an open handle on the HDF5 file.
with h5py.File('/data/musicnet/hdf5/train_data.hdf5', 'w') as train_data:
    train_data.create_dataset('data', data=training_data)
    train_data.create_dataset('targets', data=training_targets)

In [10]:
# Drop the references to the training arrays and the HDF5 handle, then run a
# garbage-collection pass so the memory can be reclaimed.
training_data = training_labels = training_targets = train_data = None
gc.collect()

18

In [13]:
# CREATE VALIDATION DATASET
# Use every chunk (downsampling_rate=1) and keep the first 2 seconds of each.
validation_labels, validation_data = load_audio(
    path="/data/musicnet/val_chunks/",
    downsampling_rate=1,
    duration_sec=2,
    composers=composers,
)

  0% (34 of 8089) |                      | Elapsed Time: 0:00:00 ETA:   0:00:23

Processing 8089 audio files...
Downsampling dataset to 100% (equivalent to 8089 audio files).


100% (8089 of 8089) |####################| Elapsed Time: 0:00:27 Time:  0:00:27


In [14]:
# Turn labels to categorical (one-hot encoding)
# NOTE(review): the dummy columns are derived from the labels of this split
# alone, so the splits only share a column layout if each one contains the
# same set of composers -- verify against the other splits.
validation_labels_pd = pd.DataFrame(validation_labels)
validation_onehot = pd.get_dummies(validation_labels_pd)
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same NumPy array on old and new versions.
validation_targets = validation_onehot.values
validation_targets.shape

(8089, 5)

In [15]:
# Store datasets as hdf5 files
# The context manager closes the file even when writing a dataset raises,
# so a failed run does not leave an open handle on the HDF5 file.
with h5py.File('/data/musicnet/hdf5/val_data.hdf5', 'w') as val_data:
    val_data.create_dataset('data', data=validation_data)
    val_data.create_dataset('targets', data=validation_targets)

In [20]:
# Drop the references to the validation arrays and the HDF5 handle, then run a
# garbage-collection pass so the memory can be reclaimed.
validation_data = validation_labels = validation_targets = val_data = None
gc.collect()

247

In [21]:
# CREATE TEST DATASET
# Use every chunk (downsampling_rate=1) and keep the first 2 seconds of each.
testing_labels, testing_data = load_audio(
    path="/data/musicnet/test_chunks/",
    downsampling_rate=1,
    duration_sec=2,
    composers=composers,
)

  0% (2 of 8810) |                       | Elapsed Time: 0:00:00 ETA:   0:09:52

Processing 8810 audio files...
Downsampling dataset to 100% (equivalent to 8810 audio files).


100% (8810 of 8810) |####################| Elapsed Time: 0:00:45 Time:  0:00:45


In [22]:
# Turn labels to categorical (one-hot encoding)
# NOTE(review): the dummy columns are derived from the labels of this split
# alone, so the splits only share a column layout if each one contains the
# same set of composers -- verify against the other splits.
testing_labels_pd = pd.DataFrame(testing_labels)
testing_onehot = pd.get_dummies(testing_labels_pd)
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same NumPy array on old and new versions.
testing_targets = testing_onehot.values
testing_targets.shape

(8810, 5)

In [23]:
# Store datasets as hdf5 files
# The context manager closes the file even when writing a dataset raises,
# so a failed run does not leave an open handle on the HDF5 file.
with h5py.File('/data/musicnet/hdf5/test_data.hdf5', 'w') as test_data:
    test_data.create_dataset('data', data=testing_data)
    test_data.create_dataset('targets', data=testing_targets)

In [24]:
# Drop the references to the test arrays and the HDF5 handle, then run a
# garbage-collection pass so the memory can be reclaimed.
testing_data = testing_labels = testing_targets = test_data = None
gc.collect()

7