In [17]:
import pretty_midi
import pandas as pd
import os
from tqdm import tqdm_notebook
import pickle
import shutil
import tensorflow as tf
from tensorflow import keras
import ast
import numpy as np
from collections import namedtuple

# Create data

In [11]:
metadata = pd.read_csv('data/maestro-v2.0.0/maestro-v2.0.0.csv')

In [14]:
len(metadata)

1282

In [19]:
%timeit pretty_midi.PrettyMIDI(os.path.join('data/maestro-v2.0.0', row['midi_filename']))

346 ms ± 2.63 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [21]:
songs = []
for _, row in tqdm_notebook(list(metadata.iterrows())):
    midi = pretty_midi.PrettyMIDI(os.path.join('data/maestro-v2.0.0', row['midi_filename']))
    midi.split = row['split']
    songs.append(midi)

HBox(children=(IntProgress(value=0, max=1282), HTML(value='')))

In [33]:
shutil.rmtree('data/songs.txt', ignore_errors=True)
for midi in tqdm_notebook(songs):
    assert len(midi.instruments) == 1
    with open('data/songs.txt', 'a') as f:
        f.write('{split}#{tempo}#{notes}\n'.format(
            split=midi.split, tempo=midi.estimate_tempo(), 
            notes=[(n.start, n.end, n.pitch, n.velocity) for n in midi.instruments[0].notes]
        ))

HBox(children=(IntProgress(value=0, max=1282), HTML(value='')))

# Create labelled dataset

In [18]:
NoteTuple = namedtuple('NoteTuple', ['start', 'end', 'pitch', 'velocity'])

In [23]:
def process_string(s):
    split, tempo, notes = s.split('#')
    tempo = float(tempo)
    notes = [NoteTuple(*tpl) for tpl in ast.literal_eval(notes)]
    return split, tempo, notes

In [24]:
def create_features(notes, nb_timesteps):
    sequences = []
    labels = []
    for i in range(nb_timesteps, len(notes)):
        sequences.append(np.array([n.pitch for n in notes[i - nb_timesteps:i]]))
        labels.append(notes[i].pitch)
    return sequences, labels

In [25]:
timesteps = 50

In [26]:
x_train = []
y_train = []
x_validate = []
y_validate = []

In [27]:
nb_lines = 0
with open('data/songs.txt') as f:
    for line in f:
        nb_lines += 1

In [None]:
with open('data/songs.txt') as f:
    for line in tqdm_notebook(f, total=nb_lines):
        split, tempo, notes = process_string(line)
        sequences, labels = create_features(notes, nb_timesteps=timesteps)
        if split == 'train':
            x_train.extend(sequences)
            y_train.extend(labels)
        elif split == 'validate':
            x_validate.extend(sequences)
            y_validate.extend(labels)

HBox(children=(IntProgress(value=0, max=1341), HTML(value='')))