In [4]:
from tqdm import tqdm_notebook
import ast
import pretty_midi
import numpy as np
import datetime as dt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking, Embedding
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [5]:
def process_string(s):
    """Parse one ``split#tempo#notes`` record.

    Args:
        s: A line made of exactly three ``#``-separated fields. The third
            field is a Python literal: a sequence of
            ``(start, end, pitch, velocity)`` entries.

    Returns:
        A ``(split, tempo, notes)`` tuple: the first field unchanged, the
        tempo converted to ``float`` and a list of ``pretty_midi.Note``.

    Raises:
        ValueError: If the line does not have exactly three fields or the
            tempo is not a valid float.
    """
    split, raw_tempo, raw_notes = s.split('#')
    tempo = float(raw_tempo)

    notes = []
    # literal_eval only evaluates literals, so the file content is never executed
    for fields in ast.literal_eval(raw_notes):
        notes.append(pretty_midi.Note(start=fields[0], end=fields[1],
                                      pitch=fields[2], velocity=fields[3]))
    return split, tempo, notes

# Count the records (one per line) up front so the progress bar of the
# loading loop can be given a total.
with open('../data/songs.txt') as f:
    nb_songs = sum(1 for _ in f)

In [6]:
# Parse every record and file its notes under the split it belongs to.
# Records whose split is neither 'train' nor 'validation' are ignored.
train_songs, validation_songs = [], []
songs_by_split = {'train': train_songs, 'validation': validation_songs}

with open('../data/songs.txt') as f:
    for line in tqdm_notebook(f, total=nb_songs):
        split, tempo, notes = process_string(line)
        destination = songs_by_split.get(split)
        if destination is not None:
            destination.append(notes)

HBox(children=(IntProgress(value=0, max=1341), HTML(value='')))




In [7]:
# `notes` is the loop variable left over from the loading cell, i.e. the notes
# of the last record parsed from the file.
notes[:10]

[Note(start=1.016667, end=1.214583, pitch=48, velocity=40),
 Note(start=1.545833, end=1.780208, pitch=52, velocity=46),
 Note(start=1.000000, end=2.061458, pitch=79, velocity=72),
 Note(start=2.060417, end=2.183333, pitch=77, velocity=59),
 Note(start=2.047917, end=2.247917, pitch=48, velocity=35),
 Note(start=2.166667, end=2.257292, pitch=76, velocity=65),
 Note(start=2.262500, end=2.346875, pitch=74, velocity=69),
 Note(start=2.379167, end=2.622917, pitch=72, velocity=63),
 Note(start=2.508333, end=2.744792, pitch=50, velocity=44),
 Note(start=2.542708, end=2.764583, pitch=79, velocity=61)]

In [8]:
def constrain_pitch(pitch, min_pitch, max_pitch):
    """Transpose an out-of-range pitch by whole octaves toward the range.

    A pitch already inside ``[min_pitch, max_pitch]`` is returned unchanged.
    A pitch below ``min_pitch`` is moved into ``[min_pitch, min_pitch + 12)``;
    a pitch above ``max_pitch`` is moved into ``(max_pitch - 12, max_pitch]``.
    The result always keeps the same value modulo 12.

    Args:
        pitch: Pitch number to constrain.
        min_pitch: Lowest allowed pitch (inclusive).
        max_pitch: Highest allowed pitch (inclusive).

    Returns:
        The constrained pitch.
    """
    if min_pitch <= pitch <= max_pitch:
        return pitch

    pitch_class = pitch % 12

    if pitch < min_pitch:
        # Start in the octave just above the one containing min_pitch, then
        # drop an octave if that overshoots the lowest allowed octave.
        folded = pitch_class + 12 * (min_pitch // 12 + 1)
        if folded - min_pitch >= 12:
            folded -= 12
        assert min_pitch <= folded < min_pitch + 12
        return folded

    # Start in the octave just below the one containing max_pitch, then
    # raise an octave if that undershoots the highest allowed octave.
    folded = pitch_class + 12 * (max_pitch // 12 - 1)
    if max_pitch - folded >= 12:
        folded += 12
    assert max_pitch - 12 < folded <= max_pitch
    return folded

In [9]:
class FeaturedNote:
    """A note reduced to a pitch index and the wait before it is played."""

    def __init__(self, pitch, wait):
        """Store the pitch index (offset from the lowest pitch) and the wait."""
        self.pitch = pitch
        self.wait = wait

    def calculate_features(self, min_pitch, max_pitch):
        """Compute and store the ``pitch_label`` and ``features`` arrays.

        ``pitch_label`` is a one-hot vector of length
        ``max_pitch - min_pitch + 1`` with a 1 at index ``self.pitch``.
        ``features`` is that same vector followed by ``self.wait``.

        Raises:
            IndexError: If ``self.pitch`` does not index into the vector.
        """
        one_hot = [0 for _ in range(max_pitch - min_pitch + 1)]
        one_hot[self.pitch] = 1

        self.pitch_label = np.array(one_hot)
        self.features = np.array([*one_hot, self.wait])

In [10]:
# Feature configuration shared by the featurization and model cells.
nb_notes_history, min_pitch, max_pitch = 16, 45, 85  # notes per input sequence; pitch bounds

# Width of the one-hot pitch vector; its last slot is the one used by the
# out-of-range note that starts every sequence.
nb_pitches = 1 + max_pitch - min_pitch

# One-hot pitch vector plus one extra column for the wait value.
nb_features = 1 + nb_pitches

In [15]:
def _snap_chords(song, chord_time):
    """Return a song as chord-snapped ``(start, pitch)`` pairs starting at t=0.

    Notes are shifted so the earliest one starts at 0 and ordered by start
    time. A note starting less than ``chord_time`` after the first note of the
    current chord gets that chord's start time. The result is ordered by
    ``(start, pitch)``, so notes of one chord run from lowest to highest pitch.
    The input notes are never modified.
    """
    if not song:
        return []

    first_note_start = min(note.start for note in song)
    # Plain tuples instead of the Note objects: the caller's songs stay
    # untouched, so featurizing the same songs twice gives the same result.
    events = sorted((note.start - first_note_start, note.pitch) for note in song)

    snapped = []
    chord_start = None
    for start, pitch in events:
        if chord_start is not None and start < chord_start + chord_time:
            start = chord_start  # close enough to the chord's first note: same chord
        else:
            chord_start = start  # this note opens a new chord
        snapped.append((start, pitch))

    # Sorting orders each chord by pitch; for chord_time >= 0 the snapped
    # start times are already non-decreasing, so only ties get reordered.
    snapped.sort()
    return snapped


def feature_songs(songs, chord_time=0.05):
    """Turn songs into fixed-length note histories and next-note labels.

    Each song is prefixed with ``nb_notes_history`` copies of a start note
    (pitch index ``max_pitch - min_pitch``, wait 0). Every real note then
    yields one datapoint: the features of the ``nb_notes_history`` notes
    before it, its one-hot pitch and its wait. The wait is the time since the
    previous note's start; the first note of a song has a wait of 1 because
    the start note is placed at t=-1.

    Real pitches are constrained to ``[min_pitch, max_pitch - 1]`` so that the
    last one-hot slot is used by the start note only.

    Reads the module-level ``nb_notes_history``, ``nb_features``,
    ``nb_pitches``, ``min_pitch`` and ``max_pitch``.

    Args:
        songs: Iterable of songs, each a sequence of notes exposing ``start``
            and ``pitch``. Iterated twice, so it must not be a one-shot
            iterator. Empty songs contribute no datapoints.
        chord_time: Notes starting less than this many time units after the
            first note of a chord are treated as simultaneous with it.

    Returns:
        ``(sequences, pitch_labels, wait_labels)`` with shapes
        ``(n, nb_notes_history, nb_features)``, ``(n, nb_pitches)`` and
        ``(n,)``, where ``n`` is the total number of notes in ``songs``.
    """
    nb_datapoints = sum(len(song) for song in songs)

    # Pre-filled with -1, a value no feature or label takes, presumably so
    # that a row left unwritten would stand out.
    sequences = np.full((nb_datapoints, nb_notes_history, nb_features), -1.0)
    pitch_labels = np.full((nb_datapoints, nb_pitches), -1.0)
    wait_labels = np.full((nb_datapoints,), -1.0)

    # The start note is identical for every song, so build it once.
    start_note = FeaturedNote(pitch=max_pitch - min_pitch, wait=0)
    start_note.calculate_features(min_pitch, max_pitch)

    data_index = 0
    for song in tqdm_notebook(songs):
        featured_notes = [start_note] * nb_notes_history

        previous_time = -1  # every song begins 1 time unit after the start note
        for start, raw_pitch in _snap_chords(song, chord_time):
            # max_pitch - 1 keeps the top one-hot slot free for the start note
            pitch = constrain_pitch(raw_pitch, min_pitch, max_pitch - 1) - min_pitch
            fnote = FeaturedNote(pitch, start - previous_time)
            fnote.calculate_features(min_pitch, max_pitch)
            featured_notes.append(fnote)
            previous_time = start

        # Stack the features once per song; each history is then a plain slice.
        song_features = np.array([fnote.features for fnote in featured_notes])
        for i in range(nb_notes_history, len(featured_notes)):
            sequences[data_index] = song_features[i - nb_notes_history:i]
            pitch_labels[data_index] = featured_notes[i].pitch_label
            wait_labels[data_index] = featured_notes[i].wait
            data_index += 1

    return sequences, pitch_labels, wait_labels

In [16]:
# Featurize the validation songs into input sequences plus pitch and wait labels
validation_sequences, validation_pitch, validation_wait = feature_songs(validation_songs)

HBox(children=(IntProgress(value=0, max=144), HTML(value='')))




In [19]:
# Smallest wait label in the validation set
validation_wait.min()

0.0

In [20]:
# Featurize the training songs into input sequences plus pitch and wait labels
train_sequences, train_pitch, train_wait = feature_songs(train_songs)

HBox(children=(IntProgress(value=0, max=1010), HTML(value='')))




In [23]:
# Sanity check: a minimum of -1 would mean some pre-filled rows were never written
train_sequences.min()

0.0

In [22]:
# (number of training datapoints, nb_pitches)
train_pitch.shape

(5996034, 41)

In [24]:
# Next-pitch classifier: one LSTM over the note history, a dense layer with
# dropout, and a softmax over the nb_pitches one-hot slots.
model = Sequential([
    # Recurrent layer; only the final state is passed on
    LSTM(64, input_shape=(nb_notes_history, nb_features),
         return_sequences=False, dropout=0.1, recurrent_dropout=0.1),
    # Fully connected layer
    Dense(64, activation='relu'),
    # Dropout for regularization
    Dropout(0.5),
    # Output layer
    Dense(nb_pitches, activation='softmax'),
])

# Labels are one-hot vectors, hence categorical cross-entropy
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [25]:
# Timestamp identifying this run; used in the TensorBoard log directory and
# the checkpoint file name.
run_time = dt.datetime.strftime(dt.datetime.now(), '%Y-%m-%d_%H:%M:%S')

In [26]:
# Show the timestamp that names this run's logs and checkpoint
run_time

'2019-06-30_00:19:50'

In [27]:
# Create callbacks: TensorBoard logging, early stopping on the validation
# loss, and a checkpoint that keeps only the best full model.
tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir='tb_logs/notebased_featuresv1_{}'.format(run_time),
    histogram_freq=0,
    write_graph=True,
    write_grads=False,
    write_images=False,
    embeddings_freq=0,
    embeddings_layer_names=None,
    embeddings_metadata=None,
    embeddings_data=None,
    update_freq='batch',
)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5)
checkpoint_callback = ModelCheckpoint(
    'models/notebased_featuresv1_{}.h5'.format(run_time),
    save_best_only=True,
    save_weights_only=False,
)

callbacks = [tensorboard_callback, early_stopping_callback, checkpoint_callback]

In [28]:
# (number of training datapoints, nb_notes_history, nb_features)
train_sequences.shape

(5996034, 16, 42)

In [None]:
# Train the pitch classifier; the wait labels are not used here.
# NOTE(review): the EarlyStopping callback is created with patience=5, so it
# cannot stop a run that is limited to epochs=5 - confirm this is intended.
history = model.fit(
    x=train_sequences,
    y=train_pitch,
    batch_size=2048,
    epochs=5,
    callbacks=callbacks,
    validation_data=(validation_sequences, validation_pitch),
)

W0630 00:20:39.629843 140479974627072 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 5996034 samples, validate on 660126 samples
Epoch 1/5
 899072/5996034 [===>..........................] - ETA: 14:37 - loss: 3.3973 - accuracy: 0.0891