# LSTM
A basic LSTM written in tensorflow. We'll use this to learn patterns in the matrix we've generated, and then sample it to generate new musical matrices that can be translated back to .midi files.

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# define params

# meta-parameters
model_save_path = 'C:/Users/Emerson/Desktop/saved_models/'
generated_matrix_save_path = 'C:/Users/Emerson/Documents/bigdata/midis/generated/matrices/'
data_path = 'C:/Users/Emerson/Documents/bigdata/midis/processed/mastermatrix.npy'
test_path = 'C:/Users/Emerson/Documents/bigdata/midis/processed/testmatrix.npy'
display_step = 200

# learning parameters
learning_rate = 1e-4
epochs = 120
batch_size = 64
n_steps = 64

#network parameters
n_hidden = 256
n_dense = 174
n_input = 174
n_output = n_input

# create variables
x = tf.placeholder("float", [None, n_steps, n_input])
y = tf.placeholder("float", [None, n_output])

weights = {
    'out' : tf.Variable(tf.truncated_normal([n_hidden, n_dense])),
    'bias' : tf.Variable(tf.truncated_normal([n_dense]))
}

lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden, forget_bias = 1.0)

In [3]:
# Build network ops

x_unstacked = tf.unstack(x, n_steps, 1) # reshape data from (batch_size, n_steps, n_input) to (n_steps, batch_size, n_input)

outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x_unstacked, dtype=tf.float32)

pred = tf.matmul(outputs[-1], weights['out']) + weights['bias']

cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = pred, labels = y))
optimizer = tf.train.RMSPropOptimizer(learning_rate = learning_rate).minimize(cost)

# Evaluate model
probas = tf.sigmoid(pred)
accuracy = tf.reduce_mean(tf.reduce_min(tf.cast(tf.equal(tf.round(probas), y), tf.float32), axis=-1))

# initialize variables

init = tf.global_variables_initializer()

In [4]:
def load_batches(path, batch_size, n_steps, n_input):
    '''
    Generator function that loads batches of size batch_size from a pickled numpy 
    array. Batches contain random sequences of length n_steps.
    '''
    master_matrix = np.load(path, mmap_mode = 'r', encoding = 'latin1') # load memory map of the pickled note matrix
    seq_starts = np.arange(0, len(master_matrix) - n_steps, 3) # define sequence starts as every third time-step.
    np.random.shuffle(seq_starts) # shuffle the sequence starts to randomize the contents of the batches
    print("Loading {} sequences in {} batches.".format(len(seq_starts), len(seq_starts)//batch_size))
    batch_x = np.empty((batch_size, n_steps, n_input))
    batch_y = np.empty((batch_size, n_input))
    for i, start in enumerate(seq_starts):
        batch_x[i % batch_size] = master_matrix[start:start+n_steps]
        batch_y[i % batch_size] = master_matrix[start+n_steps]
        if i % batch_size == 0 and i != 0:
            yield batch_x, batch_y
            batch_x = np.empty((batch_size, n_steps, n_input), dtype = float)
            batch_y = np.empty((batch_size, n_input), dtype = float)
    batch_x = np.delete(batch_x, np.s_[i % batch_size + 1:], 0) # trim empty rows off last, shorter batch
    batch_y = np.delete(batch_y, np.s_[i % batch_size + 1:], 0)
    yield batch_x, batch_y

In [5]:
# Launch the graph
saver = tf.train.Saver(max_to_keep = 120)
with tf.Session() as sess:
    sess.run(init)
    ### Uncomment this line to restore a saved model and continue training. Leave commented out to start from scratch.
    # saver.restore(sess, model_save_path + '112')
    # Keep training for the prescribed number of epochs
    e = 0 # if continuing from a checkpoint, set e to the number of the epoch you are starting at.
    while e <= epochs:
        batch = 0
        for batch_x, batch_y in load_batches(data_path, batch_size, n_steps, n_input):
            # Run optimization ops (backprop)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            if batch % display_step == 0:
                # Calculate batch accuracy and loss
                loss, train_acc = sess.run((cost, accuracy), feed_dict={x: batch_x, y: batch_y})
                print("Epoch " + str(e) + ", Batch " + str(batch) + 
                      ": Minibatch Loss = {:.6f}, Training accuracy = {:.6f}".format(loss, train_acc))
            batch += 1
        valid_acc = np.empty(0)
        for batch_x, batch_y in load_batches(test_path, batch_size, n_steps, n_input):
            batch_acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            valid_acc = np.append(valid_acc, batch_acc)
        print("Validation accuracy: {}".format(valid_acc.mean()))
        saver.save(sess, model_save_path + str(e))
        print("Model saved.")
        e += 1
    print("Optimization Finished!")

Loading 169027 sequences in 2641 batches.
Epoch 113, Batch 0: Minibatch Loss = 0.037120, Training accuracy = 0.375000
Epoch 113, Batch 200: Minibatch Loss = 0.048970, Training accuracy = 0.296875
Epoch 113, Batch 400: Minibatch Loss = 0.047676, Training accuracy = 0.312500
Epoch 113, Batch 600: Minibatch Loss = 0.045513, Training accuracy = 0.312500
Epoch 113, Batch 800: Minibatch Loss = 0.039739, Training accuracy = 0.375000
Epoch 113, Batch 1000: Minibatch Loss = 0.041781, Training accuracy = 0.406250
Epoch 113, Batch 1200: Minibatch Loss = 0.045061, Training accuracy = 0.234375
Epoch 113, Batch 1400: Minibatch Loss = 0.039670, Training accuracy = 0.296875
Epoch 113, Batch 1600: Minibatch Loss = 0.035186, Training accuracy = 0.312500
Epoch 113, Batch 1800: Minibatch Loss = 0.031006, Training accuracy = 0.406250
Epoch 113, Batch 2000: Minibatch Loss = 0.041830, Training accuracy = 0.296875
Epoch 113, Batch 2200: Minibatch Loss = 0.039193, Training accuracy = 0.328125
Epoch 113, Batch 

## Sample Generation
Now that the network has been trained, we can randomly sample it and feed the output back in as input, creating a self-writing digital piano roll.

In [43]:
sample_len = 256 # length in sixteenth-notes. a length of 256 generates a ~31 second sample.
checkpoint_num = 10 # select a saved model to generate the sample from
checkpoint_path = model_save_path + str(checkpoint_num) 
sample_num = 1
seed = None

with tf.Session() as sess:
    output_matrix = np.empty((sample_len, n_input))

    # load checkpoint
    loader = tf.train.Saver()
    loader.restore(sess, checkpoint_path)
    
    # use a seed for generation if one is provided
    if seed is None:
        input_seq = np.zeros((1, n_steps, n_input))
    else:
        input_seq = np.array(seed).reshape((1, n_steps, n_input))
    
    for timestep in range(sample_len):
        # get predicted probabilities for next timestep
        probs = sess.run(probas, feed_dict = {x: input_seq})
        output = np.zeros((1, 1, n_input))
        # randomly sample using predicted probabilities
        for i, p in enumerate(probs[0]):
            output[0, 0, i] = 1 if np.random.random() < p else 0
        # ensure consistency of the output
        for i, n in enumerate(output[0,0,:]):
            if i % 2 == 1: # check notes being held first
                # if a note was not sounding last time-step and is not being played, 
                # it cannot be sounding in the current timestep.
                if input_seq[0, -1, i] == 0 and output[0, 0, i - 1] == 0:
                    output[0, 0, i] = 0
            # check notes being played this time step
            if i % 2 == 0:
                if n == 1: # if a note is being played,
                    output[0, 0, i + 1] = 1 # it must also be sounding.
        # save the output for this timestep to the output matrix
        output_matrix[timestep] = output
        # prepare the next input sequence
        input_seq = np.append(input_seq[:, 1: ,:], output, axis = 1)
        
    print("Generation complete!")
    np.save(generated_matrix_save_path + 'epoch ' + str(checkpoint_num) + ' sample ' + str(sample_num), output_matrix)

Generation complete!
