In [1]:
import mido
import os
import numpy as np
from scipy import stats
from os import listdir
from os.path import isfile, join
import tensorflow as tf
from tensorflow.contrib import rnn
import time

import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = (12, 8)

In [2]:
%run get_training_data.py

### Create training data

In [3]:
def get_training_data():
    """Generate training data array for all files in "midis_processed/" directory.
       
    Returns:
        Numpy array of training data.
    """
    training_data = []
    for filename in os.listdir('midis_processed/')[:10]:
        if filename.endswith(".mid"):
            print(filename)
            training_data.append(midi_to_vector('midis_processed/' + filename))
    return np.array(training_data)

In [4]:
training_data = get_training_data()

1943 (1).mid
1943-lev1 (1).mid
1943-lev3 (1).mid
1943-Lev3Win (1).mid
1943boss.mid
1943boss1.mid
1943BossWin.mid
1943lost (1).mid
1943sab (1).mid
1943won.mid


In [5]:
np.array(training_data[0])

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

### Parameters

In [6]:
vocab_size = len(training_data[0][0])
vocab_size

1124

In [7]:
# Parameters
learning_rate = 0.1
training_iters = 500
display_step = 10
n_input = 16

# number of units in RNN cell
n_hidden = 512

# tf Graph input
x = tf.placeholder("float", [n_input, vocab_size])
y = tf.placeholder("int32", [1, n_input])

# RNN output node weights and biases
softmax_w = tf.Variable(tf.random_normal([n_hidden, vocab_size]))
softmax_b = tf.Variable(tf.random_normal([vocab_size]))

max_grad_norm = 5

## Create tensor flow graph

In [8]:
rnn_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden),rnn.BasicLSTMCell(n_hidden)])

In [9]:
x_split = tf.split(x, n_input, 0)
x_split

[<tf.Tensor 'split:0' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:1' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:2' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:3' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:4' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:5' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:6' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:7' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:8' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:9' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:10' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:11' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:12' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:13' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:14' shape=(1, 1124) dtype=float32>,
 <tf.Tensor 'split:15' shape=(1, 1124) dtype=float32>]

In [10]:
outputs, states = rnn.static_rnn(rnn_cell, x_split, dtype=tf.float32)
outputs

[<tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_2:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_5:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_8:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_11:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_14:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_17:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_20:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_23:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell_1/cell_1/basic_lstm_cell/mul_26:0' shape=(1, 512) dtype=float32>,
 <tf.Tensor 'rnn/rnn/multi_rnn_cell/cell

In [11]:
logits = [tf.nn.xw_plus_b(output, softmax_w, softmax_b) for output in outputs]

In [12]:
logits = tf.reshape(logits, [1, n_input, vocab_size])
logits

<tf.Tensor 'Reshape:0' shape=(1, 16, 1124) dtype=float32>

In [13]:
loss = tf.contrib.seq2seq.sequence_loss(logits, y, weights=tf.ones([1, n_input], dtype=tf.float32), # check this
                                        average_across_timesteps=False, average_across_batch=True)
loss

<tf.Tensor 'sequence_loss/truediv:0' shape=(16,) dtype=float32>

In [14]:
cost = tf.reduce_sum(loss)
cost

<tf.Tensor 'Sum:0' shape=() dtype=float32>

In [15]:
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
optimizer

<tf.Operation 'RMSProp' type=NoOp>

In [16]:
tvars = tf.trainable_variables()
tvars

[<tf.Variable 'Variable:0' shape=(512, 1124) dtype=float32_ref>,
 <tf.Variable 'Variable_1:0' shape=(1124,) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel:0' shape=(1636, 2048) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias:0' shape=(2048,) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel:0' shape=(1024, 2048) dtype=float32_ref>,
 <tf.Variable 'rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias:0' shape=(2048,) dtype=float32_ref>]

In [17]:
#grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
#grads

In [18]:
#optimizer = tf.train.GradientDescentOptimizer(learning_rate)
#optimizer

In [19]:
#_train_op = optimizer.apply_gradients(
#    zip(grads, tvars),
#    global_step=tf.train.get_or_create_global_step())
#_train_op

## Run tensor flow session

In [20]:
def get_midi_xy(midi_file_number):
    offset = np.random.randint(0, len(training_data[midi_file_number]) - n_input - 1)
    midi_x = np.array(training_data[midi_file_number])[offset:offset+n_input]
    midi_y = np.array(training_data[midi_file_number])[1 + offset:1 + offset+n_input].dot(range(vocab_size)).reshape(1, n_input)
    return midi_x, midi_y

In [21]:
midi_x = np.array(training_data[0])[1:1+n_input]
midi_x, midi_x.shape

(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]), (16, 1124))

In [22]:
midi_y = np.array(training_data[0])[2:2+n_input].dot(range(vocab_size)).reshape(1, n_input)
midi_y

array([[ 439,  560, 1028,  828, 1028,  724,  316, 1026,  828, 1026,  328,
        1026,  840, 1026,  327, 1026]])

In [None]:
x

<tf.Tensor 'Placeholder:0' shape=(16, 1124) dtype=float32>

In [None]:
training_loss_list = []
step_list = []
with tf.Session() as session:
    init = tf.global_variables_initializer()
    session.run(init)
    step = 0
    loss_total = 0
    while step < training_iters:
        for i in range(len(training_data)):
            # Generate a minibatch. Add some randomness on selection process.
            
            midi_x, midi_y = get_midi_xy(i)

            _, loss = session.run([optimizer, cost], \
                                                    feed_dict={x: midi_x, y: midi_y})
            loss_total += loss
            if (step+1) % display_step == 0:
                print("Iter= " + str(step+1) + ", Average Loss= " + \
                      "{:.6f}".format(loss_total/display_step))
                loss_total = 0
            step_list.append(step)
            training_loss_list.append(loss_total/display_step)
            test = session.run([optimizer, cost], feed_dict={x: midi_x, y: midi_y})
            step += 1

Iter= 10, Average Loss= 900.913566
Iter= 20, Average Loss= 464.602542
Iter= 30, Average Loss= 244.563252
Iter= 40, Average Loss= 173.125620
Iter= 50, Average Loss= 172.028709
Iter= 60, Average Loss= 191.015423
Iter= 70, Average Loss= 300.740213
Iter= 80, Average Loss= 319.988586
Iter= 90, Average Loss= 401.373956
Iter= 100, Average Loss= 346.719711
Iter= 110, Average Loss= 364.822234
Iter= 120, Average Loss= 359.989612
Iter= 130, Average Loss= 356.747511
Iter= 140, Average Loss= 331.451039
Iter= 150, Average Loss= 395.099834
Iter= 160, Average Loss= 522.037860
Iter= 170, Average Loss= 513.170633
Iter= 180, Average Loss= 562.625156
Iter= 190, Average Loss= 452.203613
Iter= 200, Average Loss= 457.663092


In [None]:
test

## Generate music from trained model.

In [None]:
symbols_in_keys = [dictionary[str(words[i])] for i in range(len(words))]
for i in range(32):
    keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, 1])
    onehot_pred = session.run(logits, feed_dict={x: keys})
    onehot_pred_index = int(tf.argmax(onehot_pred, 1).eval())
    sentence = "%s %s" % (sentence,reverse_dictionary[onehot_pred_index])
    symbols_in_keys = symbols_in_keys[1:]
    symbols_in_keys.append(onehot_pred_index)
print(sentence)