In [1]:
import sys, os
sys.path.append(os.path.abspath('..'))

from midi_parser import *

In [2]:
import numpy as np
import pandas as pd
import glob
import tensorflow as tf
from tqdm import tqdm

Sample timesteps using an RBM trained on a single song.

In [3]:
def get_songs(path, min_length=50):
    """Load every MIDI file directly under ``path`` as a state matrix.

    Files are matched with the glob pattern ``<path>/*.mid*`` and parsed with
    ``midiToStatematrix``. A song is kept only when its first dimension is
    strictly greater than ``min_length``.

    Args:
        path: Directory to search (not searched recursively).
        min_length: Songs whose first dimension is not greater than this value
            are dropped. Defaults to 50.

    Returns:
        A list of numpy arrays, one per accepted song, ordered by file name.
        The list is empty when nothing matches or nothing can be parsed.
    """
    import warnings

    songs = []
    # glob returns files in an unspecified order; sorting makes the result
    # (and therefore songs[0] in later cells) reproducible across runs.
    for f in sorted(glob.glob('{}/*.mid*'.format(path))):
        try:
            song = np.array(midiToStatematrix(f))
            long_enough = song.shape[0] > min_length
        except Exception as err:
            # Best effort: an unparseable file must not abort the whole load,
            # but report it so a near-empty result is not a silent surprise.
            # Catching Exception (not a bare except) lets Ctrl-C through.
            warnings.warn("Skipping MIDI file {}: {!r}".format(f, err))
            continue
        if long_enough:
            songs.append(song)
    return songs

In [4]:
# Parse every MIDI file found in the sibling Jazz_Music_Midi directory.
songs = get_songs('../Jazz_Music_Midi')

In [5]:
# Call form of print: valid on both Python 2 and Python 3 for a single argument.
print("{} songs processed".format(len(songs)))

1 songs processed


In [6]:
### HyperParameters

# Bounds of the note axis. 2*note_range = 156, which equals the column count of
# the parsed song displayed further down.
# NOTE(review): presumably these must match the bounds used inside midi_parser -- confirm.
lowest_note = 24
highest_note = 102 
note_range = highest_note-lowest_note

# Number of consecutive timesteps concatenated into a single training example.
num_timesteps  = 10 
n_visible      = 2*note_range*num_timesteps #size of visible layer
n_hidden       = 100 # size of hidden layer

num_epochs = 500 # number of passes over all songs during training
batch_size = 100 # number of training rows fed per update step
learning_rate = tf.constant(0.005, tf.float32) #learning rate

In [8]:
# Batch of training examples; each row is one flattened block of n_visible values.
x  = tf.placeholder(tf.float32, [None, n_visible], name="x")
# Visible-to-hidden weights, initialised with small zero-mean Gaussian noise.
# stddev must be passed by keyword: the second positional argument of
# tf.random_normal is the mean, so a bare 0.01 would leave stddev at 1.0.
W  = tf.Variable(tf.random_normal([n_visible, n_hidden], stddev=0.01), name="W")
# The name belongs to tf.Variable, not tf.zeros, so the variables themselves
# are labelled "hb"/"vb" in the graph and in checkpoints.
hb = tf.Variable(tf.zeros([1, n_hidden], tf.float32), name="hb") # bias hidden layer
vb = tf.Variable(tf.zeros([1, n_visible], tf.float32), name="vb") # bias visible

In [113]:
# Draw a binary sample from a tensor of probabilities
def sample(probs):
    """Return a tensor of 0s and 1s drawn elementwise from ``probs``.

    Uniform noise in [0, 1) is added to each probability and the sum is
    floored, so an entry with probability p becomes 1 exactly when the noise
    is at least 1 - p.
    """
    noise = tf.random_uniform(tf.shape(probs), 0, 1)
    return tf.floor(probs + noise)

In [114]:
def gibbs_sample(k): # iterates for k steps
    """Build an op that runs ``k`` Gibbs steps starting from the placeholder ``x``.

    Each step samples the hidden units from the current visible values and then
    resamples the visible units from those hidden values, using the global
    ``W``, ``hb`` and ``vb``. The returned tensor is the visible sample after
    the last step, with gradients stopped.
    """
    def _one_step(count, limit, visible):
        # visible -> hidden
        hidden = sample(tf.sigmoid(tf.matmul(visible, W) + hb))
        # hidden -> visible
        visible = sample(tf.sigmoid(tf.matmul(hidden, tf.transpose(W)) + vb))
        return count + 1, limit, visible

    def _keep_going(count, limit, *args):
        return count < limit

    loop_vars = [tf.constant(0), tf.constant(k), x]
    _, _, final_visible = tf.while_loop(_keep_going, _one_step, loop_vars,
                                        parallel_iterations=1, back_prop=False)

    # The sampled values are treated as constants: no gradient flows through them.
    return tf.stop_gradient(final_visible)

In [115]:
### Training

# One step of contrastive divergence: compare the statistics of the data (x, h)
# with those of a one-step Gibbs reconstruction (x_sample, h_sample).
h = sample(tf.sigmoid(tf.matmul(x, W) + hb))

x_sample = gibbs_sample(1)
h_sample = sample(tf.sigmoid(tf.matmul(x_sample, W) + hb))

# Update the values of W, hb, and vb.
# Plain * and - are used instead of tf.mul / tf.sub: those functions were
# removed in TensorFlow 1.0, while the overloaded operators work on every version.
size_x = tf.cast(tf.shape(x)[0], tf.float32)  # number of rows in the batch
W_update  = (learning_rate / size_x) * (tf.matmul(tf.transpose(x), h) -
                                        tf.matmul(tf.transpose(x_sample), h_sample))

# Bias updates: difference between data and reconstruction, summed over the
# batch; keep_dims=True keeps the [1, n] shape of the bias variables.
vb_update = (learning_rate / size_x) * tf.reduce_sum(x - x_sample, 0, True)
hb_update = (learning_rate / size_x) * tf.reduce_sum(h - h_sample, 0, True)

# When we do sess.run(updt), TensorFlow will run all 3 update steps
updt = [W.assign_add(W_update), vb.assign_add(vb_update), hb.assign_add(hb_update)]

In [91]:
# Take the first parsed song and drop trailing timesteps so that its length is a
# whole multiple of num_timesteps. Floor division (//) is explicit here: with a
# plain / the trim silently becomes a no-op under true division (Python 3).
song = songs[0]
song = song[:(song.shape[0] // num_timesteps) * num_timesteps]
song.shape

(3260, 156)

In [92]:
# Sanity check: each row concatenates num_timesteps consecutive timesteps.
# // keeps the row count an integer on both Python 2 and Python 3.
np.reshape(song, [song.shape[0] // num_timesteps, song.shape[1] * num_timesteps]).shape

(326, 1560)

In [None]:
# Train the model

# Reshape every song into training rows once, up front: this work does not
# depend on the epoch, so it is hoisted out of the epoch loop.
training_sets = []
for song in songs:
    song = np.array(song)
    # Number of complete num_timesteps-long blocks; leftover timesteps are dropped.
    # Floor division keeps this an integer on both Python 2 and Python 3.
    num_blocks = song.shape[0] // num_timesteps
    song = song[:num_blocks * num_timesteps]
    # Each row concatenates num_timesteps consecutive timesteps.
    training_sets.append(np.reshape(song, [num_blocks, song.shape[1] * num_timesteps]))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    # Run through all of the training data num_epochs times
    for epoch in tqdm(range(num_epochs)):
        for song in training_sets:
            # Train the RBM on batch_size examples at a time. Batches start at
            # row 0 so the first training example is not skipped.
            for i in range(0, len(song), batch_size):
                tr_x = song[i:i+batch_size]
                sess.run(updt, feed_dict={x: tr_x})

    # Generate: run one Gibbs step starting from 10 all-zero visible vectors.
    gen = gibbs_sample(1).eval(feed_dict={x: np.zeros((10, n_visible))})
    for i in range(gen.shape[0]):
        # Skip samples in which no unit is switched on.
        if not any(gen[i,:]):
            continue
        # Here we reshape the vector to be time x notes, and then save the vector as a midi file
        s = np.reshape(gen[i,:], (num_timesteps, 2*note_range))
        statematrixToMidi(s, "generated_{}".format(i))

Test the custom RBM class.

In [7]:
from rbm import RBM

In [8]:
# Instantiate the project-local RBM class with its default constructor arguments.
# NOTE(review): presumably its default visible-layer size matches n_visible -- confirm in rbm.py.
model = RBM()

In [9]:
# Build one training matrix from all songs: every row concatenates
# num_timesteps consecutive timesteps of a single song.
X = []
for song in songs:
    song = np.array(song)
    # Number of complete num_timesteps-long blocks; leftover timesteps are dropped.
    # Floor division keeps this an integer on both Python 2 and Python 3
    # (a plain / yields a float under true division and breaks the reshape).
    num_blocks = song.shape[0] // num_timesteps
    song = song[:num_blocks * num_timesteps]
    # Reshape into blocks of num_timesteps
    song = np.reshape(song, [num_blocks, song.shape[1] * num_timesteps])
    X.extend(song)
X = np.array(X)

In [10]:
# Expected: (total number of blocks across all songs, columns per timestep * num_timesteps).
X.shape

(326, 1560)

In [11]:
# Train the RBM on X. The recorded output shows a 500-iteration progress bar
# and a checkpoint written to models/rbm.ckpt.
model.fit(X)

100%|██████████| 500/500 [00:11<00:00, 44.24it/s]


Model saved in file: models/rbm.ckpt


In [16]:
# Ask the model for samples, passing 10 all-zero vectors of length n_visible.
# NOTE(review): presumably these seed a Gibbs chain inside RBM.sample -- confirm in rbm.py.
gen = model.sample(np.zeros((10, n_visible)))

In [17]:
# Write each non-empty generated sample to its own MIDI file.
for i in range(gen.shape[0]):
    sample_row = gen[i,:]
    if any(sample_row):
        # Reshape the flat vector to time x notes before saving it as a midi file
        s = np.reshape(sample_row, (num_timesteps, 2*note_range))
        statematrixToMidi(s, "generated_{}".format(i))