## Homework: Deep Jazz

In [1]:
import sys
sys.path.append("/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/")

import numpy as np
from music21 import stream, midi, tempo, note

# import lstm
from grammar import unparse_grammar
from preprocess import get_musical_data, get_corpus_data

from qa import prune_grammar, prune_notes, clean_up_notes# 
from generator import __sample, __generate_grammar, __predict

In [2]:
N_epochs = 128  # default
data_fn = 'midi/' + 'original_metheny.mid'  # 'And Then I Knew' by Pat Metheny
out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)

In [3]:
max_len = 20
max_tries = 1000
diversity = 0.5

# musical settings
bpm = 130

# get data
chords, abstract_grammars = get_musical_data(data_fn)
corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

corpus length: 193
total # of values: 78


In [4]:
chords[0]

[<music21.instrument.Piano Piano>,
 <music21.tempo.MetronomeMark Quarter=112.0>,
 <music21.key.Key of G major>,
 <music21.meter.TimeSignature 4/4>]

In [5]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

def get_keras_model(max_len, N_values):
    # build a 2 stacked LSTM
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=(max_len, N_values)))
    model.add(Dropout(0.2))
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(N_values))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

Using TensorFlow backend.


## Task

Replace previous model with equivalent in prettytensor or tf.slim

Try to make you code as compact as possible

In [6]:
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.rnn import MultiRNNCell, LSTMCell, DropoutWrapper, LayerNormBasicLSTMCell

def get_init():
    return tf.random_normal_initializer(0.0, 0.2)
class myJazzNet:
    def __init__(self, max_len, N_values):
        self.max_len = max_len
        self.N_values = N_values
        self.keep_prob = 0.8
        self.predictions = None
        self.loss = None
        self.optimization = None
        self.accuracy = None
        self.sess = None
        self.create_placeholders()
        self.build()
    
    def create_placeholders(self):
        self.data = tf.placeholder(tf.float32, 
                                   [None, self.max_len, self.N_values]) 
        self.target = tf.placeholder(tf.float32, [None, self.N_values])
    
    def build(self):
        print("build")
        out, _ = tf.nn.dynamic_rnn(
            MultiRNNCell([LSTMCell(128, initializer=get_init()), LSTMCell(128, initializer=get_init())]),
            self.data, dtype=tf.float32)
        
        index = tf.range(0, tf.shape(out)[0]) * self.max_len + (self.max_len - 1)
        out = tf.reshape(out, [-1, 128])
        cut_out = tf.gather(out, index)
                
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.softmax,
                            weights_initializer=get_init(),
                            biases_initializer=get_init()):
            
            self.predictions = slim.fully_connected(cut_out, self.N_values, scope='final')
        
        self.loss = tf.losses.softmax_cross_entropy(self.target, self.predictions)
        self.optimization = tf.train.AdamOptimizer(learning_rate=.001).minimize(self.loss)
                
        correct_predictions = tf.equal(tf.argmax(self.target, 1), tf.argmax(self.predictions, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
        
    def fit(self, X, y, batch_size, epochs):
        config = tf.ConfigProto(allow_soft_placement=True)
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())
        i = 0
        
        for epoch in range(epochs):
            fd = {self.data: X, self.target: y}
            loss, _, acc = self.sess.run([self.loss, self.optimization, self.accuracy], feed_dict=fd)
            print('epoch_{} loss: {:.3f}, acc: {:.3f}'.format(epoch, loss, acc))
    def predict(self, x, verbose=0):
        return self.sess.run(self.predictions, {self.data: x})

            
def get_slim_model(max_len, N_values):
    return myJazzNet(max_len, N_values)

In [7]:
get_model = get_slim_model

In [8]:
import numpy as np

''' Build a 2-layer LSTM from a training corpus '''


def build_model(corpus, val_indices, max_len, N_epochs=128):
    # number of different values or words in corpus
    N_values = len(set(corpus))

    # cut the corpus into semi-redundant sequences of max_len values
    step = 3
    sentences = []
    next_values = []
    for i in range(0, len(corpus) - max_len, step):
        sentences.append(corpus[i: i + max_len])
        next_values.append(corpus[i + max_len])
    print('nb sequences:', len(sentences))

    # transform data into binary matrices
    X = np.zeros((len(sentences), max_len, N_values), dtype=np.bool)
    y = np.zeros((len(sentences), N_values), dtype=np.bool)
    for i, sentence in enumerate(sentences):
        for t, val in enumerate(sentence):
            X[i, t, val_indices[val]] = 1
        y[i, val_indices[next_values[i]]] = 1

    model = get_model(max_len, N_values)
    model.fit(X, y, batch_size=128, epochs=N_epochs)

    return model


In [9]:
# build model
model = build_model(corpus=corpus, val_indices=val_indices,
                         max_len=max_len, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()

# generation loop
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)

    # generate grammar
    curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                      abstract_grammars=abstract_grammars,
                                      values=values, val_indices=val_indices,
                                      indices_val=indices_val,
                                      max_len=max_len, max_tries=max_tries,
                                      diversity=diversity)

    curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

    # Pruning #1: smoothing measure
    curr_grammar = prune_grammar(curr_grammar)

    # Get notes from grammar and chords
    curr_notes = unparse_grammar(curr_grammar, curr_chords)

    # Pruning #2: removing repeated and too close together notes
    curr_notes = prune_notes(curr_notes)

    # quality assurance: clean up notes
    curr_notes = clean_up_notes(curr_notes)

    # print # of notes in curr_notes
    print('After pruning: %s notes' % (len([i for i in curr_notes
                                            if isinstance(i, note.Note)])))

    # insert into the output stream
    for m in curr_notes:
        out_stream.insert(curr_offset + m.offset, m)
    for mc in curr_chords:
        out_stream.insert(curr_offset + mc.offset, mc)

    curr_offset += 4.0

out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

# Play the final stream through output (see 'play' lambda function above)
play = lambda x: midi.realtime.StreamPlayer(x).play()
play(out_stream)

# save stream
mf = midi.translate.streamToMidiFile(out_stream)
mf.open(out_fn, 'wb')
mf.write()
mf.close()    

nb sequences: 58
build


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


epoch_0 loss: 4.359, acc: 0.000
epoch_1 loss: 4.347, acc: 0.103
epoch_2 loss: 4.332, acc: 0.207
epoch_3 loss: 4.315, acc: 0.310
epoch_4 loss: 4.295, acc: 0.345
epoch_5 loss: 4.274, acc: 0.362
epoch_6 loss: 4.251, acc: 0.414
epoch_7 loss: 4.225, acc: 0.483
epoch_8 loss: 4.196, acc: 0.517
epoch_9 loss: 4.164, acc: 0.569
epoch_10 loss: 4.133, acc: 0.569
epoch_11 loss: 4.100, acc: 0.569
epoch_12 loss: 4.068, acc: 0.569
epoch_13 loss: 4.035, acc: 0.569
epoch_14 loss: 4.003, acc: 0.621
epoch_15 loss: 3.973, acc: 0.638
epoch_16 loss: 3.942, acc: 0.638
epoch_17 loss: 3.915, acc: 0.672
epoch_18 loss: 3.886, acc: 0.690
epoch_19 loss: 3.862, acc: 0.690
epoch_20 loss: 3.838, acc: 0.724
epoch_21 loss: 3.816, acc: 0.741
epoch_22 loss: 3.795, acc: 0.759
epoch_23 loss: 3.774, acc: 0.759
epoch_24 loss: 3.755, acc: 0.759
epoch_25 loss: 3.733, acc: 0.759
epoch_26 loss: 3.719, acc: 0.759
epoch_27 loss: 3.703, acc: 0.793
epoch_28 loss: 3.689, acc: 0.793
epoch_29 loss: 3.676, acc: 0.793
epoch_30 loss: 3.665

StreamPlayerException: StreamPlayer requires pygame.  Install first

You can play generated sample using any midi player

Under linux I prefer timidity

In [None]:
!! timidity midi/deepjazz_on_metheny...128_epochs.midi