# Main notebook - training the model and outputting midi files

##  Importing functions / tf packages

In [22]:
import pygame
import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from func.jupyter_tensorboard import show_graph
from func.midi_to_statematrix import *
from func.data import *
import os
import pickle
import signal
import numpy as np
import random
path = os.getcwd()

## Functions to save and import models 

In [3]:
def save_obj(obj, name):
    """Pickle `obj` into `<current working directory>/<name>.pkl`.

    The file is opened in binary write mode (an existing file is
    overwritten) and the object is serialised with the highest pickle
    protocol available.
    """
    target = os.getcwd() + '/' + name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
        

def load_obj(name):
    """Return the object pickled in `<current working directory>/<name>.pkl`.

    The pickle is read with `encoding='latin1'`.
    """
    source = os.getcwd() + '/' + name + '.pkl'
    with open(source, 'rb') as handle:
        restored = pickle.load(handle, encoding='latin1')
    return restored

## From midi to inputs 

We first define the batch width (number of sequences in a batch), the batch length (length of each sequence) and the interval between possible start locations. Then we use Johnson's functions (`loadPieces`, `getPieceSegment` and `getPieceBatch`) to transform MIDI files into inputs that are ready to be fed to the model for training.

In [6]:
# Batch geometry. These are module-level globals read directly by loadPieces
# (batch_len), getPieceSegment (batch_len, division_len) and getPieceBatch
# (batch_width), so they must be defined before any of those are called.
batch_width = 10 # number of sequences in a batch
batch_len = 16*8 # length of each sequence, in rows of the note-state matrix
division_len = 16 # interval between possible start locations (randrange step)

In [7]:
def loadPieces(dirpath):
    """Load the MIDI files found directly inside `dirpath`.

    Only entries whose last four characters are exactly '.mid' or '.MID' are
    considered. Each one is converted with `midiToNoteStateMatrix`; results
    shorter than the global `batch_len` are dropped. A "Loaded <name>" line
    is printed for every piece that is kept.

    Returns:
        dict mapping the file name without its 4-character extension to the
        matrix returned by `midiToNoteStateMatrix`.
    """
    loaded = {}

    for filename in os.listdir(dirpath):
        stem, extension = filename[:-4], filename[-4:]
        if extension in ('.mid', '.MID'):
            matrix = midiToNoteStateMatrix(os.path.join(dirpath, filename))
            # Keep only pieces long enough to cut at least one segment from.
            if len(matrix) >= batch_len:
                loaded[stem] = matrix
                print("Loaded {}".format(stem))

    return loaded

def getPieceSegment(pieces):
    """Cut one random training segment out of a randomly chosen piece.

    A piece is drawn uniformly from `pieces.values()`, then a start index is
    drawn from `range(0, len(piece) - batch_len, division_len)` and
    `batch_len` consecutive rows are sliced from it. `batch_len` and
    `division_len` are read from module-level globals.

    Args:
        pieces: dict whose values are sliceable note-state matrices.

    Returns:
        (seg_in, seg_out): `seg_out` is the raw slice and `seg_in` is
        `noteStateMatrixToInputForm(seg_out)`.

    Raises:
        ValueError: if the chosen piece has fewer than `batch_len` rows.
        IndexError: if `pieces` is empty (raised by `random.choice`).
    """
    piece_output = random.choice(list(pieces.values()))

    last_start = len(piece_output) - batch_len
    if last_start < 0:
        raise ValueError(
            "piece has {} rows but batch_len is {}".format(len(piece_output), batch_len))

    # loadPieces keeps pieces whose length equals batch_len, but
    # randrange(0, 0, step) raises on an empty range. Such a piece has exactly
    # one valid segment, starting at 0.
    if last_start > 0:
        start = random.randrange(0, last_start, division_len)
    else:
        start = 0

    seg_out = piece_output[start:start + batch_len]
    seg_in = noteStateMatrixToInputForm(seg_out)

    return seg_in, seg_out

def getPieceBatch(pieces):
    """Assemble one batch of `batch_width` random segments.

    Calls `getPieceSegment(pieces)` `batch_width` times (a module-level
    global) and stacks the inputs and the outputs separately.

    Returns:
        (inputs, outputs): two numpy arrays whose leading axis has length
        `batch_width`.
    """
    segments = [getPieceSegment(pieces) for _ in range(batch_width)]
    inputs, outputs = zip(*segments)
    # Use the `np` alias the notebook imports explicitly; the bare name
    # `numpy` is only in scope if one of the wildcard imports leaks it.
    return np.array(inputs), np.array(outputs)

## Loading the MIDI files

In [8]:
# Load every '.mid' / '.MID' file found in the `music_test` folder under the
# working directory captured in `path`. Files whose note-state matrix is
# shorter than `batch_len` are skipped by loadPieces.
pcs = loadPieces(path  + '/music_test')

Loaded alb_esp2
Loaded alb_esp5
Loaded appass_2
Loaded appass_3
Loaded bach_846
Loaded bach_847
Loaded bach_850
Loaded beethoven_hammerklavier_1
Loaded beethoven_les_adieux_1
Loaded beethoven_les_adieux_2
Loaded beethoven_opus10_2
Loaded beethoven_opus10_3
Loaded beethoven_opus22_1
Loaded beethoven_opus22_4
Loaded beethoven_opus90_2


## Defining the model architecture 

Next, we define the model architecture through the `Model` function. We also define helper functions (`cross_entropy`, `train_step` and `evaluate`) that set up a loss, the optimization step and an error count.

In [9]:
# Train on a single piece: build a one-entry dict holding only that piece,
# in the same {name: matrix} form that loadPieces returns.
song = {'beethoven_hammerklavier_1': pcs['beethoven_hammerklavier_1']}

In [16]:
def Model(t_layer_sizes,p_layer_sizes,xs,ys):
    """Build the two-stage LSTM graph (time stack, then pitch stack) and its loss.

    Inputs use time steps 0..T-2 of `xs`; targets use time steps 1..T-1 of `ys`.

    Args:
        t_layer_sizes: iterable of unit counts; one LSTMCell per entry forms
            the first ("time") stack.
        p_layer_sizes: iterable of unit counts for the second ("pitch") stack;
            a final LSTMCell(2) is always appended to it.
        xs: 4-D float tensor whose axes the code names (batch, time, note,
            feature). The size of the last axis must be statically known.
        ys: 4-D float tensor with axes (batch, time, note, 2).

    Returns:
        (outputs_pitch, loss) where `outputs_pitch` is the raw output of the
        pitch stack arranged as (batch, time-1, note, 2) and `loss` is the
        scalar -mean(log(y*p + (1-y)*(1-p))) with p = sigmoid(outputs_pitch)
        and y = ys[:, 1:].

    NOTE: both `dynamic_rnn` calls now pass `time_major=True`. The tensors fed
    to them are laid out with the sequence axis first, but the default
    (`time_major=False`) treats axis 0 as the batch, so the recurrence used to
    run over the flattened axis 1 instead. Variable names and shapes are
    unchanged, but checkpoints trained before this fix were trained with the
    other recurrence and should be retrained.
    """
    # Drop the last time step: there is no following step to predict for it.
    input_slice = xs[:, 0:-1]

    dynamic_shape = tf.shape(input_slice)
    n_batch, n_time, n_note = dynamic_shape[0], dynamic_shape[1], dynamic_shape[2]
    n_ipn = input_slice.get_shape().as_list()[3]

    # (batch, time, note, feat) -> (time, batch*note, feat): every
    # (batch, note) pair becomes an independent sequence over time.
    input_slice = tf.reshape(
        tf.transpose(input_slice, (1, 0, 2, 3)),
        (n_time, n_batch * n_note, n_ipn))

    # First LSTM stack: recurrence along the time axis.
    lstm_time = [tf.contrib.rnn.LSTMCell(size) for size in t_layer_sizes]
    time_model = tf.contrib.rnn.MultiRNNCell(lstm_time)

    with tf.variable_scope('lstm1'):
        outputs_time, _ = tf.nn.dynamic_rnn(
            time_model, input_slice, dtype=tf.float32, time_major=True)
        for one_lstm_cell in lstm_time:
            one_kernel, one_bias = one_lstm_cell.variables
            # Every layer logs under the same summary name (as in the
            # original code).
            tf.summary.histogram("Kernel-time", one_kernel)
            tf.summary.histogram("Bias-time", one_bias)

    # (time, batch*note, hidden) -> (note, batch*time, hidden): every
    # (batch, time) pair becomes an independent sequence over notes.
    n_hidden = outputs_time.get_shape().as_list()[2]
    time_final = tf.reshape(
        tf.transpose(
            tf.reshape(outputs_time, (n_time, n_batch, n_note, n_hidden)),
            (2, 1, 0, 3)),
        (n_note, n_batch * n_time, n_hidden))

    # Each note also sees the target of the note below it; the lowest note
    # gets zeros instead.
    start_note_values = tf.zeros([1, tf.shape(time_final)[1], 2], tf.float32)
    correct_choices = tf.reshape(
        tf.transpose(ys[:, 1:, 0:-1, :], (2, 0, 1, 3)),
        (n_note - 1, n_batch * n_time, 2))
    note_choices_inputs = tf.concat([start_note_values, correct_choices], axis=0)
    note_inputs = tf.concat([time_final, note_choices_inputs], axis=2)

    # Second LSTM stack: recurrence along the note axis, ending in 2 units.
    lstm_pitch = [tf.contrib.rnn.LSTMCell(size) for size in p_layer_sizes]
    lstm_pitch.append(tf.contrib.rnn.LSTMCell(2))
    pitch_model = tf.contrib.rnn.MultiRNNCell(lstm_pitch)

    with tf.variable_scope('lstm2'):
        outputs_pitch, _ = tf.nn.dynamic_rnn(
            pitch_model, note_inputs, dtype=tf.float32, time_major=True)
        for one_lstm_cell in lstm_pitch:
            one_kernel, one_bias = one_lstm_cell.variables
            tf.summary.histogram("Kernel-pitch", one_kernel)
            tf.summary.histogram("Bias-pitch", one_bias)

    # (note, batch*time, 2) -> (batch, time, note, 2)
    outputs_pitch = tf.transpose(
        tf.reshape(outputs_pitch, (n_note, n_batch, n_time, 2)),
        (1, 2, 0, 3))

    # Loss: likelihood = 1 - p - y + 2*p*y, which equals y*p + (1-y)*(1-p).
    probabilities = tf.sigmoid(outputs_pitch)
    targets = ys[:, 1:]
    likelihood = 1.0 + (2.0 * probabilities * targets - probabilities - targets)
    loss = -tf.reduce_mean(tf.log(likelihood))

    return (outputs_pitch, loss)


def cross_entropy(output, input_y):
    """Mean sigmoid cross-entropy between logits `output` and `input_y[:, 1:]`.

    The first entry along axis 1 of `input_y` is dropped before comparison.
    Ops are created under the 'cross_entropy' name scope.
    """
    with tf.name_scope('cross_entropy'):
        targets = input_y[:, 1:]
        per_element = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=output)
        return tf.reduce_mean(per_element)


def train_step(loss, learning_rate=1e-3):
    """Return the op that applies one plain gradient-descent update to `loss`.

    Args:
        loss: scalar tensor to minimise.
        learning_rate: step size handed to GradientDescentOptimizer.
    """
    with tf.name_scope('train_step'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        return optimizer.minimize(loss)


def evaluate(output, input_y):
    """Build an op counting where argmax(output, axis=1) differs from `input_y`.

    `input_y` is cast to int64 and subtracted from the argmax; the result is
    the number of non-zero differences (op name 'error_num').

    NOTE(review): the subtraction relies on broadcasting between the argmax
    result and `input_y` - confirm the shapes callers pass are compatible.
    """
    with tf.name_scope('evaluate'):
        predicted = tf.argmax(output, axis=1)
        expected = tf.cast(input_y, tf.int64)
        mismatches = tf.count_nonzero(predicted - expected, name='error_num')

    return mismatches





## Training the model 

Now that the architecture is well-defined, we can start training the model through the training function.

In [None]:
def training(song,t_layer_sizes,p_layer_sizes, pre_trained_model=None):
    """Train the model on `song`, checkpointing to `model_save/amadeus`.

    A fresh default graph is built with `Model`, optimised with `train_step`
    and run for one epoch of `len(first piece) // batch_len` iterations. Each
    iteration draws a random batch with `getPieceBatch(song)`, applies one
    update, writes the merged summaries to `log/amadeus`, saves a checkpoint
    and prints the loss.

    Args:
        song: dict of {name: note-state matrix}; the first value sets the
            number of iterations.
        t_layer_sizes: layer sizes of the time stack (forwarded to Model).
        p_layer_sizes: layer sizes of the pitch stack (forwarded to Model).
        pre_trained_model: optional checkpoint name under `model_save/` to
            restore before training. A failed restore is reported and
            training continues from the fresh initialisation.

    Returns:
        list of the loss values, one per iteration. Earlier versions returned
        None.
    """
    tf.reset_default_graph()

    # The notebook assigns `t_input_size` in a cell that comes after the first
    # call to training(). Fall back to the value it assigns there (80) so a
    # fresh kernel run top-to-bottom does not raise NameError.
    input_width = globals().get('t_input_size', 80)

    with tf.name_scope('inputs'):
        xs = tf.placeholder(tf.float32, [None, None, None, input_width])
        ys = tf.placeholder(tf.float32, [None, None, None, 2])
    _, loss = Model(t_layer_sizes, p_layer_sizes, xs, ys)

    first_piece = list(song.values())[0]
    iters = len(first_piece) // batch_len

    print('number of batches for training: {}'.format(iters))

    step = train_step(loss)

    cur_model_name = 'amadeus'
    epoch = 1
    losses = []

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs('model_save', exist_ok=True)

    with tf.Session() as sess:

        merge = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter("log/{}".format(cur_model_name), sess.graph)
        saver = tf.train.Saver()

        try:
            sess.run(tf.global_variables_initializer())

            # Best-effort warm start: keep going on failure, but say why.
            if pre_trained_model is not None:
                try:
                    print("Load the model from: {}".format(pre_trained_model))
                    saver.restore(sess, 'model_save/{}'.format(pre_trained_model))
                except Exception as err:
                    print("Load model Failed! {}".format(err))

            for epc in range(epoch):
                print("epoch {} ".format(epc + 1))

                for itr in range(iters):
                    # getPieceBatch already returns numpy arrays.
                    training_batch_x, training_batch_y = getPieceBatch(song)
                    feed = {xs: training_batch_x, ys: training_batch_y}

                    _, cur_loss = sess.run([step, loss], feed_dict=feed)
                    merge_result = sess.run(merge, feed_dict=feed)

                    saver.save(sess, 'model_save/{}'.format(cur_model_name))

                    # Global step, so summaries from later epochs do not
                    # overwrite those of earlier ones.
                    train_writer.add_summary(merge_result, epc * iters + itr)

                    print(cur_loss)
                    losses.append(cur_loss)
        finally:
            # Flush and release the event file even if training is interrupted.
            train_writer.close()

    return losses
                

                    
                    
                

In [None]:
# Train from scratch on `song`: [300,300] is passed as t_layer_sizes and
# [100,50] as p_layer_sizes. The same sizes must be used when the model is
# rebuilt for prediction. Note that `a` is reassigned by the prediction cell.
a=training(song,[300,300],[100,50], pre_trained_model=None)

## Tensorboard

Let's visualize the results of our training process.

In [None]:
# Show the graph.
# Re-create the graph stored in model_save/amadeus.meta inside a fresh default
# graph and hand it to show_graph for inline rendering.
from func.jupyter_tensorboard import show_graph 
tf.reset_default_graph()
with tf.Session() as sess:
    # NOTE(review): neither `sess` nor `saver` is used below; only the default
    # graph populated by import_meta_graph is passed on.
    saver = tf.train.import_meta_graph('model_save/amadeus.meta')
    graph = tf.get_default_graph()
    show_graph(graph)

## Generating predictions 

Now that we have a model along with its parameters, we can use it to generate prediction probabilities through the prediction function.

In [17]:
def prediction(song,t_layer_sizes,p_layer_sizes, pre_trained_model=None):
    """Rebuild the model, load trained weights and run it on one random segment.

    Args:
        song: dict of {name: note-state matrix}; one segment is drawn from it
            with `getPieceSegment`.
        t_layer_sizes: layer sizes of the time stack; must match training.
        p_layer_sizes: layer sizes of the pitch stack; must match training.
        pre_trained_model: optional checkpoint name under `model_save/`. When
            None (the default) the latest checkpoint in `model_save/` is used.

    Returns:
        A one-element list holding the array fetched for Model's first output,
        with a leading batch axis of size 1.

    Raises:
        ValueError: if no checkpoint can be found in `model_save/`.
    """
    tf.reset_default_graph()

    with tf.name_scope('inputs'):
        xs = tf.placeholder(tf.float32, [None, None, None, t_input_size])
        ys = tf.placeholder(tf.float32, [None, None, None, 2])
    output, _ = Model(t_layer_sizes, p_layer_sizes, xs, ys)

    if pre_trained_model is not None:
        checkpoint_path = 'model_save/{}'.format(pre_trained_model)
    else:
        checkpoint_path = tf.train.latest_checkpoint('model_save/')
    if checkpoint_path is None:
        raise ValueError("No checkpoint found in 'model_save/'; run training() first.")

    xIpt, xOpt = (np.array(part, dtype='int8') for part in getPieceSegment(song))

    with tf.Session() as sess:
        # Restore directly into the variables created by Model above.
        # Restoring through import_meta_graph would load the weights into a
        # separately imported copy of the graph rather than into the variables
        # `output` is computed from.
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)

        # Add a leading batch axis of size 1 without hard-coding the segment
        # length, note count or feature width.
        feed_dict = {xs: xIpt[np.newaxis, ...], ys: xOpt[np.newaxis, ...]}
        predi = sess.run([output], feed_dict)
        return predi

In [18]:
# Size of the last axis of the `xs` placeholder. training() and prediction()
# read this as a module-level global when they build their placeholders.
# NOTE(review): this cell sits after the cell that first calls training(), so
# on a fresh kernel it has to be run before that call.
t_input_size=80

In [19]:
# Run the trained network on one random segment of `song`, using the same
# layer sizes as the training call. `a` is a one-element list; `a[0]` is the
# fetched output array and `a[0][0]` its single batch entry.
a=prediction(song,[300,300],[100,50], pre_trained_model=None)

INFO:tensorflow:Restoring parameters from model_save/amadeus


## From probabilities to binary outputs 

Finally, we define a few functions to transform the probabilities into binary values.

In [20]:
def transnote(x):
    """Binarise one 2-column array in place and return it.

    Column 0 is divided by its own maximum; only rows whose scaled value is
    exactly 1 (i.e. rows holding that maximum) are kept as 1, every other row
    becomes 0 in both columns. For the kept rows, column 1 becomes 1 when its
    original value is strictly above the 70th percentile of column 1, else 0.

    `x` is modified in place; the same array object is returned.
    """
    x[:, 0] /= x[:, 0].max()
    threshold = np.percentile(x[:, 1], 70)

    keep = x[:, 0] == 1
    # Column 1 must be rewritten from its original values, so do it first.
    x[:, 1] = np.where(keep, x[:, 1] > threshold, 0)
    x[:, 0] = keep
    return x
def superpiece(x):
    """Apply `transnote` to every entry along the first axis of `x`.

    Args:
        x: array whose first axis is iterated; each `x[i]` is passed to
           `transnote`.

    Returns:
        list with one `transnote` result per entry of the first axis.
    """
    # Index the argument itself. The previous body read the notebook global
    # `a[0][0][i]`, so it ignored `x` apart from its length and failed when
    # `a` was not defined.
    return [transnote(x[i]) for i in range(x.shape[0])]
    

## From output to midi file 

In [21]:
# Binarise the predicted segment (a[0][0] is the single batch entry of the
# fetched output) and hand it to noteStateMatrixToMidi under the name
# "outputbonus".
noteStateMatrixToMidi(np.array(superpiece(a[0][0])), name="output"+str('bonus'))