In [1]:
import pygame
import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from func.jupyter_tensorboard import show_graph

from func.midi_to_statematrix import *
from func.data import *
import func.multi_training



import os
#import cPickle as pickle
import pickle
import signal
import numpy as np
import random

path = os.getcwd()



In [2]:
def play_music(music_file):
    """
    stream music with mixer.music module in blocking manner
    this will stream the sound from disk while playing
    """
    clock = pygame.time.Clock()
    try:
        pygame.mixer.music.load(music_file)
        print("Music file %s loaded!" % music_file)
    except pygame.error:
        print("File %s not found! (%s)" % (music_file, pygame.get_error()))
        return
    pygame.mixer.music.play()
    while pygame.mixer.music.get_busy():
        # check if playback has finished
        
        clock.tick(30)
        pygame.quit()

freq = 44100    # audio CD quality
bitsize = -16   # unsigned 16 bit
channels = 2    # 1 is mono, 2 is stereo
buffer = 1024    # number of samples
pygame.mixer.init(freq, bitsize, channels, buffer)
# optional volume 0 to 1.0
pygame.mixer.music.set_volume(0.8)



In [None]:
play_music(path + '/music_test/' + 'beethoven_opus10_1.mid')

In [3]:
pcs = func.multi_training.loadPieces(path  + '/music_test')

Loaded alb_esp2
Loaded alb_esp5
Loaded appass_2
Loaded appass_3
Loaded bach_846
Loaded bach_847
Loaded bach_850
Loaded beethoven_hammerklavier_1
Loaded beethoven_les_adieux_1
Loaded beethoven_les_adieux_2
Loaded beethoven_opus10_2
Loaded beethoven_opus10_3
Loaded beethoven_opus22_1
Loaded beethoven_opus22_4
Loaded beethoven_opus90_2


In [4]:
batch_width = 10 # number of sequences in a batch
batch_len = 16*8 # length of each sequence
division_len = 16 # interval between possible start locations

def loadPieces(dirpath):

    pieces = {}

    for fname in os.listdir(dirpath):
        if fname[-4:] not in ('.mid','.MID'):
            continue

        name = fname[:-4]

        outMatrix = midiToNoteStateMatrix(os.path.join(dirpath, fname))
        if len(outMatrix) < batch_len:
            continue

        pieces[name] = outMatrix
        print("Loaded {}".format(name))

    return pieces

def getPieceSegment(pieces):
    pcs=pieces.values()
    piece_output = random.choice(list(pcs))
    start = random.randrange(0,len(piece_output)-batch_len,division_len)
    
    # print "Range is {} {} {} -> {}".format(0,len(piece_output)-batch_len,division_len, start)

    seg_out = piece_output[start:start+batch_len]
    seg_in = noteStateMatrixToInputForm(seg_out)

    return seg_in, seg_out

def getPieceBatch(pieces):
    i,o = zip(*[getPieceSegment(pieces) for _ in range(batch_width)])
    return numpy.array(i), numpy.array(o)

In [5]:
def trainPiece(model,pieces,epochs,start=0):
    stopflag = [False]
    def signal_handler(signame, sf):
        stopflag[0] = True
    old_handler = signal.signal(signal.SIGINT, signal_handler)
    for i in range(start,start+epochs):
        if stopflag[0]:
            break
        error = model.update_fun(*getPieceBatch(pieces))
        if i % 100 == 0:
            print("epoch {}, error={}".format(i,error))
        if i % 500 == 0 or (i % 100 == 0 and i < 1000):
            xIpt, xOpt = map(numpy.array, getPieceSegment(pieces))
            noteStateMatrixToMidi(numpy.concatenate((numpy.expand_dims(xOpt[0], 0), model.predict_fun(batch_len, 1, xIpt[0])), axis=0),'output/sample{}'.format(i))
            pickle.dump(model.learned_config,open('output/params{}.p'.format(i), 'wb'))
    signal.signal(signal.SIGINT, old_handler)

In [6]:
def reshape_time(x):
    
    input_slice = x[:,0:-1]
    n_batch, n_time, n_note, n_ipn = input_slice.shape
        
    # time_inputs is a matrix (time, batch/note, input_per_note)
    time_inputs = input_slice.transpose((1,0,2,3)).reshape((n_time,n_batch*n_note,n_ipn))
    return time_inputs,n_batch,n_time,n_note,n_ipn

In [7]:
song={}
song['beethoven_hammerklavier_1']=pcs['beethoven_hammerklavier_1']

In [8]:
a,b,c,d = [0,1,2,3]

In [96]:
def Model(t_layer_sizes,p_layer_sizes,xs,ys):

    
    #xs
    input_slice = xs[:,0:-1]
    
    n_batch, n_time, n_note, n_ipn = tf.shape(input_slice)[0], tf.shape(input_slice)[1],tf.shape(input_slice)[2],input_slice.get_shape().as_list()[3]
    
    input_slice = tf.reshape(tf.transpose(input_slice,(1,0,2,3)),(n_time,n_batch*n_note,n_ipn))
    
    
    
    t_input_size = 80
    lstm_time=[]
    for i in t_layer_sizes:
        lstm_time.append(tf.contrib.rnn.LSTMCell(i))

    time_model=tf.contrib.rnn.MultiRNNCell(lstm_time)        
    init_state_time=time_model.zero_state(tf.shape(ys)[0],tf.float32)
    
    
    
    #output, self.final_state = tf.nn.dynamic_rnn(...)


    
    
    
    with tf.variable_scope('lstm1'):
        #tf.get_variable
        outputs_time,final_state_time=tf.nn.dynamic_rnn(time_model, input_slice, dtype = tf.float32)
        for one_lstm_cell in lstm_time:
            one_kernel, one_bias = one_lstm_cell.variables
            # I think TensorBoard handles summaries with the same name fine.
            tf.summary.histogram("Kernel-time", one_kernel)
            tf.summary.histogram("Bias-time", one_bias)    
    p_input_size = t_layer_sizes[-1] + 2

    # Transpose to be (note, batch/time, hidden_states)
    n_hidden = outputs_time.get_shape().as_list()[2]
   
    time_final = tf.reshape(tf.transpose(tf.reshape(outputs_time,(n_time,n_batch,n_note,n_hidden)),(2,1,0,3)),(n_note,n_batch*n_time,n_hidden))
    
    #ys:
    
        
    start_note_values = tf.zeros([1,tf.shape(time_final)[1] ,2], tf.float32)
    
    correct_choices = tf.reshape(tf.transpose(ys[:,1:,0:-1,:],(2,0,1,3)),(n_note-1,n_batch*n_time,2))
    
    note_choices_inputs = tf.concat([start_note_values, correct_choices], axis=0)
    
    
    note_inputs = tf.concat( [time_final, note_choices_inputs], axis=2)
    num_timebatch = note_inputs.shape[1]
    
    
    # note_choices_inputs represents the last chosen note. Starts with [0,0], doesn't include last note.
    # In (note, batch/time, 2) format
    # Shape of start is thus (1, N, 2), concatenated with all but last element of output_mat transformed to (x, N, 2)
    ##start_note_values = T.alloc(np.array(0,dtype=np.int8), 1, tf.shape(time_final)[1], 2 )
    ##correct_choices = self.output_mat[:,1:,0:-1,:].transpose((2,0,1,3)).reshape((n_note-1,n_batch*n_time,2))
    ##note_choices_inputs = T.concatenate([start_note_values, correct_choices], axis=0)
        
    # Together, this and the output from the last LSTM goes to the new LSTM, but rotated, so that the batches in
    # one direction are the steps in the other, and vice versa.
    ##note_inputs = T.concatenate( [time_final, note_choices_inputs], axis=2 )
    ##num_timebatch = note_inputs.shape[1]
        
    
    
    
    
    lstm_pitch=[]

    for i in p_layer_sizes:
        lstm_pitch.append(tf.contrib.rnn.LSTMCell(i))
    lstm_pitch.append(tf.contrib.rnn.LSTMCell(2))


    pitch_model=tf.contrib.rnn.MultiRNNCell(lstm_pitch)
    
    init_state_pitch=pitch_model.zero_state(tf.shape(note_inputs)[0],tf.float32)
    with tf.variable_scope('lstm2'):
        outputs_pitch,final_state_pitch=tf.nn.dynamic_rnn(pitch_model,note_inputs,dtype = tf.float32)
        for one_lstm_cell in lstm_pitch:
            one_kernel, one_bias = one_lstm_cell.variables
            # I think TensorBoard handles summaries with the same name fine.
            
            tf.summary.histogram("Kernel-pitch", one_kernel)
            tf.summary.histogram("Bias-pitch", one_bias)    
        #variables_names =[v.name for v in tf.trainable_variables()]
        #values = sess.run(variables_names)
                
        #for k,v in zip(variables_names, values):
            #tf.summary.histogram('parameters/{}'.format(k),v)
    #note_final = get_last_layer(note_result).reshape((n_note,n_batch,n_time,2)).transpose(1,2,0,3)
    outputs_pitch = tf.transpose(tf.reshape(outputs_pitch,(n_note,n_batch,n_time,2)),(1,2,0,3)) 
    
    #ys is output_mat
    #outputs_pitch is note_final
    
    #ce=tf.multiply(-1.0,tf.reduce_mean(tf.log(tf.add(tf.add(tf.add(tf.multiply(2.0, tf.cast(outputs_pitch*ys[:,1:],tf.float32)) ,tf.cast(outputs_pitch,tf.float32)), tf.cast(ys[:,1:],tf.float32)),  1.0) )))
    
    
    #2*outputs_pitch*ys[:,1:] - outputs_pitch - ys[:,1:] + 1
    
    #first
    term_1=tf.multiply(2.0,tf.multiply(tf.sigmoid(outputs_pitch),ys[:,1:]))
    term_2=tf.multiply(-1.0,tf.sigmoid(outputs_pitch))
    term_3=tf.multiply(-1.0,ys[:,1:])
    
    term_4=tf.add(term_1,term_2)
    term_5=tf.add(term_4,term_3)
    term_6=tf.add(1.0,term_5)
    
    term_7=tf.log(term_6)
    term_8=tf.reduce_mean(term_7)
    
    term_9=tf.multiply(-1.0,term_8)
    
    
    
    #ce=tf.reduce_mean(tf.log(tf.add(tf.add(tf.add(tf.multiply(2.0, tf.cast(outputs_pitch*ys[:,1:],tf.float32)) ,tf.multiply(tf.cast(outputs_pitch,tf.float32),-1.0)), tf.multiply( tf.cast(ys[:,1:],tf.float32),-1.0)),  1.0) ))
    #ce=-tf.reduce_mean(tf.log(2*outputs_pitch*ys[:,1:] - outputs_pitch - ys[:,1:] + 1 ))
    return (outputs_pitch,term_9)


def cross_entropy(output, input_y):
    with tf.name_scope('cross_entropy'):
        
        
        ce = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=input_y[:,1:], logits=output))
        
    return ce


def train_step(loss, learning_rate=1e-3):
    with tf.name_scope('train_step'):
        step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    return step


def evaluate(output, input_y):
    with tf.name_scope('evaluate'):
        pred = tf.argmax(output, axis=1)
        error_num = tf.count_nonzero(pred - tf.cast(input_y, tf.int64), name='error_num')
        
    return error_num

def training(song,t_layer_sizes,p_layer_sizes, pre_trained_model=None):
    
    tf.reset_default_graph()
    # define the variables and parameter needed during training
    with tf.name_scope('inputs'):
        xs = tf.placeholder(tf.float32, [None,None,None, t_input_size])
        ys = tf.placeholder(tf.float32, [None,None,None, 2])
    m= Model(t_layer_sizes,p_layer_sizes,xs,ys) 
    output= m[0]
    
    print(output.shape)
    loss=m[1]
    #loss=cross_entropy(output, ys)
    
    iters = int(np.array(list(song.values())[0]).shape[0] / batch_len)
    #iters=2
    print('number of batches for training: {}'.format(iters))

    step = train_step(loss)
    eve = evaluate(output, ys)

    iter_total = 0
    best_acc = 0
    cur_model_name = 'amadeus'

    epoch=1
    
    with tf.Session() as sess:
        
        merge = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter("log/{}".format(cur_model_name), sess.graph)
        saver = tf.train.Saver()
        
        sess.run(tf.global_variables_initializer())
        #train_writer = tf.summary.FileWriter("log/", graph=tf.get_default_graph())

        # try to restore the pre_trained
        if pre_trained_model is not None:
            try:
                print("Load the model from: {}".format(pre_trained_model))
                saver.restore(sess, 'model_save/{}'.format(pre_trained_model))
            except Exception:
                print("Load model Failed!")
                pass

        for epc in range(epoch):
            print("epoch {} ".format(epc + 1))

            for itr in range(iters):
   
                
                
                
                training_batch_x,training_batch_y= map(numpy.array, getPieceBatch(song))
                #xtrain,n_batch,n_time,n_note,n_ipn=reshape_time(training_batch_x)
                #ytrain,_,_,_,_=reshape_time(training_batch_y)
                _, cur_loss = sess.run([step, loss], feed_dict={xs: training_batch_x, ys:training_batch_y})
                #out=sess.run(output, feed_dict={xs: training_batch_x, ys: training_batch_y})
                
                #variables_names =[v.name for v in tf.trainable_variables()]
                #print(variables_names)
                    
                merge_result=sess.run(merge, feed_dict={xs: training_batch_x, ys: training_batch_y})
                
                
                
                
                saver.save(sess,'model_save/{}'.format(cur_model_name))
                
                
                
                train_writer.add_summary(merge_result,itr)
               
                
                print(cur_loss)
                
                if itr==iters-1:
                    out=sess.run(output, feed_dict={xs: training_batch_x, ys: training_batch_y})
                    
                    
                    
                    
                    #u=noteStateMatrixToMidi(out, name="output")
                    
                    return out
                    
                    
                    
                



In [97]:
t_input_size=80

In [98]:
a=training(song,[300,300],[100,50], pre_trained_model=None)

(?, ?, ?, 2)
number of batches for training: 33
epoch 1 
0.716203
0.7066
0.697048
0.68673
0.6774
0.668691
0.659611
0.652213
0.64457
0.637618
0.631845
0.626535
0.62153
0.616815
0.612021
0.608712
0.604914
0.601328
0.596423
0.591237
0.585229
0.582174
0.578902
0.577721
0.575391
0.573682
0.571926
0.570089
0.568151
0.567584
0.566515
0.565727
0.56282


In [122]:
a[9][10]

array([[-0.25056762, -0.40417114],
       [-0.2394416 , -0.40448803],
       [-0.22346891, -0.40092903],
       [-0.20804687, -0.39344454],
       [-0.18789391, -0.3820577 ],
       [-0.1728268 , -0.36757389],
       [-0.16255133, -0.35132882],
       [-0.15626599, -0.33480337],
       [-0.15750639, -0.31893617],
       [-0.16328757, -0.30327019],
       [-0.1728562 , -0.28979069],
       [-0.18288557, -0.2778641 ],
       [-0.19437385, -0.2663919 ],
       [-0.20668656, -0.26051664],
       [-0.21709837, -0.25710186],
       [-0.22817586, -0.25540087],
       [-0.23419763, -0.25717551],
       [-0.24271592, -0.2617335 ],
       [-0.24933326, -0.26824287],
       [-0.25568137, -0.27412373],
       [-0.26160192, -0.28287664],
       [-0.26658174, -0.29250243],
       [-0.27207205, -0.30091143],
       [-0.27575022, -0.30839542],
       [-0.27914846, -0.31374696],
       [-0.2825343 , -0.32109359],
       [-0.28445145, -0.32724613],
       [-0.27919963, -0.32580498],
       [-0.28744939,

In [88]:
for i in range(u.shape[0]):
    for j in range(u.shape[1]):
        for k in range(u.shape[2]):
            if (u[i][j][k]>=-0.01):
                u[i][j][k]=int(1)
            else:
                u[i][j][k]=0

In [90]:
np.max(u)

0.0

In [95]:
def convert_predictions(output):
    print(1)
    

In [96]:
convert_predictions(1)

1


In [19]:
for i in range(a.shape[0]):
    
    noteStateMatrixToMidi(a[i], name="output"+str(i))
    

In [29]:
a[1][121]

array([[ 0.37040362,  0.40908483],
       [ 0.36755195,  0.40925404],
       [ 0.36170253,  0.40837231],
       [ 0.3560617 ,  0.40561283],
       [ 0.34812027,  0.40247095],
       [ 0.34115115,  0.39772391],
       [ 0.33381495,  0.39364222],
       [ 0.3277202 ,  0.38734758],
       [ 0.32576615,  0.38102186],
       [ 0.32767773,  0.37218866],
       [ 0.33295134,  0.36143649],
       [ 0.33928391,  0.35086879],
       [ 0.34780291,  0.34026527],
       [ 0.35637474,  0.32949015],
       [ 0.36285746,  0.32135764],
       [ 0.37163585,  0.31198409],
       [ 0.37798977,  0.30646452],
       [ 0.38574812,  0.2993013 ],
       [ 0.39108843,  0.29767367],
       [ 0.3939552 ,  0.29714197],
       [ 0.3963429 ,  0.30069649],
       [ 0.39912614,  0.30352744],
       [ 0.40130192,  0.30564278],
       [ 0.40158674,  0.31167349],
       [ 0.40207464,  0.31722325],
       [ 0.4016588 ,  0.32252705],
       [ 0.39989945,  0.33097124],
       [ 0.40070662,  0.33540243],
       [ 0.39871579,

In [None]:
# show the graph
from func.jupyter_tensorboard import show_graph 
tf.reset_default_graph()
with tf.Session() as sess:
    saver = tf.train.import_meta_graph('model_save/amadeus.meta')
    graph = tf.get_default_graph()
    show_graph(graph)