In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so that edits to local helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Colab only: mount Google Drive at /content/drive so the project folder and
# MIDI data referenced by the paths below are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Work from the project folder on Drive; later cells use paths relative to it
# (python3-midi/, out/, ./tmp/).
cd /content/drive/MyDrive/MusicNotebook/

In [None]:
!apt-get install swig

In [None]:
# Enter the bundled python3-midi source tree (relative to the project folder).
# The working-directory change persists for the following cells.
cd python3-midi/

In [None]:
# Build and install the package whose setup.py is in the current directory
# (python3-midi/, entered by the previous cell).
!python setup.py install

In [None]:
# Go back up one level, i.e. to the project folder when the cells are run in order.
cd ../

In [11]:
import numpy as np
import pandas as pd
import msgpack
import glob
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import time
from tensorflow import keras
from tensorflow.keras import layers

import midi_manipulation


%matplotlib inline

In [None]:
# Colab magic selecting TensorFlow 1.x; the graph code below relies on the
# TF1 API (tf.placeholder, tf.Session, tf.train.AdamOptimizer).
# NOTE(review): this cell is placed after the cell that runs
# `import tensorflow as tf`. The magic presumably has to run before
# TensorFlow is first imported to have any effect — confirm and move it above
# the import cell.
%tensorflow_version 1.x

In [14]:
def get_songs(path, min_timesteps=50):
    """Load every MIDI file found in ``path`` as a note-state matrix.

    Args:
        path: Directory searched (non-recursively) with the pattern ``*.mid*``.
        min_timesteps: A song is kept only if its matrix has more than this
            many rows. Defaults to 50.

    Returns:
        A list of numpy arrays, one per accepted file, in sorted filename order.
    """
    # glob returns matches in arbitrary, filesystem-dependent order. Sort them
    # so that positional use of the result (e.g. songs[0]) is reproducible.
    files = sorted(glob.glob('{}/*.mid*'.format(path)))
    print(files)
    songs = []
    for f in tqdm(files):
        print(f)
        song = np.array(midi_manipulation.midiToNoteStateMatrix(f))
        n_steps = song.shape[0]
        if n_steps > min_timesteps:
            songs.append(song)
            print('gottcha')
        else:
            # Too short to be useful; report its length and skip it.
            print(n_steps, 'no good')
    return songs

In [None]:
# Load the songs in the mozart folder as note-state matrices. get_songs parses
# the .mid files directly through midi_manipulation.midiToNoteStateMatrix.
songs = get_songs('/content/drive/MyDrive/MusicNotebook/mozart')
print("{} songs processed".format(len(songs)))


In [None]:
# Piano-roll bounds are taken from the midi_manipulation module.
lowest_note = midi_manipulation.lowerBound # index of the lowest note on the piano roll
highest_note = midi_manipulation.upperBound # index of the highest note on the piano roll
note_range = highest_note-lowest_note # number of notes covered

# Number of timesteps packed into one training example (16 = one bar).
# Other values noted by the author: 64, 32, 16.
num_timesteps  = 4
# Size of the visible layer: every timestep contributes 2*note_range values.
n_visible      = 2*note_range*num_timesteps
# Size of the hidden layer.
n_hidden       = 50

In [None]:
#x  = tf.placeholder(tf.float32, [None, n_visible], name="x") #The placeholder variable that holds our data
#W  = tf.Variable(tf.random_normal([n_visible, n_hidden], 0.01), name="W") #The weight matrix that stores the edge weights
#bh = tf.Variable(tf.zeros([1, n_hidden],  tf.float32, name="bh")) #The bias vector for the hidden layer
#bv = tf.Variable(tf.zeros([1, n_visible],  tf.float32, name="bv")) #The bias vector for the visible layer

In [None]:
# VAE layer sizes, taken from the configuration values n_visible / n_hidden.
z_dim = n_hidden # dimension of the latent vector z
X_dim = n_visible # dimension of one flattened input example
h_dim = n_hidden # width of the hidden layer in both encoder and decoder

print(X_dim)

In [None]:
def xavier_init(size):
    """Return a random-normal initial value for a weight matrix.

    Args:
        size: Shape of the tensor to create; ``size[0]`` is used as the
            input dimension.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` whose standard
        deviation is ``1 / sqrt(size[0] / 2)`` (it depends on the input
        dimension only).
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)

In [None]:
# Graph inputs: X receives batches of flattened examples, z receives latent
# vectors when the decoder is driven directly.
X = tf.placeholder(tf.float32, shape=[None, X_dim], name="X")
z = tf.placeholder(tf.float32, shape=[None, z_dim], name="z")

# Encoder, first layer: X_dim -> h_dim.
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name="Q_b1")
Q_W1 = tf.Variable(xavier_init([X_dim, h_dim]), name="Q_W1")

# Encoder head producing the latent mean: h_dim -> z_dim.
Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim]), name="Q_W2_mu")
Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim]), name="Q_b2_mu")

# Encoder head producing the latent log-variance: h_dim -> z_dim.
# (Named "sigma", but Q() returns its output as z_logvar.)
Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim]),name="Q_W2_sigma")
Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim]),name="Q_b2_sigma")


In [None]:
#def gaussian_noise_layer(input_layer, std):
#    noise = tf.random_normal(shape=tf.shape(input_layer), mean=0.0, stddev=std, dtype=tf.float32) 
#    return input_layer + noise
#noise = gaussian_noise_layer(X,1)
#X = tf.add(X, noise)



In [None]:
# Gaussian noise: zero-mean, unit-stddev noise with the same shape as X is
# added to X.
# NOTE(review): this rebinds the name X from the placeholder to the output of
# tf.add. Later cells use X both as the feed_dict key (training and query
# encoding) and as the cross-entropy labels. TF1 allows feeding any tensor, so
# feed_dict={X: ...} presumably overrides this op's output and the noise would
# never reach the encoder — confirm, and consider a separate name
# (e.g. X_noisy) for the noisy input.
# Re-running this cell wraps the current X in yet another noise op.
gaussian_noise_layer = tf.random_normal(shape=tf.shape(X), mean=0.0, stddev=1.0, dtype=tf.float32) 
X = tf.add(X, gaussian_noise_layer)

In [None]:
def Q(X):
    """Encoder: map a batch of inputs to the parameters of q(z|X).

    Args:
        X: Tensor multiplied by ``Q_W1``, so its last dimension must be X_dim.

    Returns:
        ``(z_mu, z_logvar)``: two linear projections of the same ReLU hidden
        layer, using the ``Q_W2_mu``/``Q_b2_mu`` and
        ``Q_W2_sigma``/``Q_b2_sigma`` variables respectively.
    """
    hidden = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    mu = tf.matmul(hidden, Q_W2_mu) + Q_b2_mu
    logvar = tf.matmul(hidden, Q_W2_sigma) + Q_b2_sigma
    return mu, logvar


def sample_z(mu, log_var):
    """Draw z = mu + exp(log_var / 2) * eps, with eps standard-normal noise.

    Args:
        mu: Mean tensor; the noise is drawn with the same shape.
        log_var: Log-variance tensor, halved and exponentiated to scale the noise.

    Returns:
        The sampled tensor, with the same shape as ``mu``.
    """
    noise = tf.random_normal(shape=tf.shape(mu))
    scale = tf.exp(log_var / 2)
    return mu + scale * noise

In [None]:
#mean, var = tf.nn.moments(X, axes=[1])
#print(mean)
#print(var)

In [None]:
# =============================== P(X|z) ======================================

# Decoder, first layer: z_dim -> h_dim.
P_W1 = tf.Variable(xavier_init([z_dim, h_dim]), name="P_W1")
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]), name="P_b1")

# Decoder, output layer: h_dim -> X_dim.
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]), name="P_b2")
P_W2 = tf.Variable(xavier_init([h_dim, X_dim]), name="P_W2")


# Decoder output, returned both squashed through a sigmoid and as raw logits.
def P(z):
    """Decoder: map latent vectors to the visible layer.

    Args:
        z: Tensor multiplied by ``P_W1``, so its last dimension must be z_dim.

    Returns:
        ``(prob, logits)``: ``logits`` is the linear output of the last layer
        and ``prob`` is ``sigmoid(logits)``.
    """
    hidden = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    out_logits = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(out_logits), out_logits

In [None]:
# Reconstruction path: encode X, draw a latent sample, decode it to logits.
z_mu, z_logvar = Q(X)
z_sample = sample_z(z_mu, z_logvar)
_, logits = P(z_sample)

# Generation path: decode latent vectors supplied through the z placeholder.
X_samples, _ = P(z)


# Reconstruction term: sigmoid cross-entropy between the decoder logits and X,
# summed over the visible units of each example (i.e. -log P(X|z) for a
# Bernoulli output).
# NOTE(review): at this point X is the tensor produced by the Gaussian-noise
# cell, not the original placeholder, so the labels are that tensor — confirm
# this is intended.
recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
# D_KL(Q(z|X) || N(0, I)) in closed form for a diagonal Gaussian, summed over
# the latent dimensions of each example.
kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
# VAE loss: batch mean of the two terms.
vae_loss = tf.reduce_mean(recon_loss + kl_loss)

# Adam with its default settings (no learning rate is passed).
solver = tf.train.AdamOptimizer().minimize(vae_loss) #check learning rate

#saver = tf.train.Saver()

In [None]:
# One session is shared by all training and generation cells below.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Folder for generated MIDI files. exist_ok makes this a single call with no
# gap between checking for the folder and creating it.
os.makedirs('out/', exist_ok=True)

In [None]:
num_epochs = 20000 # Number of training epochs; each epoch is one pass over the training examples built below.
batch_size = 100 # Number of training examples sent through the model per optimisation step.
# lr         = tf.constant(0.005, tf.float32) #The learning rate of our model

# Only the first loaded song is used for training. It is kept under its own
# name so the full `songs` list is left intact for other cells.
if len(songs) == 0:
    raise ValueError("No songs loaded; nothing to train on.")
train_songs = songs[:1]

# The songs are stored in a time x notes format. The size of each song is timesteps_in_song x 2*note_range
# Here we reshape the songs so that each training example is a vector with num_timesteps x 2*note_range elements.
# The result does not change between epochs, so it is computed once up front.
train_examples = []
for song in train_songs:
    song = np.array(song)
    n_chunks = song.shape[0] // num_timesteps
    song = song[:n_chunks*num_timesteps]  # drop the incomplete tail chunk
    train_examples.append(np.reshape(song, [n_chunks, song.shape[1]*num_timesteps]))

loss = float('nan')  # stays NaN only if no batch is ever run
loss_value = np.array([])
iter_value = np.array([])
for i in range(num_epochs + 1):
    start_time = time.time()
    for examples in train_examples:
        # Train the VAE on batch_size examples at a time
        for ind in range(0, len(examples), batch_size):
            X_mb = examples[ind:ind+batch_size]
            _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb})

    # Every 100 epochs: log the loss of the last batch and redraw the loss curve.
    if i % 100 == 0:
        end_time = time.time()
        # Duration of the epoch just finished, scaled to 100 epochs (an estimate).
        timeTaken = (end_time - start_time) * 100
        print('[Iter {}] [Loss {}]({:.3f} sec / 100 epoch)'.format(i, loss, timeTaken))
        iter_value = np.append(iter_value, i)
        loss_value = np.append(loss_value, loss)

        print(iter_value)
        print(loss_value)
        fig = plt.figure(figsize=(8, 8))
        plt.plot(iter_value, loss_value, 'o-')
        plt.show()
        # Release the figure explicitly so that figures do not pile up over the run.
        plt.close(fig)

    # Every 1000 epochs: decode one random latent vector and show it, raw and thresholded.
    if i % 1000 == 0:
        samples = sess.run(X_samples, feed_dict={z: np.random.randn(1,z_dim)})
        S = np.reshape(samples, (num_timesteps, 2*note_range))
        thresh_S = S>=0.5
        fig = plt.figure(figsize=(12,2))
        plt.subplot(1,2,1)
        plt.imshow(S)
        plt.subplot(1,2,2)
        plt.imshow(thresh_S)
        plt.tight_layout()
        plt.pause(0.1)
        plt.close(fig)
        #midi_manipulation.noteStateMatrixToMidi(thresh_S, "out/generated_chord_{}".format(i))
    

In [None]:
from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import FunctionTransformer

In [None]:
# Inputs for the curve fit: the logged iteration numbers (x) and the loss
# values logged at those iterations (y).
x_samp = iter_value
y_samp = loss_value

In [None]:
print(loss_value)
# Drop the first logged loss. The result is derived from loss_value rather
# than from y_samp itself, so re-running this cell does not remove one more
# sample each time.
y_samp = np.delete(loss_value, 0)
print(y_samp.size)


In [None]:
print(iter_value.size)
# Drop the first logged iteration so every x stays paired with its own loss:
# y_samp has its first sample removed, so x_samp must lose the same index.
# Deriving from iter_value keeps the cell re-runnable and avoids a fixed index
# that fails when fewer samples were logged.
x_samp = np.delete(iter_value, 0)
print(x_samp.size)

In [None]:
def func(x_samp, a, b, c, d):
    """Logarithmic model ``a * log(b * x_samp + c) + d`` used for the curve fit.

    Args:
        x_samp: Scalar or numpy array of x values.
        a, b, c, d: Model parameters.

    Returns:
        The model value, with the same shape as ``x_samp``.
    """
    log_argument = b*x_samp + c
    return a*np.log(log_argument) + d


# Add zero-mean Gaussian noise (scale 0.2) to the loss values before fitting.
yn = y_samp + 0.2*np.random.normal(size=len(x_samp))

# Least-squares fit of the logarithmic model to the noised losses.
popt, pcov = curve_fit(func, x_samp, yn)

# Plot the noised data together with the fitted curve.
fitted = func(x_samp, *popt)
fig, ax = plt.subplots(figsize=(15, 15))
ax.plot(x_samp, yn, 'bo', label="Original Noised Data")
ax.plot(x_samp, fitted, 'r-', label="Fitted Curve")
ax.set_xlabel("Batch Iteration")
ax.set_ylabel("Loss Values with Noise")
ax.legend()
plt.show()

In [None]:
import statistics
def variance(data):
  """Return the population variance of ``data`` (divides by n, not n - 1).

  Args:
    data: Sized iterable of numbers; it is traversed more than once.

  Returns:
    The mean of the squared deviations from the mean.
  """
  count = len(data)
  centre = sum(data) / count
  squared_total = sum((value - centre) ** 2 for value in data)
  return squared_total / count


In [None]:
# Sample standard deviation (statistics.stdev) of all logged loss values.
statistics.stdev(loss_value)

In [None]:
saver = tf.train.Saver()

#Save just in case
# The checkpoint name encodes the hidden size and the timesteps per example.
mname = 'model'+'_h'+str(n_hidden)+'nt'+str(num_timesteps)
# Make sure the checkpoint folder exists before saving into it.
os.makedirs("./tmp/", exist_ok=True)
save_path = saver.save(sess, "./tmp/"+mname+".ckpt")
print("Model saved in path: %s" % save_path)

## Training Ended
# Generative Part Starts Here

In [None]:
#tf.reset_default_graph()
#imported_graph = tf.train.import_meta_graph("./tmp/"+mname+".ckpt.meta")

#saver = tf.train.Saver()
#restore_path = saver.restore(sess, "./tmp/"+mname+".ckpt")

#sess = tf.Session()
#imported_graph.restore(sess, tf.train.latest_checkpoint("./tmp/")
#imported_graph.restore(sess, "./tmp/"+mname+".ckpt")

In [None]:
#def gaussian_noise_layer(input_layer, std):
 #   noise = tf.random_normal(shape=tf.shape(input_layer), mean=0.0, stddev=std, dtype=tf.float32) 
 #   return input_layer + noise
#
#noise = gaussian_noise_layer(z, 49)
#noise.eval(session=tf.Session(), feed_dict={z: np.zeros((4, 50))})

In [None]:
# Generate a longer piece from random latent vectors.
# `bars` is the number of latent vectors decoded; each one yields
# num_timesteps timesteps, for 25*16 timesteps in total (presumably 25 bars of
# 16 timesteps each — see the "16 = one bar" note in the configuration cell).
bars = int(25*16/num_timesteps)
print(bars, z_dim)
samples = sess.run(X_samples, feed_dict={z: np.random.randn(bars,z_dim)})
# Undo the flattening: back to a (time x 2*note_range) matrix.
S = np.reshape(samples, (bars*num_timesteps, 2*note_range))
# Binarise the decoder probabilities at 0.5 before writing the MIDI file.
thresh_S = S>=0.5
midi_manipulation.noteStateMatrixToMidi(thresh_S, "out/generated_chord_long")

In [None]:
from mpl_toolkits.mplot3d import axes3d

In [None]:
# Tag appended (as "_vf<vn>") to the names of the files written for this query.
vn = 1
# MIDI file used as the query for the encode/decode experiment below.
q = "/content/drive/MyDrive/MusicNotebook/haydn/hob1"+".mid"
# q = "/content/drive/MyDrive/naruto/n2.mid"
querysong = np.array(midi_manipulation.midiToNoteStateMatrix(q))
print(np.shape(querysong))
# Show the note-state matrix transposed, so the first axis runs horizontally.
plt.figure(figsize=(20, 20))
plt.imshow(querysong.T)

In [None]:
song = np.array(querysong)
# Number of num_timesteps-long chunks needed to hold the whole song (ceiling
# division, so a length that is already a multiple gets no extra empty chunk).
# At least one chunk is kept so that an empty song still yields one silent chunk.
decode_bars = max(1, -(-song.shape[0] // num_timesteps))
# Zero-pad the tail so the length is an exact multiple of num_timesteps.
zeropadsong = np.zeros((decode_bars*num_timesteps, song.shape[1]))
zeropadsong[:song.shape[0],:song.shape[1]] = song
# Each row of `song` now holds num_timesteps consecutive timesteps, flattened.
song = np.reshape(zeropadsong, [decode_bars, song.shape[1]*num_timesteps])
print(np.shape(song))

# Undo the chunking to recover a (time x 2*note_range) matrix and write it out
# as MIDI, as a reference for the padded input.
S_reconstruct = np.reshape(song, (decode_bars*num_timesteps, 2*note_range))

midi_manipulation.noteStateMatrixToMidi(S_reconstruct, "out/song_reconstruct"+"_vf"+str(vn))

In [None]:
# Encode the query with the VAE.
# Graph tensors involved (built in the loss cell):
#   encoding: z_mu, z_logvar = Q(X); z_sample = sample_z(z_mu, z_logvar)
#   decoding: X_samples, _ = P(z)

Xq = song
# zs selects the latent code used for the query: True runs the sampled
# z_sample, False runs the mean z_mu. ztype is the matching file-name suffix.
zs = True
if zs:
    zq_sample = sess.run(z_sample, feed_dict={X: Xq})
    ztype = "_zs"
else: # use the mean instead of sampling
    zq_sample = sess.run(z_mu, feed_dict={X: Xq})  # TODO: check why this causes breaks / missing values in the output
    ztype = "_zmu"
print(np.shape(zq_sample))

In [None]:
# Decode the latent codes obtained from the query.
samples = sess.run(X_samples, feed_dict={z: zq_sample})

# Undo the flattening: back to a (time x 2*note_range) matrix.
S = np.reshape(samples, (decode_bars*num_timesteps, 2*note_range))
# Binarise the decoder probabilities. 0.857 is the threshold in use here;
# 0.5 is the value used by the other generation cells.
thresh_S = S>=0.857
# The output name encodes hidden size, timesteps per example, latent type and version tag.
fout = "generated_query"+'_h'+str(n_hidden)+'_nt'+str(num_timesteps)+str(ztype)+"_vf"+str(vn)
print(fout)
midi_manipulation.noteStateMatrixToMidi(thresh_S, "out/"+fout)   

In [None]:
# Show the latent codes of the query as an image of log(|z|): one row per
# encoded chunk, one column per latent dimension.
plt.figure(figsize=(30,10))
plt.imshow(np.log(np.abs(zq_sample)))
print(np.shape(zq_sample))