In [1]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
import matplotlib.pyplot as plt
from keras.callbacks import TensorBoard
from time import time
import os
import pypianoroll as pr
import numpy as np
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
#load midi files with melody tracks
mel_roll = []
def load_mel_midi(midi_dir="./midi_with_mel/"):
    """Parse every ``.mid`` file under ``midi_dir`` and append it to ``mel_roll``.

    The directory tree is walked recursively. Each file whose name ends in
    ``.mid`` is parsed with ``pr.parse``; the parsed object's ``name`` is set
    to the file name without its extension, and the object is appended to the
    module-level ``mel_roll`` list.

    Args:
        midi_dir: Root directory to scan. Defaults to the directory the
            notebook has always used, so existing calls behave as before.

    Returns:
        None. Results are accumulated in the global ``mel_roll``.
    """
    for root, _dirs, files in os.walk(midi_dir, topdown=False):
        for name in files:
            if not name.endswith('.mid'):
                continue
            song = pr.parse(os.path.join(root, name))
            # splitext strips only the final extension, so a file such as
            # "my.song.mid" keeps the name "my.song" instead of being cut to "my".
            song.name = os.path.splitext(name)[0]
            mel_roll.append(song)
load_mel_midi()

In [None]:
#cleanse out midi files with multiple melody tracks and store into mel_roll
def cleanse_multi_mel(mel_roll):
    """Return the songs that have at most one track named "melody".

    A track counts as a melody track when its ``name`` equals ``'melody'``
    ignoring case. Songs with two or more such tracks are dropped; songs with
    zero or one are kept, in their original order. The input list is not
    modified.

    Args:
        mel_roll: Iterable of song objects exposing a ``tracks`` sequence
            whose items have a string ``name`` attribute.

    Returns:
        A new list containing the retained songs.
    """
    cleansed_mel_roll = []
    for song in mel_roll:
        # Count per song so no state can leak from one song to the next.
        melody_tracks = 0
        for track in song.tracks:
            if track.name.lower() == 'melody':
                melody_tracks += 1
                if melody_tracks > 1:
                    # A second melody track already disqualifies the song.
                    break
        if melody_tracks <= 1:
            cleansed_mel_roll.append(song)
    return cleansed_mel_roll


# Replace mel_roll with the filtered list returned by cleanse_multi_mel and
# report how many songs remain.
mel_roll = cleanse_multi_mel(mel_roll)
print(len(mel_roll))

In [21]:
#sample 150 songs as training data and the rest as testing data
# A dedicated, seeded generator makes the split reproducible across kernel
# restarts without touching the global `random` state.
split_rng = random.Random(0)
x_train_temp = split_rng.sample(mel_roll, 150)
# Exclude the sampled songs by object identity: a set lookup keeps this linear
# and does not depend on how song objects implement equality.
train_ids = {id(song) for song in x_train_temp}
x_test_temp = [song for song in mel_roll if id(song) not in train_ids]
def parsed_data(x_data, window=4*96):
    """Cut every track of every song into fixed-size windows with 0/1 labels.

    For each song, the number of complete windows is taken from the first
    track's pianoroll length along axis 0. For every window position, each
    track contributes one sample: its pianoroll rows ``[start, start + window)``
    with the two axes swapped, so a sample has shape
    ``(pianoroll.shape[1], window)``. The label is 1 when the track name equals
    ``"melody"`` ignoring case, otherwise 0.

    Args:
        x_data: Iterable of song objects exposing ``tracks``; each track has a
            2-D ``pianoroll`` array and a string ``name``.
        window: Number of rows along pianoroll axis 0 per sample. Defaults to
            ``4*96``, the value previously hard-coded.

    Returns:
        Tuple ``(parsed_x, parsed_y)`` of NumPy arrays holding the samples and
        their labels, in matching order.
    """
    parsed_x = []
    parsed_y = []

    for song in x_data:
        if not song.tracks:
            # No first track to size the windows from; nothing to emit.
            continue
        # Integer floor division avoids the float round-trip of int(a / b).
        n_windows = song.tracks[0].pianoroll.shape[0] // window
        for start in range(0, n_windows * window, window):
            for track in song.tracks:
                segment = np.asarray(track.pianoroll[start:start + window, :])
                missing = window - segment.shape[0]
                if missing > 0:
                    # A track shorter than the first one would yield a ragged
                    # sample; zero-pad so every sample has the same shape.
                    segment = np.pad(segment, ((0, missing), (0, 0)), mode='constant')
                parsed_x.append(segment.swapaxes(0, 1))
                parsed_y.append(1 if track.name.lower() == "melody" else 0)

    return np.array(parsed_x), np.array(parsed_y)

# Build the sample/label arrays for the sampled training songs and for the
# remaining (test) songs.
x_train, y_train = parsed_data(x_train_temp)
x_test, y_test = parsed_data(x_test_temp)

In [22]:
# Append a trailing axis of size 1 so each sample becomes
# (img_rows, img_cols, 1), matching the input_shape given to the first Conv2D.
img_rows, img_cols = 128, (4*96)
x_train = x_train.reshape((x_train.shape[0], img_rows, img_cols, 1))
x_test = x_test.reshape((x_test.shape[0], img_rows, img_cols, 1))

(29137, 128, 500, 1)
(128, 500, 1)
(29137,)
[1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0]


In [23]:
# Quick look at the prepared training data: overall shape, the first sample
# and its shape, the label array shape, and the first 200 labels.
for summary in (x_train.shape, x_train[0], x_train[0].shape,
                y_train.shape, y_train[0:200]):
    print(summary)

(29137, 128, 500, 1)
[[[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 ...

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]

 [[0]
  [0]
  [0]
  ...
  [0]
  [0]
  [0]]]
(128, 500, 1)
(29137,)
[1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0]


In [25]:
# Two convolution/max-pooling stages followed by a small dense head that ends
# in a single sigmoid unit, trained with binary cross-entropy.
classifier = Sequential([
    Conv2D(8, (32, 32), activation='relu', input_shape=(128, (4*96), 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(16, (10, 10), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(units=64, activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])
classifier.compile(
    loss=keras.losses.binary_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)
# Each run logs to its own directory under logs/, named after the current time.
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
history = classifier.fit(
    x_train,
    y_train,
    batch_size=25,
    epochs=8,
    shuffle=True,
    verbose=1,
    callbacks=[tensorboard],
)

Epoch 1/8
   50/29137 [..............................] - ETA: 3:50:40 - loss: 2.6506 - acc: 0.6000

KeyboardInterrupt: 

In [None]:
# Evaluate the trained classifier on the test arrays; with the single
# 'accuracy' metric compiled in, evaluate yields the loss and the accuracy.
test_loss, test_acc = classifier.evaluate(x_test, y_test)

print('Test accuracy:', test_acc)

In [None]:
from keras.utils import plot_model

# Write a diagram of the network to model.png.
plot_model(classifier, to_file="model.png")

# Store the architecture as JSON text ...
model_json = classifier.to_json()
with open("model.json", mode="w") as json_file:
    json_file.write(model_json)

# ... and the learned weights as an HDF5 file next to it.
classifier.save_weights("model.h5")
print("Saved model to disk")

In [30]:
from keras.models import model_from_json

# Load the JSON architecture description and rebuild the model from it.
# The context manager closes the file even if reading raises.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")
# The rebuilt model is compiled with the same loss, optimizer and metric used
# for training before it is evaluated on the test arrays.
loaded_model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
test_loss, test_acc = loaded_model.evaluate(x_test, y_test)

print('Test accuracy:', test_acc)

Loaded model from disk
 1376/27080 [>.............................] - ETA: 44:58

KeyboardInterrupt: 