In [1]:
#Basic libraries
import os
import time
import sys
import random
from math import ceil

#Basic ML libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

#MIDI file libraries
from mido import MidiFile, Message, MetaMessage, MidiTrack

#Keras libraries
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers.recurrent import LSTM, SimpleRNN
from keras.layers.core import *
from keras.layers.normalization import *
from keras.callbacks import History, EarlyStopping
from keras.layers import TimeDistributed

Using TensorFlow backend.


In [2]:
# --- Piano-roll / MIDI conversion settings ---
# Duration, in seconds, covered by one piano-roll time slice.
TIME_PER_TIME_SLICE = 0.02
# Inclusive range of MIDI note numbers represented in the piano roll.
HIGHEST_NOTE = 81
LOWEST_NOTE = 33
# One feature per note in [LOWEST_NOTE, HIGHEST_NOTE].
INPUT_DIM = HIGHEST_NOTE - LOWEST_NOTE + 1
OUTPUT_DIM = HIGHEST_NOTE - LOWEST_NOTE + 1
MICROSECONDS_PER_MINUTE = 60000000

# --- Model / training settings ---
NUM_UNITS = 64
X_SEQ_LENGTH = 50
Y_SEQ_LENGTH = 50
# NOTE(review): piano-roll targets can have several notes active in one slice;
# confirm that a categorical loss (rather than a per-note binary loss) is intended.
LOSS_FUNCTION = 'categorical_crossentropy'
# The optimizer is given by name rather than as a shared Adam() instance:
# the same constant is passed to compile() for more than one model, and a
# single optimizer object must not be reused across models. Keras builds a
# fresh default Adam for each compile() call from this identifier.
OPTIMIZER = 'adam'
BATCH_SIZE = 64
NUM_EPOCHS = 100

# Directory containing the training MIDI files.
DIR = '/home/suraj/FML/scale_chords_small/midi/'

In [3]:
def midiToPianoroll(filepath):
    """Convert a MIDI file into a binary piano roll.

    Time is quantised into slices of TIME_PER_TIME_SLICE seconds using the
    file's ticks-per-beat and its first ``set_tempo`` event. Only notes in
    [LOWEST_NOTE, HIGHEST_NOTE] are kept; a cell is 1 while the note sounds.

    Args:
        filepath: path of the MIDI file, passed to ``MidiFile``.

    Returns:
        Integer numpy array of shape (time_slices, INPUT_DIM).
    """
    midi_data = MidiFile(filepath)
    resolution = midi_data.ticks_per_beat

    # Use the first tempo event; a file without one plays at the MIDI default
    # of 500000 microseconds per beat (120 BPM) instead of raising IndexError.
    set_tempo_events = [x for t in midi_data.tracks for x in t if x.type == 'set_tempo']
    microseconds_per_beat = set_tempo_events[0].tempo if set_tempo_events else 500000
    tempo = 1.0 * MICROSECONDS_PER_MINUTE / microseconds_per_beat

    ticks_per_time_slice = 1.0 * (resolution * tempo * TIME_PER_TIME_SLICE) / 60

    # Length of the roll = longest track. Event times are deltas, and EVERY
    # event (not only notes) advances the clock, so all of them are summed.
    total_ticks = 0
    for t in midi_data.tracks:
        sum_ticks = sum(e.time for e in t)
        if sum_ticks > total_ticks:
            total_ticks = sum_ticks

    time_slices = int(ceil(total_ticks / ticks_per_time_slice))
    piano_roll = np.zeros((INPUT_DIM, time_slices), dtype=int)

    # note index -> time slice at which the currently sounding note started
    note_states = {}
    for track in midi_data.tracks:
        elapsed_ticks = 0
        for event in track:
            # Advance the clock for every message so that control changes,
            # meta events, etc. with a non-zero delta do not shift later notes.
            elapsed_ticks += event.time
            if event.type not in ('note_on', 'note_off'):
                continue

            note_idx = event.note - LOWEST_NOTE
            time_slice_idx = int(elapsed_ticks / ticks_per_time_slice)

            if event.type == 'note_on' and event.velocity > 0:
                in_range = LOWEST_NOTE <= event.note <= HIGHEST_NOTE
                # A note starting exactly on the final tick has no slice of
                # its own; skip it rather than index past the end of the roll.
                if in_range and time_slice_idx < time_slices:
                    piano_roll[note_idx][time_slice_idx] = 1
                    note_states[note_idx] = time_slice_idx
            elif note_idx in note_states:
                # note_off, or note_on with velocity 0: fill from the onset
                # slice up to (not including) the release slice.
                last_time_slice_index = note_states[note_idx]
                piano_roll[note_idx][last_time_slice_index:time_slice_idx] = 1
                del note_states[note_idx]
    return piano_roll.T

In [4]:
def get_data(data_dir):
    """Load every file in ``data_dir`` as a piano roll.

    Args:
        data_dir: directory containing MIDI files.

    Returns:
        List with one ``midiToPianoroll`` result per regular file, ordered
        by file name.
    """
    pianoroll_data = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(data_dir)):
        filepath = os.path.join(data_dir, filename)
        # Skip sub-directories (e.g. notebook checkpoint folders).
        if not os.path.isfile(filepath):
            continue
        pianoroll_data.append(midiToPianoroll(filepath))

    return pianoroll_data

In [5]:
def createSeqNetInputs(pianoroll_data, x_seq_length, y_seq_length, random_state=None):
    """Cut piano rolls into shuffled (input, target) training pairs.

    Each input is ``x_seq_length`` consecutive time slices and its target is
    the ``y_seq_length`` slices that immediately follow. The window start
    advances by ``x_seq_length`` between pairs.

    Args:
        pianoroll_data: iterable of arrays with time slices on axis 0.
        x_seq_length: number of slices per input sequence (must be positive).
        y_seq_length: number of slices per target sequence.
        random_state: optional seed forwarded to ``sklearn.utils.shuffle``
            for a reproducible ordering.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays shuffled with the same permutation.

    Raises:
        ValueError: if ``x_seq_length`` is not positive (the window would
            never advance).
    """
    if x_seq_length <= 0:
        raise ValueError("x_seq_length must be positive")

    x = []
    y = []

    for piano_roll in pianoroll_data:
        pos = 0
        # "<=" so that a pair ending exactly on the last slice is kept.
        while pos + x_seq_length + y_seq_length <= piano_roll.shape[0]:
            split = pos + x_seq_length
            x.append(piano_roll[pos:split])
            y.append(piano_roll[split:split + y_seq_length])
            pos += x_seq_length

    X = np.array(x)
    Y = np.array(y)

    x_1, y_1 = shuffle(X, Y, random_state=random_state)

    return x_1, y_1

In [6]:
def createSeqTestNetInputs(pianoroll_data, seq_length, stride=10):
    """Cut each piano roll into overlapping input windows for prediction.

    Args:
        pianoroll_data: iterable of arrays with time slices on axis 0.
        seq_length: number of time slices per window (must be positive).
        stride: number of slices between consecutive window starts
            (default 10, must be positive).

    Returns:
        numpy array with one entry per song, each entry holding that song's
        windows. When all songs yield the same number of windows this is a
        regular stacked array; otherwise it is a 1-D object array of
        per-song arrays.

    Raises:
        ValueError: if ``seq_length`` or ``stride`` is not positive.
    """
    if seq_length <= 0 or stride <= 0:
        raise ValueError("seq_length and stride must be positive")

    x_test = []
    for piano_roll in pianoroll_data:
        # Last valid start is n - seq_length, so the final full window is included.
        last_start = piano_roll.shape[0] - seq_length
        windows = [piano_roll[pos:pos + seq_length]
                   for pos in range(0, last_start + 1, stride)]
        x_test.append(np.array(windows))

    if len(set(w.shape for w in x_test)) <= 1:
        return np.array(x_test)

    # Songs of different lengths: build the object array explicitly, since
    # np.array() on ragged input raises on recent NumPy versions.
    ragged = np.empty(len(x_test), dtype=object)
    for i, song_windows in enumerate(x_test):
        ragged[i] = song_windows
    return ragged

In [7]:
def seqNetOutToPianoroll(output, threshold = 0.1):
    """Binarise network output into one flat piano roll.

    Args:
        output: iterable of sequences, each an iterable of 1-D numpy arrays
            of per-note activations.
        threshold: activations strictly greater than this become 1,
            everything else 0.

    Returns:
        numpy array holding the binarised time slices of all sequences,
        concatenated in order.
    """
    binarised_slices = [
        (activations > threshold).astype(np.float64)
        for sequence in output
        for activations in sequence
    ]
    return np.array(binarised_slices)

In [8]:
def pianorollToMidi(piano_roll, filepath):
    """Write a binary piano roll to a single-track MIDI file.

    One MIDI tick is used per time slice; ticks-per-beat and the tempo event
    are derived from TIME_PER_TIME_SLICE. A ``note_on`` is emitted wherever a
    cell rises by exactly 1 between consecutive slices and a ``note_off``
    wherever it falls by exactly 1. All messages use velocity 65.

    Args:
        piano_roll: 2-D array with time slices on axis 0 and INPUT_DIM
            columns; column k maps to MIDI note ``k + LOWEST_NOTE``.
        filepath: destination handed to ``MidiFile.save``.
    """
    ticks_per_slice = 1
    bpm = 1/TIME_PER_TIME_SLICE
    ticks_per_beat = 60*ticks_per_slice/(bpm*TIME_PER_TIME_SLICE)

    midi_file = MidiFile(ticks_per_beat = int(ticks_per_beat))
    midi_track = MidiTrack()
    midi_file.tracks.append(midi_track)
    midi_track.append(MetaMessage('set_tempo', tempo = int(MICROSECONDS_PER_MINUTE/bpm), time =0))

    # A trailing all-zero slice forces a note_off for anything still sounding.
    padded_roll = np.concatenate((piano_roll, np.zeros((1, INPUT_DIM))), axis =0)

    previous_slice = np.zeros(INPUT_DIM)
    last_event_slice = 0

    for slice_index, time_slice in enumerate(padded_roll):
        for note_idx, change in enumerate(time_slice - previous_slice):
            if change == 1:
                message_type = 'note_on'
            elif change == -1:
                message_type = 'note_off'
            else:
                continue

            # Delta time is measured from the previous emitted event, so
            # further events in the same slice get a delta of 0.
            delta_ticks = (slice_index - last_event_slice)*ticks_per_slice
            midi_track.append(Message(message_type, time = delta_ticks, velocity = 65, note = note_idx + LOWEST_NOTE ))
            last_event_slice = slice_index

        previous_slice = time_slice

    midi_track.append(MetaMessage('end_of_track', time=1))

    midi_file.save(filepath)

In [9]:
def createSeq2Seq_LSTM():
    """Build the (uncompiled) LSTM encoder-decoder model.

    Encoder: two LSTM layers that reduce a variable-length sequence of
    INPUT_DIM-wide slices to one NUM_UNITS vector. Decoder: that vector
    repeated Y_SEQ_LENGTH times, two LSTM layers, and a per-time-step
    softmax over OUTPUT_DIM notes.

    Returns:
        A Keras ``Sequential`` model.
    """
    model = Sequential()

    #encoder
    # Keras 2 API: units given positionally and the input declared through
    # input_shape=(timesteps, features). This replaces the legacy
    # input_dim/output_dim keywords, which trigger deprecation warnings.
    model.add(LSTM(NUM_UNITS, input_shape = (None, INPUT_DIM), dropout=0.2, recurrent_dropout=0.2, return_sequences = True))
    model.add(BatchNormalization())
    model.add(LSTM(NUM_UNITS))

    #decoder
    model.add(RepeatVector(Y_SEQ_LENGTH))
    num_layers = 2
    for _ in range(num_layers):
        model.add(LSTM(NUM_UNITS, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))
        model.add(BatchNormalization())

    model.add(TimeDistributed(Dense(OUTPUT_DIM, activation= 'softmax')))
    return model

In [10]:
def createSeq2Seq_RNN():
    """Build the (uncompiled) SimpleRNN encoder-decoder model.

    Encoder: two SimpleRNN layers that reduce a variable-length sequence of
    INPUT_DIM-wide slices to one NUM_UNITS vector. Decoder: that vector
    repeated Y_SEQ_LENGTH times, two SimpleRNN layers, and a per-time-step
    softmax over OUTPUT_DIM notes.

    Returns:
        A Keras ``Sequential`` model.
    """
    model = Sequential()

    #encoder
    # Keras 2 API: units given positionally and the input declared through
    # input_shape=(timesteps, features). This replaces the legacy
    # input_dim/output_dim keywords, which trigger deprecation warnings.
    model.add(SimpleRNN(NUM_UNITS, input_shape = (None, INPUT_DIM), dropout=0.2, recurrent_dropout=0.2, return_sequences = True))
    model.add(BatchNormalization())
    model.add(SimpleRNN(NUM_UNITS))

    #decoder
    model.add(RepeatVector(Y_SEQ_LENGTH))
    num_layers = 2
    for _ in range(num_layers):
        model.add(SimpleRNN(NUM_UNITS, dropout=0.2, recurrent_dropout=0.2, return_sequences = True))
        model.add(BatchNormalization())

    model.add(TimeDistributed(Dense(OUTPUT_DIM, activation= 'softmax')))
    return model

In [11]:
#Prepare training data
pianoroll_data = get_data(DIR)
input_data, target_data = createSeqNetInputs(pianoroll_data, X_SEQ_LENGTH, Y_SEQ_LENGTH)
# Cast to boolean arrays. The builtin bool is used because the np.bool alias
# was deprecated and later removed from NumPy; the resulting dtype is the same.
input_data = input_data.astype(bool)
target_data = target_data.astype(bool)

In [12]:
#Training RNN
# Callbacks: early stopping monitors the training loss (min_delta 0.01,
# patience 10); an explicit History callback is passed alongside it.
earlystop = EarlyStopping(
    monitor='loss',
    min_delta=0.01,
    patience=10,
    verbose=0,
    mode='auto',
)
history = History()

# Build, summarise and compile the SimpleRNN seq2seq model.
model_rnn = createSeq2Seq_RNN()
model_rnn.summary()
model_rnn.compile(optimizer=OPTIMIZER, loss=LOSS_FUNCTION)

# fit() returns the training history, kept in `hist`.
hist = model_rnn.fit(
    input_data,
    target_data,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[earlystop, history],
)

  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, None, 64)          7296      
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 64)          256       
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 64)                8256      
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 50, 64)            0         
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (None, 50, 64)            8256      
_________________________________________________________________
batch_normalization_2 (Batch (None, 50, 64)            256       
_________________________________________________________________
simple_rnn_4 (SimpleRNN)     (None, 50, 64)            8256      
__________

In [14]:
#Training LSTM
# Callbacks: early stopping monitors the training loss (min_delta 0.01,
# patience 10); an explicit History callback is passed alongside it.
earlystop = EarlyStopping(
    monitor='loss',
    min_delta=0.01,
    patience=10,
    verbose=0,
    mode='auto',
)
history = History()

# Build, summarise and compile the LSTM seq2seq model.
model_lstm = createSeq2Seq_LSTM()
model_lstm.summary()
model_lstm.compile(optimizer=OPTIMIZER, loss=LOSS_FUNCTION)

# fit() returns the training history, kept in `hist`.
hist = model_lstm.fit(
    input_data,
    target_data,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[earlystop, history],
)

  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, None, 64)          29184     
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 64)          256       
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                33024     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 50, 64)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 50, 64)            33024     
_________________________________________________________________
batch_normalization_2 (Batch (None, 50, 64)            256       
_________________________________________________________________
lstm_4 (LSTM)                (None, 50, 64)            33024     
__________

In [14]:
# Build prediction windows from one MIDI file. The path is derived from DIR
# instead of repeating the absolute directory, so the data location is
# configured in one place only.
# NOTE(review): this file lives in the training directory, so the model may
# already have seen it during training - confirm that is intended.
test_piano_roll = midiToPianoroll(os.path.join(DIR, 'scale_b_phrygian.mid'))
test_data = [test_piano_roll]
test_input = createSeqTestNetInputs(test_data, X_SEQ_LENGTH)

In [16]:
# generate music using RNN
# Each song is written to its own file so that later songs do not overwrite
# earlier ones; with a single song the plain file name is used.
for i, song in enumerate(test_input):
    net_output = model_rnn.predict(song)
    net_roll = seqNetOutToPianoroll(net_output)
    if len(test_input) == 1:
        out_path = 'output_harmony_rnn.mid'
    else:
        out_path = 'output_harmony_rnn_%d.mid' % i
    pianorollToMidi(net_roll, out_path)

In [17]:
# generate music using LSTM
# Each song is written to its own file so that later songs do not overwrite
# earlier ones; with a single song the plain file name is used.
for i, song in enumerate(test_input):
    net_output = model_lstm.predict(song)
    net_roll = seqNetOutToPianoroll(net_output)
    if len(test_input) == 1:
        out_path = 'output_harmony_lstm.mid'
    else:
        out_path = 'output_harmony_lstm_%d.mid' % i
    pianorollToMidi(net_roll, out_path)