In [0]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0" 
import midi
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import keras
import warnings
import time
import random
import itertools
import pickle
import keras
from keras.models import Sequential
from keras.layers import Dense, TimeDistributed, LSTM, Dropout, CuDNNLSTM, Embedding, Input, Conv1D
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

In [0]:
# Load the source MIDI file; the path is relative to the notebook's working
# directory.
mid_jingle = midi.read_midifile('data/jingle-bells-guitar-glenn-jarrett.mid')
# Switch the pattern's event ticks to absolute positions. The parsing helpers
# below use event.tick directly as a timeline index, so they need this form.
mid_jingle.make_ticks_abs()

In [0]:
def get_max_tick(track):
    '''
    Locate the tick of the final note event in a track.

    track: list of MIDI events

    returns: tick of the last NoteOn/NoteOff event in track,
             or None when track holds no such event
    '''
    # Scan from the back so the first hit is the last note event; the
    # generator is lazy, so the scan stops as soon as that event is found.
    trailing_note_ticks = (
        event.tick
        for event in reversed(track)
        if type(event) in [midi.NoteOnEvent, midi.NoteOffEvent]
    )

    return next(trailing_note_ticks, None)

def find_pitch(notes, pitch):
    '''
    Look up a note by pitch.

    notes: list/set of (pitch, velocity) tuples
    pitch: pitch value to find

    returns: first (pitch, velocity) tuple (in iteration order) whose pitch
             matches; when nothing matches, the inputs are printed and
             None is returned
    '''
    # Private sentinel so "no match" can never be confused with a real entry.
    missing = object()
    hit = next((entry for entry in notes if entry[0] == pitch), missing)

    if hit is missing:
        # Dump the inputs for debugging, then hand None back to the caller.
        print(notes, pitch)
        return None

    return hit
    
def parse_data(track):
    '''
    Convert a track with absolute ticks into a per-tick matrix of the
    notes that are sounding.

    track: list of MIDI events (ticks must already be absolute)

    returns: time series list of lists, each list fixed length,
             contains activated notes at corresponding tick.
             Concretely: the result of keras pad_sequences (default
             settings) over one list of (pitch, velocity) entries per tick
             in range(max_tick).

    raises: ValueError when the track contains no NoteOn/NoteOff event
    '''
    max_tick = get_max_tick(track)

    if max_tick is None:
        raise ValueError('track contains no NoteOn/NoteOff events')

    # ---- Pass 1: turn the event stream into ordered per-tick actions ----
    # activated_notes tracks which (pitch, velocity) pairs are open while
    # reading the events; actions maps tick -> [(is_start, note), ...] in
    # file order, so a release followed by a re-strike on the same tick is
    # replayed in that order.
    activated_notes = set()
    actions = {}

    for event in track:
        if type(event) not in [midi.NoteOnEvent, midi.NoteOffEvent]:
            continue

        pitch, velocity = event.data

        # Only a NoteOn with non-zero velocity starts a note. A NoteOff may
        # carry a non-zero release velocity and must still end the note.
        if type(event) is midi.NoteOnEvent and velocity != 0:
            if (pitch, velocity) in activated_notes:
                print((pitch, velocity), activated_notes)
                # Nudge the velocity until the pair is unique. This happens
                # BEFORE the note is recorded, so the start and end entries
                # always use the same key.
                while (pitch, velocity) in activated_notes:
                    velocity += int(np.random.choice([-1, 1]))

            note = (pitch, velocity)
            activated_notes.add(note)
            is_start = True
        else:
            note = find_pitch(activated_notes, pitch)

            if note is None:
                # Release for a pitch that is not open: nothing to close.
                continue

            activated_notes.remove(note)
            is_start = False

        actions.setdefault(event.tick, []).append((is_start, note))

    # ---- Pass 2: replay the actions tick by tick ----
    # A fresh set is used so notes left open by pass 1 do not appear as
    # sounding from tick 0.
    events = []
    sounding = set()

    for tick in range(max_tick):
        for is_start, note in actions.get(tick, ()):
            if is_start:
                sounding.add(note)
            else:
                sounding.discard(note)

        events.append(list(sounding))

    return keras.preprocessing.sequence.pad_sequences(events)

def process_data(data, timestep):
    '''
    Slice a time series into overlapping input windows and next-step targets.

    data: time series MIDI data, indexed by time along axis 0
    timestep: number of consecutive steps in each input window

    returns: (X, y) where X[i] is data[i:i+timestep] and y[i] is
             data[i+timestep], for i in range(len(data)-timestep-1).
             X has shape (n_windows, timestep, *feature_shape) and y has
             shape (n_windows, *feature_shape). Size-1 axes of the input
             are kept, so a single window or a single feature column
             still yields correctly ranked arrays.
    '''
    data = np.asarray(data)
    # Window count is kept as in the original loop bound (the last possible
    # window is not produced); clamp so short inputs give zero windows.
    n_windows = max(len(data) - timestep - 1, 0)

    if n_windows == 0:
        # Return correctly shaped empties so callers can still read
        # .shape[-1] for the feature width.
        feature_shape = data.shape[1:]
        return (np.empty((0, timestep) + feature_shape, dtype=data.dtype),
                np.empty((0,) + feature_shape, dtype=data.dtype))

    X = np.stack([data[i:i + timestep] for i in range(n_windows)])
    # Targets are the rows right after each window; copy so the result does
    # not alias the caller's array.
    y = data[timestep:timestep + n_windows].copy()

    return X, y

In [0]:
class MidiScaler():
    '''
    Custom scaler for MIDI time series data

    Stateless affine map: transform computes (x - 64) / 128 and
    inverse_transform computes x * 128 + 64. The method names mirror the
    scikit-learn scaler interface (fit / transform / fit_transform /
    inverse_transform).
    '''
    # np.longdouble is the portable spelling of NumPy's extended-precision
    # float: it is the same type as np.float128 on platforms that define
    # that alias, and still exists where np.float128 does not.
    _dtype = np.longdouble

    def __init__(self):
        pass

    def fit(self, data):
        '''
        No-op: the scaling constants are fixed.

        returns: self, so calls can be chained
        '''
        return self

    def fit_transform(self, data):
        '''
        data: array-like of MIDI values

        returns: same as transform(data)
        '''
        return self.fit(data).transform(data)

    def transform(self, data):
        '''
        data: array-like of MIDI values

        returns: (data - 64) / 128 as an extended-precision array
        '''
        return ((np.asarray(data)-64)/128).astype(self._dtype)

    def inverse_transform(self, data):
        '''
        data: array-like of scaled values

        returns: data * 128 + 64 as an extended-precision array
        '''
        return ((np.asarray(data)*128)+64).astype(self._dtype)

In [0]:
class RNN:
    '''
    Custom RNN class/data container

    Holds the raw series, its train/test split, the scaled and windowed
    versions of both splits, and a compiled Keras model (two stacked
    CuDNNLSTM layers followed by two Dense layers) that predicts the next
    time step from a window of `timestep` steps.
    '''
    def __init__(self, X, train_test_split=0.8, epochs=100, batch_size=32, lstm_units=128, timestep=256):
        '''
        X: time series array, time along axis 0
        train_test_split: fraction of leading rows used for training
        epochs: default epoch count used by train() when none is given
        batch_size: batch size passed to model.fit
        lstm_units: width of both LSTM layers and of the hidden Dense layer
        timestep: window length fed to the network
        '''
        self.X = X
        self.split = int(self.X.shape[0]*train_test_split)
        # Chronological split: the first `split` rows train, the rest test.
        self.X_train, self.X_test = self.X[:self.split], self.X[self.split:]
        self.epochs = epochs
        self.batch_size = batch_size
        self.lstm_units = lstm_units
        self.timestep = timestep
        self.scaler = MidiScaler()
        self.scale_data()

        # Width of one time step after preprocessing; used as both the
        # input feature count and the output width.
        n_features = self.X_train_processed.shape[-1]

        self.model = Sequential()
        self.model.add(
            CuDNNLSTM(self.lstm_units, input_shape=(
                self.timestep, n_features
            ),
            return_sequences=True)
        )
        self.model.add(Dropout(0.5))
        self.model.add(CuDNNLSTM(self.lstm_units))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(self.lstm_units, activation='relu'))
        # Linear output: the targets are scaled values in [-0.5, 0.5)
        # trained with MSE. A softmax here would force non-negative outputs
        # that sum to 1, which cannot represent those targets.
        self.model.add(Dense(n_features, activation='linear'))
        self.model.compile(optimizer='adam', loss=['mse'], metrics=['accuracy'])

    def flatten_data(self):
        '''
        Reduce each split to two dimensions by keeping index 0 of the last
        axis; data that cannot be indexed that way is used unchanged.
        '''
        try:
            self.X_train_flattened = self.X_train[:,:,0]
            self.X_test_flattened = self.X_test[:,:,0]
        except IndexError:
            # Fewer than three axes: nothing to drop.
            self.X_train_flattened = self.X_train
            self.X_test_flattened = self.X_test

    def scale_data(self):
        '''
        Flatten, scale and window both splits, storing the results as
        X_*_scaled and (X_*_processed, y_*_processed) attributes.
        '''
        self.flatten_data()
        self.scaler.fit(self.X_train_flattened)
        self.X_train_scaled = self.scaler.transform(self.X_train_flattened)
        self.X_test_scaled = self.scaler.transform(self.X_test_flattened)

        self.X_train_processed, self.y_train_processed = \
            process_data(self.X_train_scaled, self.timestep)
        self.X_test_processed, self.y_test_processed = \
            process_data(self.X_test_scaled, self.timestep)

    def train(self, epochs=None, validation_split=0.25, verbose=0):
        '''
        Fit the model on the processed training windows.

        epochs: number of epochs; defaults to the value given to __init__
        validation_split: fraction of training samples held out by Keras
        verbose: Keras verbosity level, forwarded to model.fit

        returns: the object returned by model.fit
        '''
        if epochs is None:
            epochs = self.epochs

        history = self.model.fit(self.X_train_processed, self.y_train_processed,
                                 batch_size=self.batch_size, epochs=epochs,
                                 validation_split=validation_split,
                                 verbose=verbose)

        return history

In [0]:
def detokenize_data(pred, velocity=60, channel=10):
    '''
    Rebuild a MIDI pattern from a per-tick note matrix.

    pred: predicted MIDI time series data; 2-D integer array with one row
          per tick, each entry a pitch number (0 means "no note")
    velocity: velocity written into every NoteOn event
    channel: channel value passed to every note event

    returns: corresponding MIDI pattern (one track, delta ticks,
             terminated by an End-of-Track event)

    A pitch is switched on at the first tick it appears and switched off at
    the last consecutive tick it appears. Pitches outside 1..127 are
    ignored because they cannot be stored in a MIDI data byte. The final
    row is only used as look-ahead, as in the original implementation.
    '''
    rows = np.asarray(pred).tolist()
    pattern = midi.Pattern()
    track = midi.Track()
    # Pitches that have received a NoteOn but not yet a NoteOff.
    sounding = set()

    for tick, row in enumerate(rows[:-1]):
        upcoming = set(rows[tick + 1])
        handled = set()

        for note in row:
            # Skip padding/invalid pitches, and repeats within one tick:
            # several output slots can round to the same pitch, and each
            # pitch should produce at most one on/off per tick.
            if note in handled or not 0 < note <= 127:
                continue
            handled.add(note)

            if note not in sounding:
                track.append(midi.NoteOnEvent(tick=tick, channel=channel, data=[note, velocity]))
                sounding.add(note)

            if note not in upcoming:
                track.append(midi.NoteOffEvent(tick=tick, channel=channel, data=[note, 0]))
                sounding.discard(note)

    # Release whatever is still held when the data runs out, so the file
    # does not end with hanging notes.
    final_tick = len(rows) - 1
    for note in sorted(sounding):
        track.append(midi.NoteOffEvent(tick=final_tick, channel=channel, data=[note, 0]))

    # Events were appended in non-decreasing absolute-tick order; convert
    # them to delta ticks relative to the preceding event.
    previous_tick = 0
    for event in track:
        absolute_tick = event.tick
        event.tick = absolute_tick - previous_tick
        previous_tick = absolute_tick

    # Standard MIDI tracks must end with an End-of-Track meta event.
    track.append(midi.EndOfTrackEvent(tick=1))
    pattern.append(track)

    return pattern

In [0]:
# Parse the pattern's second track (index 1) into the per-tick note matrix;
# presumably track 0 holds no note data for this file — TODO confirm.
track_data = parse_data(mid_jingle[1])

In [0]:
# Build the model wrapper with its default hyperparameters; the constructor
# also splits, scales and windows track_data before compiling the network.
rnn = RNN(track_data)

In [20]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
rnn.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_3 (CuDNNLSTM)     (None, 256, 128)          70144     
_________________________________________________________________
dropout_3 (Dropout)          (None, 256, 128)          0         
_________________________________________________________________
cu_dnnlstm_4 (CuDNNLSTM)     (None, 128)               132096    
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_4 (Dense)              (None, 7)                 903       
Total params: 219,655
Trainable params: 219,655
Non-trainable params: 0
_________________________________________________________________
None

In [21]:
# Fit the network for 20 epochs; `history` is whatever model.fit() returns
# and is kept so the training curves can be inspected afterwards.
history = rnn.train(epochs=20, verbose=1)

Train on 22770 samples, validate on 7590 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
# Element-wise round-half-to-even. np.rint is the native ufunc for this:
# unlike np.vectorize(round) it does not loop in Python and it accepts
# empty arrays. Callers cast the (float) result with .astype(int).
rnd = np.rint

In [0]:
# Run the trained network over the training windows themselves (not the
# held-out split), then map the scaled outputs back through the scaler's
# inverse transform.
scaled_pred = rnn.model.predict(rnn.X_train_processed)
pred = rnn.scaler.inverse_transform(scaled_pred)

In [0]:
# Round the predictions to whole numbers, cast to int, and rebuild a MIDI
# pattern from the resulting per-tick pitch matrix.
pattern = detokenize_data(rnd(pred).astype(int))

In [0]:
# Write the reconstructed pattern to disk. Assumes data/output/ already
# exists — TODO confirm; nothing in this notebook creates it.
midi.write_midifile('data/output/jingle-bells-pred-train.mid', pattern)