## First, transpose each song into C (major|minor)

In [None]:
import glob
import os
import numpy as np
import music21
import pretty_midi
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
def slidingWindow(sequence, winSize, step=1):
    """Yield successive windows of ``winSize`` items taken from ``sequence``.

    Windows start at indices 0, step, 2*step, ... and every window that fits
    entirely inside ``sequence`` is emitted, including the last one (the
    window ending exactly at the end of the sequence).

    Args:
        sequence: An iterable that also supports ``len()`` and slicing
            (list, string, NumPy array, ...).
        winSize: Number of items in each window.
        step: Offset between the starts of consecutive windows.

    Yields:
        ``sequence[i:i + winSize]`` for each window start ``i``.

    Raises:
        Exception: If ``sequence`` is not iterable, ``step`` is smaller than
            1 or larger than ``winSize``, or ``winSize`` exceeds the sequence
            length. Because this is a generator, the checks run when the
            first item is requested.
    """
    # Verify the inputs
    try:
        iter(sequence)
    except TypeError:
        raise Exception("**ERROR** sequence must be iterable.")
    if step < 1:
        # A zero or negative step would never advance past the end.
        raise Exception("**ERROR** step must be a positive integer.")
    if step > winSize:
        raise Exception("**ERROR** step must not be larger than winSize.")
    if winSize > len(sequence):
        raise Exception("**ERROR** winSize must not be larger than sequence length.")

    # Number of windows that fit completely inside the sequence.
    numOfChunks = ((len(sequence) - winSize) // step) + 1

    # Drive the loop from the chunk count so the final full window is included.
    for chunk in range(numOfChunks):
        start = chunk * step
        yield sequence[start:start + winSize]

In [None]:
# See http://nickkellyresearch.com/python-script-transpose-midi-files-c-minor/
# Transposes every MIDI file in the working folder so that its detected tonic
# becomes C; the mode reported by the key analysis is printed afterwards.

# Semitone shift that moves each tonic name onto C. Enharmonic spellings
# (e.g. "G#" / "A-") share the same shift.
key_halfsteps = {
    "C": 0,
    "C#": -1, "D-": -1,
    "D": -2,
    "D#": -3, "E-": -3,
    "E": -4,
    "F": -5,
    "F#": 6, "G-": 6,
    "G": 5,
    "G#": 4, "A-": 4,
    "A": 3,
    "A#": 2, "B-": 2,
    "B": 1,
}


# os.chdir("./audio_files/bach/JSB Chorales/test")
os.chdir("/home/eko/Downloads/lakh_train/")

for midi_name in glob.glob("*.mid"):
    original = music21.converter.parse(midi_name)

    # Look up the shift for the tonic found by the key analysis.
    detected_key = original.analyze('key')
    shift = key_halfsteps[detected_key.tonic.name]

    transposed = original.transpose(shift)

    # Re-run the analysis on the result and report what it found.
    detected_key = transposed.analyze('key')
    print(detected_key.tonic.name, detected_key.mode)

    # Write the transposed score with a "CN_" prefix on the file name.
    transposed.write('midi', "/home/eko/Downloads/lakh_train/preprocessing/CN_" + midi_name)


## Create the mudb

The Bach chorales all share the same tempo, so all we need to do is segment each file into frames.

In [None]:
# import mido
# import warnings

def create_mudb(data='train', num_bars=8):
    """Build and save the piano-roll window database for one dataset split.

    Each ``*.mid`` file in ``/home/eko/winter2018/Nottingham/<data>/`` is
    rendered to a binarised piano roll, padded with ``int(fs)`` empty frames
    at the start, and cut into overlapping windows with ``slidingWindow``.
    All windows are stacked and written, with the framing parameters, to
    ``CN_mudb_<data>.npz`` in the Nottingham ``preprocessing`` folder.

    Args:
        data: Name of the split sub-folder to read (e.g. 'train', 'valid',
            'test'); also used in the output file name.
        num_bars: Number of bars per window; the window spans
            ``2 * num_bars * fs`` frames.

    Returns:
        None. The result is written to disk with ``np.savez``.

    Notes:
        * ``fs`` is recomputed per file from its first tempo entry, so the
          ``fs`` stored in the archive is that of the last file processed
          (or the placeholder 100 when no file matched).
        * NOTE(review): files with different tempi produce windows of
          different lengths, which ``np.asarray`` cannot stack into a
          regular array -- confirm all inputs share one tempo.
    """
    # Local import: the file-level ``import warnings`` is commented out, so
    # without this the filter call below raises NameError.
    import warnings

    # Suppress warnings for the whole run; this is loop-invariant, so it is
    # set once up front (it also covers loading the first file).
    warnings.simplefilter('ignore')

    bars = []
    T = 2 * num_bars  # Every fs time steps is half a bar
    fs = 100  # placeholder; overwritten for every file that is processed
    for file in glob.glob("/home/eko/winter2018/Nottingham/%s/*.mid" % data):
        pm = pretty_midi.PrettyMIDI(file)

        # First tempo entry of the file drives the sampling rate.
        # Presumably tempo is in BPM, giving 8 frames per beat -- TODO confirm.
        initial_tempo = pm.get_tempo_changes()[1][0]
        fs = (initial_tempo / 60) * 8

        # Transpose so rows are frames, then binarise to note on/off.
        piano_roll = pm.get_piano_roll(fs=fs)
        piano_roll = (piano_roll.T > 0).astype(np.float32)

        # Add an empty bar before the start. ``shape[1]`` is used instead of
        # ``len(piano_roll[0])`` so a roll with zero frames does not raise.
        lead_in = np.zeros((int(fs), piano_roll.shape[1]), dtype=np.float32)
        piano_roll = np.vstack((lead_in, piano_roll))

        windowSize = T * fs

        # Skip files too short to hold a single full window.
        if len(piano_roll) > windowSize:
            bars.extend(slidingWindow(sequence=piano_roll, winSize=int(windowSize), step=int(fs)))

    bars = np.asarray(bars)
    np.savez('/home/eko/winter2018/Nottingham/preprocessing/CN_mudb_%s.npz' % data, bars=bars, fs=fs, T=T, num_bars=num_bars)

In [None]:
# Build the databases for the validation and test splits.
# create_mudb('train')
for split_name in ('valid', 'test'):
    create_mudb(split_name)