In [1]:
'''
*** This is a slightly modified version of the original data processing script available with the dataset

This is the data processing script for POP909: A Pop-song Dataset for Music Arrangement Generation
============
It lets you quickly convert the POP909 MIDI files into Google Magenta's music representation,
    as used by [Music Transformer](https://magenta.tensorflow.org/music-transformer) and
            [Performance RNN](https://magenta.tensorflow.org/performance-rnn).

'''
import pickle
import os
import sys
from helpers.midi import MidiEventProcessor
import pretty_midi as pyd
import numpy as np

# Root of the local workspace. Set the POP909_BASE_DIR environment variable
# before starting the kernel to relocate it; the fallback is the original
# hard-coded location, so existing setups keep working unchanged.
BASE_DIR = os.environ.get("POP909_BASE_DIR", "/home/rithomas")
# Raw dataset folder that is scanned for songs.
DATA_DIR = os.path.join(BASE_DIR, "data", "POP909-Dataset", "POP909")
# Folder that receives the preprocessed output.
OUTPUT_DIR = os.path.join(BASE_DIR, "cache", "preprocessed", "POP909")

ModuleNotFoundError: No module named 'numpy'

In [None]:
# exist_ok=True already makes this a no-op when the folder is present, so a
# separate os.path.exists() test (and its check-then-create race) is not
# needed. It also fails loudly here if OUTPUT_DIR is a regular file rather
# than later when the result is saved.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def prepare_midi_notes(notes):
    """Round note onsets to two decimals and order the notes by onset.

    Works in place: each element's ``start`` attribute is overwritten with
    its rounded value and the list itself is re-sorted. The sort is stable,
    so notes that end up with the same onset keep their relative order.

    Args:
        notes: Mutable list of objects exposing a numeric ``start`` attribute.

    Returns:
        The very same list object that was passed in.
    """
    for note in notes:
        note.start = round(note.start, 2)
    notes.sort(key=lambda note: note.start)
    return notes

def preprocess_midi(path):
    """Encode each instrument track of one MIDI file.

    Args:
        path: Location of the MIDI file; handed straight to ``pyd.PrettyMIDI``.

    Returns:
        dict keyed by instrument name. Each value is whatever
        ``MidiEventProcessor.encode`` returns for that track's notes after
        ``prepare_midi_notes`` has rounded and sorted them. Tracks that share
        a name overwrite one another, so the last one wins.
    """
    midi = pyd.PrettyMIDI(path)
    processor = MidiEventProcessor()

    encoded_tracks = {}
    for track in midi.instruments:
        # prepare_midi_notes edits track.notes in place and returns it.
        cleaned_notes = prepare_midi_notes(track.notes)
        tokens = processor.encode(cleaned_notes)
        encoded_tracks[track.name] = tokens
        # Progress output: length of the encoded sequence for this track.
        print(len(tokens))

    return encoded_tracks

def preprocess_pop909(midi_root, save_dir):
    """Encode every song under ``midi_root`` and save the results as one ``.npy`` file.

    The expected layout is ``<midi_root>/<song>/<song>.mid``. Entries of
    ``midi_root`` that do not match it (stray files, or folders without the
    matching ``.mid``) are skipped rather than passed to the MIDI parser.

    Args:
        midi_root: Directory holding one sub-folder per song.
        save_dir: Directory that receives ``pop909-event-token.npy``. It is
            created if it does not exist yet.

    Returns:
        None. If ``KeyboardInterrupt`` or ``EOFError`` is raised while a song
        is being encoded, the run is abandoned and nothing is written.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the saved array reproducible across runs and machines.
    songs = []
    for entry in sorted(os.listdir(midi_root)):
        midi_file = os.path.join(midi_root, entry, entry + '.mid')
        if os.path.isfile(midi_file):
            songs.append((entry, midi_file))

    save_py = []
    for entry, midi_file in songs:
        print(' ', end='[{}]'.format(entry), flush=True)
        try:
            data = preprocess_midi(midi_file)
        except KeyboardInterrupt:
            print(' Abort')
            return
        except EOFError:
            print('EOF Error')
            return
        save_py.append(data)

    save_py = np.array(save_py)
    print(save_py.size)
    # Honour the save_dir argument instead of the module-level OUTPUT_DIR,
    # and make sure the target exists so the function works on its own.
    os.makedirs(save_dir, exist_ok=True)
    np.save(os.path.join(save_dir, "pop909-event-token.npy"), save_py)
            
    
# Run the whole preprocessing pass. DATA_DIR must point at your local POP909
# data folder; the encoded result ends up under OUTPUT_DIR.
preprocess_pop909(midi_root=DATA_DIR, save_dir=OUTPUT_DIR)