In [1]:
'''
*** This is a slightly modified version of the original data processing script available with the dataset

This is the data processing script for POP909:A Pop song Dataset for Music Arrangement Generation
============
It will allow you to quickly process the POP909 Files (Midi) into the Google Magenta's music representation 
    as like [Music Transformer](https://magenta.tensorflow.org/music-transformer) 
            [Performance RNN](https://magenta.tensorflow.org/performance-rnn).

'''
import pickle
import os
import sys
from helpers.midi import MidiEventProcessor
import pretty_midi as pyd
import numpy as np
import tensorflow as tf

#BASE_DIR = "/home/rithomas"
BASE_DIR = "/home/richhiey/Desktop/workspace/projects/virtual_musicians"

#DATA_DIR = os.path.join(BASE_DIR, "data", "POP909-Dataset", "POP909")
DATA_DIR = os.path.join(BASE_DIR, "data", "POP909-Dataset", "POP909")

#OUTPUT_DIR = os.path.join(BASE_DIR, "cache", "preprocessed", "POP909")
OUTPUT_DIR = os.path.join(BASE_DIR, "data", "preprocessed")

MIDI_EVENTS_PATH = os.path.join(OUTPUT_DIR, "pop909-event-token.npy")
TFRECORD_DATASET = os.path.join(OUTPUT_DIR, "tfrecords", "train.tfrecords")

In [2]:
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR, exist_ok=True)

def prepare_midi_notes(notes):
    for i in range(len(notes)):
        notes[i].start = round(notes[i].start,2)    
    notes.sort(key = lambda x:x.start)
    return notes

def preprocess_midi(path):
    data = pyd.PrettyMIDI(path)    

    mpr = MidiEventProcessor()
    separated_notes = {}
    repr_seq = {}
    
    for instr in data.instruments:
        separated_notes[instr.name] = prepare_midi_notes(instr.notes)
        repr_seq[instr.name] = mpr.encode(separated_notes[instr.name])
        print(len(repr_seq[instr.name]))

    return repr_seq

def preprocess_pop909(midi_root, save_dir):
    save_py = []
    midi_paths = [d for d in os.listdir(midi_root)]
    i = 0
    out_fmt = '{}-{}.data'
    for path in midi_paths:
        if (path.isnumeric()):
            print(' ', end='[{}]'.format(path), flush=True)
            filename = midi_root + '/' + path + '/' + path + '.mid'
            try:
                data = preprocess_midi(filename)
            except KeyboardInterrupt:
                print(' Abort')
                return
            except EOFError:
                print('EOF Error')
                return
            save_py.append(data)
        
    save_py = np.array(save_py)
    print(save_py.size)
    np.save(save_path, save_py)
            
    
# replace the folder with your POP909 data folder
#preprocess_pop909(DATA_DIR, OUTPUT_DIR)

In [3]:
pop909 = np.load(MIDI_EVENTS_PATH, allow_pickle=True)
print(np.shape(pop909))
melody = [song['MELODY'] for song in pop909]
piano = [song['PIANO'] for song in pop909]

def create_tfrecord_dataset(file_path, inputs, targets):
    with tf.io.TFRecordWriter(file_path) as file_writer:
        for x, y in zip(inputs, targets):
            example = tf.train.Example(
                features=tf.train.Features(
                    feature={
                        "y": tf.train.Feature(int64_list=tf.train.Int64List(value=y)),
                        "x": tf.train.Feature(int64_list=tf.train.Int64List(value=x))
                      }
                )
            )
            file_writer.write(example.SerializeToString())
            file_writer.close()

create_tfrecord_dataset(TFRECORD_DATASET, melody, piano)

(909,)


FailedPreconditionError: Writer is closed.