In [35]:
import errno
import os
import shutil

import numpy as np
import pretty_midi
from pypianoroll import Multitrack, Track


In [36]:
ROOT_PATH = 'datasets'
CONVERTER_PATH = os.path.join(ROOT_PATH, 'bhairavi_test/converter')
CLEANER_PATH = os.path.join(ROOT_PATH, 'bhairavi_test/cleaner')
test_per = 0.2

In [37]:

# helper functions

# return a list of path of midi files in the given path
def get_midi_path(path):
    paths = []
    for dirpath, _, filenames in os.walk(path):
        for filename in filenames:
            if filename.endswith('.mid') or filename.endswith('.midi'):
                paths.append(os.path.join(dirpath, filename))
    return paths


# extract information from the given midi file
def get_midi_info(pm):
    if pm.time_signature_changes:
        pm.time_signature_changes.sort(key=lambda x: x.time)
        first_beat_time = pm.time_signature_changes[0].time
    else:
        first_beat_time = pm.estimate_beat_start()

    tc_times, tempi = pm.get_tempo_changes()

    if len(pm.time_signature_changes) == 1:
        time_sign = '{}/{}'.format(pm.time_signature_changes[0].numerator,
                                   pm.time_signature_changes[0].denominator)
    else:
        time_sign = None

    midi_info = {
        'first_beat_time': first_beat_time,
        'num_time_signature_change': len(pm.time_signature_changes),
        'time_signature': time_sign,
        'tempo': tempi[0] if len(tc_times) == 1 else None
    }

    return midi_info


# check this
def get_merged(multitrack):
    """Return a `pypianoroll.Multitrack` instance with piano-rolls merged to
    five tracks (Bass, Drums, Guitar, Piano and Strings)"""
    category_list = {'Bass': [], 'Drums': [], 'Guitar': [], 'Piano': [], 'Strings': []}
    program_dict = {'Piano': 0, 'Drums': 0, 'Guitar': 24, 'Bass': 32, 'Strings': 48}

    for idx, track in enumerate(multitrack.tracks):
        if track.is_drum:
            category_list['Drums'].append(idx)
        elif track.program // 8 == 0:
            category_list['Piano'].append(idx)
        elif track.program // 8 == 3:
            category_list['Guitar'].append(idx)
        elif track.program // 8 == 4:
            category_list['Bass'].append(idx)
        else:
            category_list['Strings'].append(idx)

    tracks = []
    for key in category_list:
        if category_list[key]:
            merged = multitrack[category_list[key]].get_merged_pianoroll()
            tracks.append(Track(merged, program_dict[key], key == 'Drums', key))
        else:
            tracks.append(Track(None, program_dict[key], key == 'Drums', key))
    return Multitrack(None, tracks, multitrack.tempo, multitrack.downbeat, multitrack.beat_resolution, multitrack.name)


def path_exists(path):
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise


# convert midi file to multi-track piano-roll and save to a particular dataset directory
def convert_midi_to_pianoroll(path):
    try:
        midi_name = os.path.splitext(os.path.basename(path))[0]
        print('midi name')
        print(midi_name)
        multitrack = Multitrack(resolution=24, name=midi_name)
        print('hello')
        print('multitrack')
        print(multitrack)
        pm = pretty_midi.PrettyMIDI(path)
        midi_info = get_midi_info(pm)
        print('info')
        print(midi_info)
        print(multitrack.parse_pretty_midi(pm))
        merged = get_merged(multitrack)
        print('merger')
        print(merged)

    
        path_exists(CONVERTER_PATH)
        merged.save(os.path.join(CONVERTER_PATH, midi_name + '.npz'))
        return [midi_name, midi_info]

    except:
        return None


def midi_filter(midi_info):
    if midi_info['first_beat_time'] > 0.0:
        return False
    elif midi_info['num_time_signature_change'] > 1:
        return False
    elif midi_info['time_signature'] not in ['4/4']:
        return False
    return True

1. dividing the set into train and test

In [38]:
# l = [f for f in os.listdir(os.path.join(ROOT_PATH, 'bhairavi'))]
# print(l)
# idx = np.random.choice(len(l), int(test_per * len(l)), replace=False)
# print(len(idx))
# path_exists(os.path.join(ROOT_PATH, 'bhairavi_test'))
# for i in idx:
#     shutil.move(os.path.join(ROOT_PATH, 'bhairavi', l[i]),
#                 os.path.join(ROOT_PATH, 'bhairavi_test', l[i]))

In [39]:
midi_paths = get_midi_path(os.path.join(ROOT_PATH, 'bhairavi_test'))
midi_dict = {}
kv_pairs = [convert_midi_to_pianoroll(midi_path) for midi_path in midi_paths]
for kv_pair in kv_pairs:
    if kv_pair is not None:
        midi_dict[kv_pair[0]] = kv_pair[1]

# print(kv_pairs)
# with open(os.path.join(ROOT_PATH, 'bhairavi_test/midis.json'), 'w') as outfile:
#     json.dump(midi_dict, outfile)

# print("[Done] {} files out of {} have been successfully converted".format(len(midi_dict), len(midi_paths)))

# with open(os.path.join(ROOT_PATH, 'bhairavi_test/midis.json')) as infile:
#     midi_dict = json.load(infile)
# count = 0
# path_exists(CLEANER_PATH)
# midi_dict_clean = {}
# for key in midi_dict:
#     if midi_filter(midi_dict[key]):
#         midi_dict_clean[key] = midi_dict[key]
#         count += 1
#         shutil.copyfile(os.path.join(CONVERTER_PATH, key + '.npz'),
#                             os.path.join(CLEANER_PATH, key + '.npz'))

# with open(os.path.join(ROOT_PATH, 'bhairavi_test/midis_clean.json'), 'w') as outfile:
#     json.dump(midi_dict_clean, outfile)

# print("[Done] {} files out of {} have been successfully cleaned".format(count, len(midi_dict)))


midi name
Bhairavi02.wav
hello
multitrack
Multitrack(name='Bhairavi02.wav', resolution=24, tracks=[])
info
{'first_beat_time': 0.0, 'num_time_signature_change': 0, 'time_signature': None, 'tempo': 120.0}
